simhash 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/simhash.rb +5 -1
  2. metadata +5 -7
@@ -14,7 +14,11 @@ end
14
14
 
15
15
  module Simhash
16
16
  DEFAULT_STRING_HASH_METHOD = String.public_instance_methods.include?("hash_vl") ? :hash_vl : :hash_vl_rb
17
- PUNCTUATION_REGEXP = /(\s|\d|\W|\302\240| *— *|[«»…\-–—]| )+/u
17
+ PUNCTUATION_REGEXP = if RUBY_VERSION >= "1.9"
18
+ /(\s|\d|[^\p{L}]|\302\240| *— *|[«»…\-–—]| )+/u
19
+ else
20
+ /(\s|\d|\W|\302\240| *— *|[«»…\-–—]| )+/u
21
+ end
18
22
 
19
23
 
20
24
  def self.hash(tokens, options={})
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simhash
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 4
10
- version: 0.2.4
9
+ - 5
10
+ version: 0.2.5
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alex Gusev
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-06 00:00:00 +04:00
19
- default_executable:
18
+ date: 2011-10-28 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: unicode
@@ -68,7 +67,6 @@ files:
68
67
  - lib/string.rb
69
68
  - ext/string_hashing/extconf.rb
70
69
  - ext/string_hashing/string_hashing.c
71
- has_rdoc: true
72
70
  homepage: http://github.com/bookmate/simhash
73
71
  licenses: []
74
72
 
@@ -98,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
96
  requirements: []
99
97
 
100
98
  rubyforge_project: simhash
101
- rubygems_version: 1.6.2
99
+ rubygems_version: 1.8.10
102
100
  signing_key:
103
101
  specification_version: 3
104
102
  summary: "Gives you possbility to convert string into simhashes to futher use: finding near-duplicates, similar strings, etc."