simhash 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/simhash.rb +5 -1
- metadata +5 -7
data/lib/simhash.rb
CHANGED
@@ -14,7 +14,11 @@ end
|
|
14
14
|
|
15
15
|
module Simhash
|
16
16
|
DEFAULT_STRING_HASH_METHOD = String.public_instance_methods.include?("hash_vl") ? :hash_vl : :hash_vl_rb
|
17
|
-
PUNCTUATION_REGEXP =
|
17
|
+
PUNCTUATION_REGEXP = if RUBY_VERSION >= "1.9"
|
18
|
+
/(\s|\d|[^\p{L}]|\302\240| *— *|[«»…\-–—]| )+/u
|
19
|
+
else
|
20
|
+
/(\s|\d|\W|\302\240| *— *|[«»…\-–—]| )+/u
|
21
|
+
end
|
18
22
|
|
19
23
|
|
20
24
|
def self.hash(tokens, options={})
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 4
|
10
|
-
version: 0.2.4
|
9
|
+
- 5
|
10
|
+
version: 0.2.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alex Gusev
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
19
|
-
default_executable:
|
18
|
+
date: 2011-10-28 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: unicode
|
@@ -68,7 +67,6 @@ files:
|
|
68
67
|
- lib/string.rb
|
69
68
|
- ext/string_hashing/extconf.rb
|
70
69
|
- ext/string_hashing/string_hashing.c
|
71
|
-
has_rdoc: true
|
72
70
|
homepage: http://github.com/bookmate/simhash
|
73
71
|
licenses: []
|
74
72
|
|
@@ -98,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
98
96
|
requirements: []
|
99
97
|
|
100
98
|
rubyforge_project: simhash
|
101
|
-
rubygems_version: 1.
|
99
|
+
rubygems_version: 1.8.10
|
102
100
|
signing_key:
|
103
101
|
specification_version: 3
|
104
102
|
summary: "Gives you possbility to convert string into simhashes to futher use: finding near-duplicates, similar strings, etc."
|