simhash 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/simhash.rb +5 -1
  2. metadata +18 -4
data/lib/simhash.rb CHANGED
@@ -1,8 +1,12 @@
1
1
  $KCODE = 'u'
2
+
3
+ require 'active_support/core_ext/string/multibyte'
2
4
  require 'unicode'
5
+
3
6
  require 'string'
4
7
  require 'integer'
5
8
  require 'simhash/stopwords'
9
+
6
10
  begin
7
11
  require 'string_hashing'
8
12
  rescue LoadError
@@ -30,7 +34,7 @@ module Simhash
30
34
  # cutting stop-words
31
35
  token = token.split(" ").reject{ |w| Stopwords::ALL.index(" #{w} ") != nil }.join(" ") if options[:stop_words]
32
36
 
33
- next if token.size.zero? || token.size < token_min_size
37
+ next if token.size.zero? || token.mb_chars.size < token_min_size
34
38
  hashed_token = token.send(hashing_method, hashbits).to_i
35
39
  hashbits.times do |i|
36
40
  v[i] += (hashed_token & masks[i]).zero? ? -1 : +1
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simhash
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 0
10
- version: 0.2.0
9
+ - 1
10
+ version: 0.2.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alex Gusev
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-20 00:00:00 +04:00
18
+ date: 2010-09-01 00:00:00 +04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -34,6 +34,20 @@ dependencies:
34
34
  version: 0.3.1
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: activesupport
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ type: :runtime
50
+ version_requirements: *id002
37
51
  description: Implementation of Charikar simhashes in Ruby
38
52
  email: alex.gusev@bookmate.ru
39
53
  executables: []