simhash 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/simhash.rb +5 -1
  2. metadata +18 -4
data/lib/simhash.rb CHANGED
@@ -1,8 +1,12 @@
1
1
  $KCODE = 'u'
2
+
3
+ require 'active_support/core_ext/string/multibyte'
2
4
  require 'unicode'
5
+
3
6
  require 'string'
4
7
  require 'integer'
5
8
  require 'simhash/stopwords'
9
+
6
10
  begin
7
11
  require 'string_hashing'
8
12
  rescue LoadError
@@ -30,7 +34,7 @@ module Simhash
30
34
  # cutting stop-words
31
35
  token = token.split(" ").reject{ |w| Stopwords::ALL.index(" #{w} ") != nil }.join(" ") if options[:stop_words]
32
36
 
33
- next if token.size.zero? || token.size < token_min_size
37
+ next if token.size.zero? || token.mb_chars.size < token_min_size
34
38
  hashed_token = token.send(hashing_method, hashbits).to_i
35
39
  hashbits.times do |i|
36
40
  v[i] += (hashed_token & masks[i]).zero? ? -1 : +1
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simhash
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 0
10
- version: 0.2.0
9
+ - 1
10
+ version: 0.2.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alex Gusev
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-20 00:00:00 +04:00
18
+ date: 2010-09-01 00:00:00 +04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -34,6 +34,20 @@ dependencies:
34
34
  version: 0.3.1
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: activesupport
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ type: :runtime
50
+ version_requirements: *id002
37
51
  description: Implementation of Charikar simhashes in Ruby
38
52
  email: alex.gusev@bookmate.ru
39
53
  executables: []