simhash 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/simhash.rb +4 -0
  2. metadata +4 -4
data/lib/simhash.rb CHANGED
@@ -20,6 +20,7 @@ module Simhash
20
20
  hashbits = options[:hashbits] || 64
21
21
  token_min_size = options[:token_min_size].to_i
22
22
  hashing_method = options[:hashing_method] || DEFAULT_STRING_HASH_METHOD
23
+ stop_sentenses = options[:stop_sentenses]
23
24
 
24
25
  v = [0] * hashbits
25
26
  masks = v.dup
@@ -33,6 +34,9 @@ module Simhash
33
34
 
34
35
  # cutting stop-words
35
36
  token = token.split(" ").reject{ |w| Stopwords::ALL.index(" #{w} ") != nil }.join(" ") if options[:stop_words]
37
+
38
+ # cutting stop-sentenses
39
+ next if stop_sentenses && stop_sentenses.include?(" #{token} ")
36
40
 
37
41
  next if token.size.zero? || token.mb_chars.size < token_min_size
38
42
  hashed_token = token.send(hashing_method, hashbits).to_i
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simhash
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alex Gusev
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-01 00:00:00 +04:00
18
+ date: 2010-09-07 00:00:00 +04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency