simhash 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2):
  1. data/lib/simhash.rb +4 -0
  2. metadata +4 -4
data/lib/simhash.rb CHANGED
@@ -20,6 +20,7 @@ module Simhash
20
20
  hashbits = options[:hashbits] || 64
21
21
  token_min_size = options[:token_min_size].to_i
22
22
  hashing_method = options[:hashing_method] || DEFAULT_STRING_HASH_METHOD
23
+ stop_sentenses = options[:stop_sentenses]
23
24
 
24
25
  v = [0] * hashbits
25
26
  masks = v.dup
@@ -33,6 +34,9 @@ module Simhash
33
34
 
34
35
  # cutting stop-words
35
36
  token = token.split(" ").reject{ |w| Stopwords::ALL.index(" #{w} ") != nil }.join(" ") if options[:stop_words]
37
+
38
+ # cutting stop-sentenses
39
+ next if stop_sentenses && stop_sentenses.include?(" #{token} ")
36
40
 
37
41
  next if token.size.zero? || token.mb_chars.size < token_min_size
38
42
  hashed_token = token.send(hashing_method, hashbits).to_i
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simhash
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alex Gusev
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-01 00:00:00 +04:00
18
+ date: 2010-09-07 00:00:00 +04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency