soulmate 0.0.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -110,6 +110,9 @@ Add this to gemfile:
110
110
 
111
111
  Then you can query soulmate at the /sm URL, for example: http://localhost:3000/sm/search?types[]=venues&limit=6&term=kitten
112
112
 
113
+ ### Rendering an autocompleter
114
+
115
+ Soulmate doesn't include any client-side code necessary to render an autocompleter, but Mitch Crowe put together a pretty cool-looking jQuery plugin designed for exactly that: <a href="https://github.com/mcrowe/soulmate.js">soulmate.js</a>.
113
116
 
114
117
  Contributing to soulmate
115
118
  ------------------------
data/bin/soulmate CHANGED
@@ -20,6 +20,12 @@ parser = OptionParser.new do |opts|
20
20
  Soulmate.redis = host
21
21
  end
22
22
 
23
+ opts.on("-s", "--stop-words [FILE]", "Path to file containing a list of stop words") do |fn|
24
+ File.open(fn) do |file|
25
+ Soulmate.stop_words = file.readlines.map{ |l| l.strip }
26
+ end
27
+ end
28
+
23
29
  opts.on("-h", "--help", "Show this message") do
24
30
  puts opts
25
31
  exit
data/lib/soulmate.rb CHANGED
@@ -13,7 +13,7 @@ module Soulmate
13
13
  extend self
14
14
 
15
15
  MIN_COMPLETE = 2
16
- STOP_WORDS = ["vs", "at"]
16
+ DEFAULT_STOP_WORDS = ["vs", "at", "the"]
17
17
 
18
18
  def redis=(url)
19
19
  @redis = nil
@@ -34,4 +34,12 @@ module Soulmate
34
34
  )
35
35
  end
36
36
 
37
+ def stop_words
38
+ @stop_words ||= DEFAULT_STOP_WORDS
39
+ end
40
+
41
+ def stop_words=(arr)
42
+ @stop_words = Array(arr).flatten
43
+ end
44
+
37
45
  end
@@ -2,7 +2,9 @@ module Soulmate
2
2
  module Helpers
3
3
 
4
4
  def prefixes_for_phrase(phrase)
5
- words = normalize(phrase).split(' ')
5
+ words = normalize(phrase).split(' ').reject do |w|
6
+ Soulmate.stop_words.include?(w)
7
+ end
6
8
  words.map do |w|
7
9
  (MIN_COMPLETE-1..(w.length-1)).map{ |l| w[0..l] }
8
10
  end.flatten.uniq
@@ -4,12 +4,13 @@ module Soulmate
4
4
 
5
5
  def load(items)
6
6
  # delete the sorted sets for this type
7
- # wrap in multi/exec?
8
7
  phrases = Soulmate.redis.smembers(base)
9
- phrases.each do |p|
10
- Soulmate.redis.del("#{base}:#{p}")
8
+ Soulmate.redis.pipelined do
9
+ phrases.each do |p|
10
+ Soulmate.redis.del("#{base}:#{p}")
11
+ end
12
+ Soulmate.redis.del(base)
11
13
  end
12
- Soulmate.redis.del(base)
13
14
 
14
15
  # Redis can continue serving cached requests for this type while the reload is
15
16
  # occurring. Some requests may be cached incorrectly as empty set (for requests
@@ -32,12 +33,14 @@ module Soulmate
32
33
  # kill any old items with this id
33
34
  remove("id" => item["id"]) unless opts[:skip_duplicate_check]
34
35
 
35
- # store the raw data in a separate key to reduce memory usage
36
- Soulmate.redis.hset(database, item["id"], MultiJson.encode(item))
37
- phrase = ([item["term"]] + (item["aliases"] || [])).join(' ')
38
- prefixes_for_phrase(phrase).each do |p|
39
- Soulmate.redis.sadd(base, p) # remember this prefix in a master set
40
- Soulmate.redis.zadd("#{base}:#{p}", item["score"], item["id"]) # store the id of this term in the index
36
+ Soulmate.redis.pipelined do
37
+ # store the raw data in a separate key to reduce memory usage
38
+ Soulmate.redis.hset(database, item["id"], MultiJson.encode(item))
39
+ phrase = ([item["term"]] + (item["aliases"] || [])).join(' ')
40
+ prefixes_for_phrase(phrase).each do |p|
41
+ Soulmate.redis.sadd(base, p) # remember this prefix in a master set
42
+ Soulmate.redis.zadd("#{base}:#{p}", item["score"], item["id"]) # store the id of this term in the index
43
+ end
41
44
  end
42
45
  end
43
46
 
@@ -47,11 +50,13 @@ module Soulmate
47
50
  if prev_item
48
51
  prev_item = MultiJson.decode(prev_item)
49
52
  # undo the operations done in add
50
- Soulmate.redis.hdel(database, prev_item["id"])
51
- phrase = ([prev_item["term"]] + (prev_item["aliases"] || [])).join(' ')
52
- prefixes_for_phrase(phrase).each do |p|
53
- Soulmate.redis.srem(base, p)
54
- Soulmate.redis.zrem("#{base}:#{p}", prev_item["id"])
53
+ Soulmate.redis.pipelined do
54
+ Soulmate.redis.hdel(database, prev_item["id"])
55
+ phrase = ([prev_item["term"]] + (prev_item["aliases"] || [])).join(' ')
56
+ prefixes_for_phrase(phrase).each do |p|
57
+ Soulmate.redis.srem(base, p)
58
+ Soulmate.redis.zrem("#{base}:#{p}", prev_item["id"])
59
+ end
55
60
  end
56
61
  end
57
62
  end
@@ -5,9 +5,7 @@ module Soulmate
5
5
  def matches_for_term(term, options = {})
6
6
  options = { :limit => 5, :cache => true }.merge(options)
7
7
 
8
- words = normalize(term).split(' ').reject do |w|
9
- w.size < MIN_COMPLETE or STOP_WORDS.include?(w)
10
- end.sort
8
+ words = normalize(term).split(' ').reject{ |w| w.size < MIN_COMPLETE }.sort
11
9
 
12
10
  return [] if words.empty?
13
11
 
@@ -1,8 +1,8 @@
1
1
  module Soulmate
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 0
5
- PATCH = 6
4
+ MINOR = 1
5
+ PATCH = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].compact.join('.')
8
8
  end
data/soulmate.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{soulmate}
8
- s.version = "0.0.6"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Eric Waller"]
12
- s.date = %q{2011-11-18}
12
+ s.date = %q{2012-01-05}
13
13
  s.description = %q{Soulmate is a tool to help solve the common problem of developing a fast autocomplete feature. It uses Redis's sorted sets to build an index of partial words and corresponding top matches, and provides a simple sinatra app to query them. Soulmate finishes your sentences.}
14
14
  s.email = %q{eric@seatgeek.com}
15
15
  s.executables = ["soulmate", "soulmate-web"]
@@ -36,6 +36,7 @@ Gem::Specification.new do |s|
36
36
  "soulmate.gemspec",
37
37
  "test/db/.gitkeep",
38
38
  "test/helper.rb",
39
+ "test/samples/stop-words.txt",
39
40
  "test/samples/venues.json",
40
41
  "test/test.conf",
41
42
  "test/test_soulmate.rb"
@@ -0,0 +1,4 @@
1
+ vs
2
+ at
3
+ the
4
+ to
@@ -94,6 +94,10 @@ class TestSoulmate < Test::Unit::TestCase
94
94
  def test_prefixes_for_phrase
95
95
  loader = Soulmate::Loader.new('venues')
96
96
 
97
+ Soulmate.stop_words = ['the']
98
+
99
+ assert_equal ["kn", "kni", "knic", "knick", "knicks"], loader.prefixes_for_phrase("the knicks")
100
+ assert_equal ["te", "tes", "test", "testi", "testin", "th", "thi", "this"], loader.prefixes_for_phrase("testin' this")
97
101
  assert_equal ["te", "tes", "test", "testi", "testin", "th", "thi", "this"], loader.prefixes_for_phrase("testin' this")
98
102
  assert_equal ["te", "tes", "test"], loader.prefixes_for_phrase("test test")
99
103
  assert_equal ["so", "sou", "soul", "soulm", "soulma", "soulmat", "soulmate"], loader.prefixes_for_phrase("SoUlmATE")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: soulmate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-18 00:00:00.000000000 -05:00
12
+ date: 2012-01-05 00:00:00.000000000 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: redis
17
- requirement: &2165158440 !ruby/object:Gem::Requirement
17
+ requirement: &2160527080 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 2.1.1
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2165158440
25
+ version_requirements: *2160527080
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: multi_json
28
- requirement: &2165156820 !ruby/object:Gem::Requirement
28
+ requirement: &2160525840 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.0.3
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2165156820
36
+ version_requirements: *2160525840
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: rack-contrib
39
- requirement: &2165155380 !ruby/object:Gem::Requirement
39
+ requirement: &2160524600 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: '0'
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *2165155380
47
+ version_requirements: *2160524600
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: vegas
50
- requirement: &2165152760 !ruby/object:Gem::Requirement
50
+ requirement: &2160523120 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 0.1.8
56
56
  type: :runtime
57
57
  prerelease: false
58
- version_requirements: *2165152760
58
+ version_requirements: *2160523120
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: sinatra
61
- requirement: &2165151760 !ruby/object:Gem::Requirement
61
+ requirement: &2160516060 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,10 +66,10 @@ dependencies:
66
66
  version: 1.2.3
67
67
  type: :runtime
68
68
  prerelease: false
69
- version_requirements: *2165151760
69
+ version_requirements: *2160516060
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: shoulda
72
- requirement: &2165151060 !ruby/object:Gem::Requirement
72
+ requirement: &2160514180 !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
75
  - - ! '>='
@@ -77,10 +77,10 @@ dependencies:
77
77
  version: '0'
78
78
  type: :development
79
79
  prerelease: false
80
- version_requirements: *2165151060
80
+ version_requirements: *2160514180
81
81
  - !ruby/object:Gem::Dependency
82
82
  name: bundler
83
- requirement: &2165150120 !ruby/object:Gem::Requirement
83
+ requirement: &2160512560 !ruby/object:Gem::Requirement
84
84
  none: false
85
85
  requirements:
86
86
  - - ~>
@@ -88,10 +88,10 @@ dependencies:
88
88
  version: 1.0.0
89
89
  type: :development
90
90
  prerelease: false
91
- version_requirements: *2165150120
91
+ version_requirements: *2160512560
92
92
  - !ruby/object:Gem::Dependency
93
93
  name: jeweler
94
- requirement: &2165149320 !ruby/object:Gem::Requirement
94
+ requirement: &2160510860 !ruby/object:Gem::Requirement
95
95
  none: false
96
96
  requirements:
97
97
  - - ~>
@@ -99,10 +99,10 @@ dependencies:
99
99
  version: 1.5.2
100
100
  type: :development
101
101
  prerelease: false
102
- version_requirements: *2165149320
102
+ version_requirements: *2160510860
103
103
  - !ruby/object:Gem::Dependency
104
104
  name: rcov
105
- requirement: &2165148620 !ruby/object:Gem::Requirement
105
+ requirement: &2160509180 !ruby/object:Gem::Requirement
106
106
  none: false
107
107
  requirements:
108
108
  - - ! '>='
@@ -110,10 +110,10 @@ dependencies:
110
110
  version: '0'
111
111
  type: :development
112
112
  prerelease: false
113
- version_requirements: *2165148620
113
+ version_requirements: *2160509180
114
114
  - !ruby/object:Gem::Dependency
115
115
  name: redis
116
- requirement: &2165147860 !ruby/object:Gem::Requirement
116
+ requirement: &2160504040 !ruby/object:Gem::Requirement
117
117
  none: false
118
118
  requirements:
119
119
  - - ! '>='
@@ -121,10 +121,10 @@ dependencies:
121
121
  version: '2.0'
122
122
  type: :runtime
123
123
  prerelease: false
124
- version_requirements: *2165147860
124
+ version_requirements: *2160504040
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: vegas
127
- requirement: &2165146920 !ruby/object:Gem::Requirement
127
+ requirement: &2160500820 !ruby/object:Gem::Requirement
128
128
  none: false
129
129
  requirements:
130
130
  - - ! '>='
@@ -132,10 +132,10 @@ dependencies:
132
132
  version: 0.1.0
133
133
  type: :runtime
134
134
  prerelease: false
135
- version_requirements: *2165146920
135
+ version_requirements: *2160500820
136
136
  - !ruby/object:Gem::Dependency
137
137
  name: sinatra
138
- requirement: &2165145580 !ruby/object:Gem::Requirement
138
+ requirement: &2160499040 !ruby/object:Gem::Requirement
139
139
  none: false
140
140
  requirements:
141
141
  - - ! '>='
@@ -143,10 +143,10 @@ dependencies:
143
143
  version: '1.0'
144
144
  type: :runtime
145
145
  prerelease: false
146
- version_requirements: *2165145580
146
+ version_requirements: *2160499040
147
147
  - !ruby/object:Gem::Dependency
148
148
  name: json
149
- requirement: &2165137880 !ruby/object:Gem::Requirement
149
+ requirement: &2160480560 !ruby/object:Gem::Requirement
150
150
  none: false
151
151
  requirements:
152
152
  - - ~>
@@ -154,7 +154,7 @@ dependencies:
154
154
  version: 1.4.6
155
155
  type: :runtime
156
156
  prerelease: false
157
- version_requirements: *2165137880
157
+ version_requirements: *2160480560
158
158
  description: Soulmate is a tool to help solve the common problem of developing a fast
159
159
  autocomplete feature. It uses Redis's sorted sets to build an index of partial words
160
160
  and corresponding top matches, and provides a simple sinatra app to query them.
@@ -186,6 +186,7 @@ files:
186
186
  - soulmate.gemspec
187
187
  - test/db/.gitkeep
188
188
  - test/helper.rb
189
+ - test/samples/stop-words.txt
189
190
  - test/samples/venues.json
190
191
  - test/test.conf
191
192
  - test/test_soulmate.rb
@@ -205,7 +206,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
205
206
  version: '0'
206
207
  segments:
207
208
  - 0
208
- hash: 4567717493668113734
209
+ hash: 1384397421883417859
209
210
  required_rubygems_version: !ruby/object:Gem::Requirement
210
211
  none: false
211
212
  requirements: