soulmate 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +3 -0
- data/bin/soulmate +6 -0
- data/lib/soulmate.rb +9 -1
- data/lib/soulmate/helpers.rb +3 -1
- data/lib/soulmate/loader.rb +20 -15
- data/lib/soulmate/matcher.rb +1 -3
- data/lib/soulmate/version.rb +2 -2
- data/soulmate.gemspec +3 -2
- data/test/samples/stop-words.txt +4 -0
- data/test/test_soulmate.rb +4 -0
- metadata +30 -29
data/README.markdown
CHANGED
@@ -110,6 +110,9 @@ Add this to gemfile:
|
|
110
110
|
|
111
111
|
Then you can query soulmate at the /sm url, for example: http://localhost:3000/sm/search?types[]=venues&limit=6&term=kitten
|
112
112
|
|
113
|
+
### Rendering an autocompleter
|
114
|
+
|
115
|
+
Soulmate doesn't include any client-side code necessary to render an autocompleter, but Mitch Crowe put together a pretty cool-looking jQuery plugin designed for exactly that: <a href="https://github.com/mcrowe/soulmate.js">soulmate.js</a>.
|
113
116
|
|
114
117
|
Contributing to soulmate
|
115
118
|
------------------------
|
data/bin/soulmate
CHANGED
@@ -20,6 +20,12 @@ parser = OptionParser.new do |opts|
|
|
20
20
|
Soulmate.redis = host
|
21
21
|
end
|
22
22
|
|
23
|
+
opts.on("-s", "--stop-words [FILE]", "Path to file containing a list of stop words") do |fn|
|
24
|
+
File.open(fn) do |file|
|
25
|
+
Soulmate.stop_words = file.readlines.map{ |l| l.strip }
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
23
29
|
opts.on("-h", "--help", "Show this message") do
|
24
30
|
puts opts
|
25
31
|
exit
|
data/lib/soulmate.rb
CHANGED
@@ -13,7 +13,7 @@ module Soulmate
|
|
13
13
|
extend self
|
14
14
|
|
15
15
|
MIN_COMPLETE = 2
|
16
|
-
|
16
|
+
DEFAULT_STOP_WORDS = ["vs", "at", "the"]
|
17
17
|
|
18
18
|
def redis=(url)
|
19
19
|
@redis = nil
|
@@ -34,4 +34,12 @@ module Soulmate
|
|
34
34
|
)
|
35
35
|
end
|
36
36
|
|
37
|
+
def stop_words
|
38
|
+
@stop_words ||= DEFAULT_STOP_WORDS
|
39
|
+
end
|
40
|
+
|
41
|
+
def stop_words=(arr)
|
42
|
+
@stop_words = Array(arr).flatten
|
43
|
+
end
|
44
|
+
|
37
45
|
end
|
data/lib/soulmate/helpers.rb
CHANGED
@@ -2,7 +2,9 @@ module Soulmate
|
|
2
2
|
module Helpers
|
3
3
|
|
4
4
|
def prefixes_for_phrase(phrase)
|
5
|
-
words = normalize(phrase).split(' ')
|
5
|
+
words = normalize(phrase).split(' ').reject do |w|
|
6
|
+
Soulmate.stop_words.include?(w)
|
7
|
+
end
|
6
8
|
words.map do |w|
|
7
9
|
(MIN_COMPLETE-1..(w.length-1)).map{ |l| w[0..l] }
|
8
10
|
end.flatten.uniq
|
data/lib/soulmate/loader.rb
CHANGED
@@ -4,12 +4,13 @@ module Soulmate
|
|
4
4
|
|
5
5
|
def load(items)
|
6
6
|
# delete the sorted sets for this type
|
7
|
-
# wrap in multi/exec?
|
8
7
|
phrases = Soulmate.redis.smembers(base)
|
9
|
-
|
10
|
-
|
8
|
+
Soulmate.redis.pipelined do
|
9
|
+
phrases.each do |p|
|
10
|
+
Soulmate.redis.del("#{base}:#{p}")
|
11
|
+
end
|
12
|
+
Soulmate.redis.del(base)
|
11
13
|
end
|
12
|
-
Soulmate.redis.del(base)
|
13
14
|
|
14
15
|
# Redis can continue serving cached requests for this type while the reload is
|
15
16
|
# occurring. Some requests may be cached incorrectly as empty set (for requests
|
@@ -32,12 +33,14 @@ module Soulmate
|
|
32
33
|
# kill any old items with this id
|
33
34
|
remove("id" => item["id"]) unless opts[:skip_duplicate_check]
|
34
35
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
36
|
+
Soulmate.redis.pipelined do
|
37
|
+
# store the raw data in a separate key to reduce memory usage
|
38
|
+
Soulmate.redis.hset(database, item["id"], MultiJson.encode(item))
|
39
|
+
phrase = ([item["term"]] + (item["aliases"] || [])).join(' ')
|
40
|
+
prefixes_for_phrase(phrase).each do |p|
|
41
|
+
Soulmate.redis.sadd(base, p) # remember this prefix in a master set
|
42
|
+
Soulmate.redis.zadd("#{base}:#{p}", item["score"], item["id"]) # store the id of this term in the index
|
43
|
+
end
|
41
44
|
end
|
42
45
|
end
|
43
46
|
|
@@ -47,11 +50,13 @@ module Soulmate
|
|
47
50
|
if prev_item
|
48
51
|
prev_item = MultiJson.decode(prev_item)
|
49
52
|
# undo the operations done in add
|
50
|
-
Soulmate.redis.
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
53
|
+
Soulmate.redis.pipelined do
|
54
|
+
Soulmate.redis.hdel(database, prev_item["id"])
|
55
|
+
phrase = ([prev_item["term"]] + (prev_item["aliases"] || [])).join(' ')
|
56
|
+
prefixes_for_phrase(phrase).each do |p|
|
57
|
+
Soulmate.redis.srem(base, p)
|
58
|
+
Soulmate.redis.zrem("#{base}:#{p}", prev_item["id"])
|
59
|
+
end
|
55
60
|
end
|
56
61
|
end
|
57
62
|
end
|
data/lib/soulmate/matcher.rb
CHANGED
@@ -5,9 +5,7 @@ module Soulmate
|
|
5
5
|
def matches_for_term(term, options = {})
|
6
6
|
options = { :limit => 5, :cache => true }.merge(options)
|
7
7
|
|
8
|
-
words = normalize(term).split(' ').reject do |w|
|
9
|
-
w.size < MIN_COMPLETE or STOP_WORDS.include?(w)
|
10
|
-
end.sort
|
8
|
+
words = normalize(term).split(' ').reject{ |w| w.size < MIN_COMPLETE }.sort
|
11
9
|
|
12
10
|
return [] if words.empty?
|
13
11
|
|
data/lib/soulmate/version.rb
CHANGED
data/soulmate.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{soulmate}
|
8
|
-
s.version = "0.0.6"
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Eric Waller"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2012-01-05}
|
13
13
|
s.description = %q{Soulmate is a tool to help solve the common problem of developing a fast autocomplete feature. It uses Redis's sorted sets to build an index of partial words and corresponding top matches, and provides a simple sinatra app to query them. Soulmate finishes your sentences.}
|
14
14
|
s.email = %q{eric@seatgeek.com}
|
15
15
|
s.executables = ["soulmate", "soulmate-web"]
|
@@ -36,6 +36,7 @@ Gem::Specification.new do |s|
|
|
36
36
|
"soulmate.gemspec",
|
37
37
|
"test/db/.gitkeep",
|
38
38
|
"test/helper.rb",
|
39
|
+
"test/samples/stop-words.txt",
|
39
40
|
"test/samples/venues.json",
|
40
41
|
"test/test.conf",
|
41
42
|
"test/test_soulmate.rb"
|
data/test/test_soulmate.rb
CHANGED
@@ -94,6 +94,10 @@ class TestSoulmate < Test::Unit::TestCase
|
|
94
94
|
def test_prefixes_for_phrase
|
95
95
|
loader = Soulmate::Loader.new('venues')
|
96
96
|
|
97
|
+
Soulmate.stop_words = ['the']
|
98
|
+
|
99
|
+
assert_equal ["kn", "kni", "knic", "knick", "knicks"], loader.prefixes_for_phrase("the knicks")
|
100
|
+
assert_equal ["te", "tes", "test", "testi", "testin", "th", "thi", "this"], loader.prefixes_for_phrase("testin' this")
|
97
101
|
assert_equal ["te", "tes", "test", "testi", "testin", "th", "thi", "this"], loader.prefixes_for_phrase("testin' this")
|
98
102
|
assert_equal ["te", "tes", "test"], loader.prefixes_for_phrase("test test")
|
99
103
|
assert_equal ["so", "sou", "soul", "soulm", "soulma", "soulmat", "soulmate"], loader.prefixes_for_phrase("SoUlmATE")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: soulmate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-01-05 00:00:00.000000000 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: redis
|
17
|
-
requirement: &
|
17
|
+
requirement: &2160527080 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 2.1.1
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2160527080
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: multi_json
|
28
|
-
requirement: &
|
28
|
+
requirement: &2160525840 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.0.3
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *2160525840
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rack-contrib
|
39
|
-
requirement: &
|
39
|
+
requirement: &2160524600 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: '0'
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *2160524600
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: vegas
|
50
|
-
requirement: &
|
50
|
+
requirement: &2160523120 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.1.8
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *2160523120
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: sinatra
|
61
|
-
requirement: &
|
61
|
+
requirement: &2160516060 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,10 +66,10 @@ dependencies:
|
|
66
66
|
version: 1.2.3
|
67
67
|
type: :runtime
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *2160516060
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: shoulda
|
72
|
-
requirement: &
|
72
|
+
requirement: &2160514180 !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
75
|
- - ! '>='
|
@@ -77,10 +77,10 @@ dependencies:
|
|
77
77
|
version: '0'
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
|
-
version_requirements: *
|
80
|
+
version_requirements: *2160514180
|
81
81
|
- !ruby/object:Gem::Dependency
|
82
82
|
name: bundler
|
83
|
-
requirement: &
|
83
|
+
requirement: &2160512560 !ruby/object:Gem::Requirement
|
84
84
|
none: false
|
85
85
|
requirements:
|
86
86
|
- - ~>
|
@@ -88,10 +88,10 @@ dependencies:
|
|
88
88
|
version: 1.0.0
|
89
89
|
type: :development
|
90
90
|
prerelease: false
|
91
|
-
version_requirements: *
|
91
|
+
version_requirements: *2160512560
|
92
92
|
- !ruby/object:Gem::Dependency
|
93
93
|
name: jeweler
|
94
|
-
requirement: &
|
94
|
+
requirement: &2160510860 !ruby/object:Gem::Requirement
|
95
95
|
none: false
|
96
96
|
requirements:
|
97
97
|
- - ~>
|
@@ -99,10 +99,10 @@ dependencies:
|
|
99
99
|
version: 1.5.2
|
100
100
|
type: :development
|
101
101
|
prerelease: false
|
102
|
-
version_requirements: *
|
102
|
+
version_requirements: *2160510860
|
103
103
|
- !ruby/object:Gem::Dependency
|
104
104
|
name: rcov
|
105
|
-
requirement: &
|
105
|
+
requirement: &2160509180 !ruby/object:Gem::Requirement
|
106
106
|
none: false
|
107
107
|
requirements:
|
108
108
|
- - ! '>='
|
@@ -110,10 +110,10 @@ dependencies:
|
|
110
110
|
version: '0'
|
111
111
|
type: :development
|
112
112
|
prerelease: false
|
113
|
-
version_requirements: *
|
113
|
+
version_requirements: *2160509180
|
114
114
|
- !ruby/object:Gem::Dependency
|
115
115
|
name: redis
|
116
|
-
requirement: &
|
116
|
+
requirement: &2160504040 !ruby/object:Gem::Requirement
|
117
117
|
none: false
|
118
118
|
requirements:
|
119
119
|
- - ! '>='
|
@@ -121,10 +121,10 @@ dependencies:
|
|
121
121
|
version: '2.0'
|
122
122
|
type: :runtime
|
123
123
|
prerelease: false
|
124
|
-
version_requirements: *
|
124
|
+
version_requirements: *2160504040
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: vegas
|
127
|
-
requirement: &
|
127
|
+
requirement: &2160500820 !ruby/object:Gem::Requirement
|
128
128
|
none: false
|
129
129
|
requirements:
|
130
130
|
- - ! '>='
|
@@ -132,10 +132,10 @@ dependencies:
|
|
132
132
|
version: 0.1.0
|
133
133
|
type: :runtime
|
134
134
|
prerelease: false
|
135
|
-
version_requirements: *
|
135
|
+
version_requirements: *2160500820
|
136
136
|
- !ruby/object:Gem::Dependency
|
137
137
|
name: sinatra
|
138
|
-
requirement: &
|
138
|
+
requirement: &2160499040 !ruby/object:Gem::Requirement
|
139
139
|
none: false
|
140
140
|
requirements:
|
141
141
|
- - ! '>='
|
@@ -143,10 +143,10 @@ dependencies:
|
|
143
143
|
version: '1.0'
|
144
144
|
type: :runtime
|
145
145
|
prerelease: false
|
146
|
-
version_requirements: *
|
146
|
+
version_requirements: *2160499040
|
147
147
|
- !ruby/object:Gem::Dependency
|
148
148
|
name: json
|
149
|
-
requirement: &
|
149
|
+
requirement: &2160480560 !ruby/object:Gem::Requirement
|
150
150
|
none: false
|
151
151
|
requirements:
|
152
152
|
- - ~>
|
@@ -154,7 +154,7 @@ dependencies:
|
|
154
154
|
version: 1.4.6
|
155
155
|
type: :runtime
|
156
156
|
prerelease: false
|
157
|
-
version_requirements: *
|
157
|
+
version_requirements: *2160480560
|
158
158
|
description: Soulmate is a tool to help solve the common problem of developing a fast
|
159
159
|
autocomplete feature. It uses Redis's sorted sets to build an index of partial words
|
160
160
|
and corresponding top matches, and provides a simple sinatra app to query them.
|
@@ -186,6 +186,7 @@ files:
|
|
186
186
|
- soulmate.gemspec
|
187
187
|
- test/db/.gitkeep
|
188
188
|
- test/helper.rb
|
189
|
+
- test/samples/stop-words.txt
|
189
190
|
- test/samples/venues.json
|
190
191
|
- test/test.conf
|
191
192
|
- test/test_soulmate.rb
|
@@ -205,7 +206,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
205
206
|
version: '0'
|
206
207
|
segments:
|
207
208
|
- 0
|
208
|
-
hash:
|
209
|
+
hash: 1384397421883417859
|
209
210
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
210
211
|
none: false
|
211
212
|
requirements:
|