soulmate 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "shoulda", ">= 0"
10
+ gem "bundler", "~> 1.0.0"
11
+ gem "jeweler", "~> 1.5.2"
12
+ gem "rcov", ">= 0"
13
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,20 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ git (1.2.5)
5
+ jeweler (1.5.2)
6
+ bundler (~> 1.0.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ rake (0.8.7)
10
+ rcov (0.9.9)
11
+ shoulda (2.11.3)
12
+
13
+ PLATFORMS
14
+ ruby
15
+
16
+ DEPENDENCIES
17
+ bundler (~> 1.0.0)
18
+ jeweler (~> 1.5.2)
19
+ rcov
20
+ shoulda
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Eric Waller
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = soulmate
2
+
3
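+ Soulmate is a Redis-backed service for fast autocompleting, extracted from SeatGeek. It uses Redis sorted sets to build an index of partial words and their top matches, and provides a simple Sinatra app to query them.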
4
+
5
+ == Contributing to soulmate
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2011 Eric Waller. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "soulmate"
16
+ gem.homepage = "http://github.com/seatgeek/soulmate"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{Redis-backed service for fast autocompleting - extracted from SeatGeek}
19
+ gem.description = %Q{Soulmate is a tool to help solve the common problem of developing a fast autocomplete feature. It uses Redis's sorted sets to build an index of partial words and corresponding top matches, and provides a simple sinatra app to query them. Soulmate finished your sentences.}
20
+ gem.email = "eric@seatgeek.com"
21
+ gem.authors = ["Eric Waller"]
22
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
23
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
+ gem.add_runtime_dependency 'redis', '>= 2.0'
25
+ gem.add_runtime_dependency 'vegas', '>= 0.1.0'
26
+ gem.add_runtime_dependency 'sinatra', '>= 1.0'
27
+ gem.add_runtime_dependency 'json', '~> 1.4.6'
28
+ # gem.add_development_dependency 'rspec', '> 1.2.3'
29
+ end
30
+ Jeweler::RubygemsDotOrgTasks.new
31
+
32
+ require 'rake/testtask'
33
+ Rake::TestTask.new(:test) do |test|
34
+ test.libs << 'lib' << 'test'
35
+ test.pattern = 'test/**/test_*.rb'
36
+ test.verbose = true
37
+ end
38
+
39
+ require 'rcov/rcovtask'
40
+ Rcov::RcovTask.new do |test|
41
+ test.libs << 'test'
42
+ test.pattern = 'test/**/test_*.rb'
43
+ test.verbose = true
44
+ end
45
+
46
+ task :default => :test
47
+
48
+ require 'rake/rdoctask'
49
+ Rake::RDocTask.new do |rdoc|
50
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
51
+
52
+ rdoc.rdoc_dir = 'rdoc'
53
+ rdoc.title = "soulmate #{version}"
54
+ rdoc.rdoc_files.include('README*')
55
+ rdoc.rdoc_files.include('lib/**/*.rb')
56
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
data/bin/soulmate ADDED
@@ -0,0 +1,47 @@
1
#!/usr/bin/env ruby

# Command-line entry point: parses the options below and runs the requested
# command (currently only `load TYPE`, which reads items from stdin).

$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
begin
  require 'redis'
rescue LoadError
  require 'rubygems'
  require 'redis'
end
require 'soulmate'
require 'optparse'

parser = OptionParser.new do |opts|
  opts.banner = "Usage: soulmate [options] COMMAND"

  opts.separator ""
  opts.separator "Options:"

  opts.on("-r", "--redis [HOST:PORT]", "Redis connection string") do |host|
    Soulmate.redis = host
  end

  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end

  opts.separator ""
  opts.separator "Commands:"
  opts.separator "  load TYPE  Loads items of specified type read from stdin in the JSON lines format"
end

# Reads one JSON document per line from stdin and hands the parsed items to
# Soulmate::Loader for the given type.
#
# Deliberately not called `load`: a top-level `def load` becomes a private
# method on Object and therefore replaces Kernel#load for every object in the
# process.
def load_items(type)
  puts "Loading items of type #{type}..."
  # Skip blank lines (e.g. a trailing newline) instead of failing in JSON.parse.
  lines = $stdin.read.split("\n").reject { |l| l.strip.empty? }
  items = lines.map { |l| JSON.parse(l) }
  total = Soulmate::Loader.new(type).load(items)
  puts "Loaded a total of #{total} items"
end

parser.parse!

case ARGV[0]
when 'load'
  if ARGV[1]
    load_items ARGV[1]
  else
    # Without a TYPE there is nothing to load; say so and show the usage.
    $stderr.puts "Missing TYPE for the load command"
    $stderr.puts parser.help
    exit 1
  end
else
  puts parser.help
end
data/bin/soulmate-web ADDED
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby

# Starts the Soulmate web server through Vegas.

lib_dir = File.expand_path(File.dirname(__FILE__) + '/../lib')
$LOAD_PATH.unshift lib_dir
begin
  require 'vegas'
rescue LoadError
  require 'rubygems'
  require 'vegas'
end
require 'soulmate/server'

# Hook passed to Vegas as :before_run. It currently does nothing; the
# commented-out lines are kept from the original.
before_run = lambda do |v|
  # path = (ENV['RESQUECONFIG'] || v.args.first)
  # load path.to_s.strip if path
end

Vegas::Runner.new(Soulmate::Server, 'soulmate-web', :before_run => before_run) do |runner, opts, app|
  # Extra command-line switch: point Soulmate at a specific Redis instance.
  opts.on("-r", "--redis [HOST:PORT]", "Redis connection string") do |host|
    runner.logger.info "Using Redis connection string '#{host}'"
    Soulmate.redis = host
  end
end
@@ -0,0 +1,25 @@
1
module Soulmate

  # Shared parent of the loader and matcher classes: stores the normalized
  # item type and builds the Redis key names derived from it.
  class Base

    include Helpers

    attr_reader :type

    # type - the item type; it is normalized before being stored.
    def initialize(type)
      self.type = type
    end

    # Normalizes on assignment as well, so the keys built below are the same
    # whether the type was passed to +new+ or assigned afterwards.
    def type=(value)
      @type = normalize(value)
    end

    # Key prefix for the index entries of this type.
    def base
      "soulmate-index:#{type}"
    end

    # Key under which the raw item data of this type is stored.
    def database
      "soulmate-data:#{type}"
    end

    # Key prefix for cached results of this type.
    def cachebase
      "soulmate-cache:#{type}"
    end
  end
end
@@ -0,0 +1,16 @@
1
module Soulmate
  # Text helpers mixed into the classes that build and query the index.
  module Helpers

    # Returns the distinct prefixes, at least MIN_COMPLETE characters long, of
    # every word in the normalized +phrase+. With MIN_COMPLETE = 2, "Foo"
    # yields ["fo", "foo"]. Words shorter than MIN_COMPLETE yield nothing.
    def prefixes_for_phrase(phrase)
      words = normalize(phrase).split(' ')
      words.map do |w|
        ((MIN_COMPLETE - 1)..(w.length - 1)).map { |l| w[0..l] }
      end.flatten.uniq # a repeated word would otherwise repeat its prefixes
    end

    # Lower-cases +str+, drops every character other than a-z, 0-9 and space,
    # and strips surrounding whitespace. nil is treated as an empty string so
    # that a missing term or type does not raise.
    def normalize(str)
      str.to_s.downcase.gsub(/[^a-z0-9 ]/i, '').strip
    end

  end
end
@@ -0,0 +1,44 @@
1
module Soulmate

  # Rebuilds the index and the stored item data for a single type.
  class Loader < Base

    # Deletes everything stored for this type and loads +items+ in its place.
    #
    # items - an Enumerable of Hashes with the string keys "id", "term" and
    #         (optionally) "score". Entries without an id or a term are skipped.
    #
    # Returns the number of items that were stored.
    def load(items)
      # delete the sorted sets for this type
      # wrap in multi/exec?
      phrases = Soulmate.redis.smembers(base)
      phrases.each do |p|
        Soulmate.redis.del("#{base}:#{p}")
      end
      Soulmate.redis.del(base)

      # Redis can continue serving cached requests for this type while the reload is
      # occurring. Some requests may be cached incorrectly as empty set (for requests
      # which come in after the above delete, but before the loading completes). But
      # everything will work itself out as soon as the cache expires again.

      # delete the data stored for this type
      Soulmate.redis.del(database)

      items_loaded = 0
      items.each_with_index do |item, i|
        id    = item["id"]
        term  = item["term"]
        # The sorted-set score must be numeric; an item without one is stored
        # with a score of 0 instead of passing nil on to ZADD.
        score = item["score"] || 0

        if id && term
          # store the raw data in a separate key to reduce memory usage
          Soulmate.redis.hset(database, id, JSON.dump(item))

          prefixes_for_phrase(term).each do |p|
            Soulmate.redis.sadd(base, p) # remember this prefix in a master set
            Soulmate.redis.zadd("#{base}:#{p}", score, id) # store the id of this term in the index
          end
          items_loaded += 1
        end
        puts "added #{i} entries" if i % 100 == 0 && i != 0
      end

      items_loaded
    end
  end
end
@@ -0,0 +1,24 @@
1
module Soulmate

  # Looks up the best matching items of one type for a search term.
  class Matcher < Base

    # Returns the parsed items whose indexed prefixes match every usable word
    # of +term+, highest score first.
    #
    # term    - the search string; words shorter than MIN_COMPLETE and words in
    #           STOP_WORDS are ignored.
    # options - Hash; :limit is the maximum number of results (default 5).
    #           The hash is only read, never modified.
    #
    # Returns an Array of Hashes (empty when nothing matches).
    def matches_for_term(term, options = {})
      words = normalize(term).split(' ').reject do |w|
        w.size < MIN_COMPLETE || STOP_WORDS.include?(w)
      end.sort

      # With no usable words there are no keys to intersect, and ZINTERSTORE
      # requires at least one input key.
      return [] if words.empty?

      limit = options[:limit] || 5

      cachekey = "#{cachebase}:" + words.join('|')

      unless cached?(cachekey)
        interkeys = words.map { |w| "#{base}:#{w}" }
        Soulmate.redis.zinterstore(cachekey, interkeys)
        Soulmate.redis.expire(cachekey, 10 * 60) # expire after 10 minutes
      end

      ids = Soulmate.redis.zrevrange(cachekey, 0, limit - 1)
      return [] if ids.empty?

      # A cached id may no longer have data (for example while a reload is in
      # progress); HMGET returns nil for those, so drop them before parsing.
      Soulmate.redis.hmget(database, *ids).compact.map { |r| JSON.parse(r) }
    end

    private

    # True when +key+ exists. Accepts both a boolean reply and an Integer
    # count from the client, since 0 is truthy in Ruby.
    def cached?(key)
      reply = Soulmate.redis.exists(key)
      reply == true || (reply.is_a?(Integer) && reply > 0)
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'sinatra/base'
2
+ require 'soulmate'
3
+
4
module Soulmate

  # Small JSON web API over the matcher.
  class Server < Sinatra::Base
    include Helpers

    before do
      content_type 'application/json', :charset => 'utf-8'
    end

    # Status endpoint. The version is quoted so the body is valid JSON.
    get '/' do
      '{ "soulmate": "0.1.0", "status": "ok" }'
    end

    # Search endpoint.
    #
    # Parameters:
    #   term  - the text to complete (a missing term yields empty results)
    #   types - one or more item types to search; a missing value means none
    #   limit - maximum results per type; defaults to 5, also when the value
    #           is not a positive integer
    get '/search' do
      limit = (params[:limit] || 5).to_i
      # A limit below 1 would turn the range end into a negative index and
      # return (nearly) the whole result set.
      limit = 5 if limit < 1
      # Array() accepts nil, a single value or a list.
      types = Array(params[:types]).map { |t| normalize(t) }
      term  = params[:term].to_s

      results = {}
      types.each do |type|
        matcher = Soulmate::Matcher.new(type)
        results[type] = matcher.matches_for_term(term, :limit => limit)
      end

      JSON.pretty_generate({
        :term    => params[:term],
        :results => results
      })
    end

    not_found do
      '{ "error": "not found" }'
    end

  end
end
data/lib/soulmate.rb ADDED
@@ -0,0 +1,36 @@
1
+ require 'uri'
2
+ require 'json'
3
+ require 'redis'
4
+
5
+ require 'soulmate/helpers'
6
+ require 'soulmate/base'
7
+ require 'soulmate/matcher'
8
+ require 'soulmate/loader'
9
+
10
module Soulmate

  extend self

  # Minimum number of characters a word needs before it is indexed or matched.
  MIN_COMPLETE = 2
  # Words ignored when matching a search term.
  STOP_WORDS = ["vs", "at"]

  # Connection used when no Redis URL has been configured.
  DEFAULT_REDIS_URL = "redis://127.0.0.1:6379/0"

  # Sets the Redis connection string and reconnects immediately.
  #
  # url - a full URL ("redis://host:port/db") or a bare "host:port", the form
  #       the command-line tools advertise; nil selects the default.
  def redis=(url)
    @redis = nil
    @redis_url = url
    redis
  end

  # Returns the memoized Redis client, creating it on first use.
  def redis
    @redis ||= ::Redis.new(redis_options(@redis_url))
  end

  private

  # Turns a connection string into the option Hash given to Redis.new.
  #
  # A bare "host:port" has no "://", and URI() would read the host as the
  # scheme and leave host and path unset, so the redis scheme is prepended
  # before parsing.
  def redis_options(url)
    url = url.to_s.strip
    url = DEFAULT_REDIS_URL if url.empty?
    url = "redis://#{url}" unless url.include?('://')
    uri = URI(url)

    # The path is "/<db>"; treat a missing or empty database as unspecified.
    db = uri.path.to_s[1..-1]
    db = nil if db.nil? || db.empty?

    {
      :host     => uri.host,
      :port     => uri.port || 6379,
      :db       => db,
      :password => uri.password
    }
  end

end
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'soulmate'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestSoulmate < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,197 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: soulmate
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 0
9
+ version: 0.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Eric Waller
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-02-14 00:00:00 -05:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: shoulda
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :development
31
+ prerelease: false
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: bundler
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ segments:
41
+ - 1
42
+ - 0
43
+ - 0
44
+ version: 1.0.0
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: jeweler
50
+ requirement: &id003 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 1
57
+ - 5
58
+ - 2
59
+ version: 1.5.2
60
+ type: :development
61
+ prerelease: false
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: rcov
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ segments:
71
+ - 0
72
+ version: "0"
73
+ type: :development
74
+ prerelease: false
75
+ version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: redis
78
+ requirement: &id005 !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ segments:
84
+ - 2
85
+ - 0
86
+ version: "2.0"
87
+ type: :runtime
88
+ prerelease: false
89
+ version_requirements: *id005
90
+ - !ruby/object:Gem::Dependency
91
+ name: vegas
92
+ requirement: &id006 !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ segments:
98
+ - 0
99
+ - 1
100
+ - 0
101
+ version: 0.1.0
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: *id006
105
+ - !ruby/object:Gem::Dependency
106
+ name: sinatra
107
+ requirement: &id007 !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ segments:
113
+ - 1
114
+ - 0
115
+ version: "1.0"
116
+ type: :runtime
117
+ prerelease: false
118
+ version_requirements: *id007
119
+ - !ruby/object:Gem::Dependency
120
+ name: json
121
+ requirement: &id008 !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ~>
125
+ - !ruby/object:Gem::Version
126
+ segments:
127
+ - 1
128
+ - 4
129
+ - 6
130
+ version: 1.4.6
131
+ type: :runtime
132
+ prerelease: false
133
+ version_requirements: *id008
134
+ description: Soulmate is a tool to help solve the common problem of developing a fast autocomplete feature. It uses Redis's sorted sets to build an index of partial words and corresponding top matches, and provides a simple sinatra app to query them. Soulmate finished your sentences.
135
+ email: eric@seatgeek.com
136
+ executables:
137
+ - soulmate
138
+ - soulmate-web
139
+ extensions: []
140
+
141
+ extra_rdoc_files:
142
+ - LICENSE.txt
143
+ - README.rdoc
144
+ files:
145
+ - .document
146
+ - Gemfile
147
+ - Gemfile.lock
148
+ - LICENSE.txt
149
+ - README.rdoc
150
+ - Rakefile
151
+ - VERSION
152
+ - bin/soulmate
153
+ - bin/soulmate-web
154
+ - lib/soulmate.rb
155
+ - lib/soulmate/base.rb
156
+ - lib/soulmate/helpers.rb
157
+ - lib/soulmate/loader.rb
158
+ - lib/soulmate/matcher.rb
159
+ - lib/soulmate/server.rb
160
+ - test/helper.rb
161
+ - test/test_soulmate.rb
162
+ has_rdoc: true
163
+ homepage: http://github.com/seatgeek/soulmate
164
+ licenses:
165
+ - MIT
166
+ post_install_message:
167
+ rdoc_options: []
168
+
169
+ require_paths:
170
+ - lib
171
+ required_ruby_version: !ruby/object:Gem::Requirement
172
+ none: false
173
+ requirements:
174
+ - - ">="
175
+ - !ruby/object:Gem::Version
176
+ hash: -513163149
177
+ segments:
178
+ - 0
179
+ version: "0"
180
+ required_rubygems_version: !ruby/object:Gem::Requirement
181
+ none: false
182
+ requirements:
183
+ - - ">="
184
+ - !ruby/object:Gem::Version
185
+ segments:
186
+ - 0
187
+ version: "0"
188
+ requirements: []
189
+
190
+ rubyforge_project:
191
+ rubygems_version: 1.3.7
192
+ signing_key:
193
+ specification_version: 3
194
+ summary: Redis-backed service for fast autocompleting - extracted from SeatGeek
195
+ test_files:
196
+ - test/helper.rb
197
+ - test/test_soulmate.rb