soulheart 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 701b63e892265647a1615c790e81ab3e4132bb0a
4
+ data.tar.gz: baa1a417ca1bedf0efff4cd795a5f0f6679c60e8
5
+ SHA512:
6
+ metadata.gz: 37fdeb72ef3cb8179b4aafc1eb9860e8d96a469a7fe866b51160004fb8298da877496b22d46df5537f0b4f10dd35fb927720c1d3420b7a489179c0a3537b5c46
7
+ data.tar.gz: 8580e37cbd146a1b03d15dbb4ddd913ed1e30c4d4c08c79f7fa4b7b6b46cc63013c0bd47a4902216235491fc9c3b61e5a41fd6b08cb47ee9f8dff0d5ee744a86
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ # rdoc generated
2
+ rdoc
3
+
4
+ # yard generated
5
+ doc
6
+ .yardoc
7
+
8
+ # bundler
9
+ .bundle
10
+
11
+ gem
12
+
13
+ test/db/*.rdb
14
+
15
+ dump.rdb
16
+ tmp/
17
+
18
+ Gemfile.lock
19
+
20
+ .rvmrc
21
+ .ruby-*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.0
4
+ bundler_args: "--without=guard"
5
+ notifications:
6
+ disabled: true
7
+ script:
8
+ - bundle exec rake spec
9
+ # - bundle exec rubocop
10
+ addons:
11
+ code_climate:
12
+ repo_token: e72d8fa152922cef05c311cd49d3db9016b82486b712399a8e7c7da2af5e071e
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
# Gem dependencies for Soulheart.
# Use the TLS endpoint so gem downloads cannot be tampered with in transit.
source 'https://rubygems.org'

# Tooling that is only needed while developing locally.
group :development do
  gem 'rspec', '~> 2.14.1'
  gem 'bundler'
  gem 'guard'
  gem 'guard-rspec', '~> 4.2.8'
  gem 'rack-contrib'
  gem 'rubocop'
end

# Test-only helpers; the coverage reporter is not auto-required.
group :test do
  gem 'rack-test'
  gem "codeclimate-test-reporter", require: nil
end

# Default group.
gem 'rake'
gem 'redis', '>= 3.0.1'
gem 'hiredis', '~> 0.4.5'
gem 'vegas', '>= 0.1.0'
gem 'sinatra'
gem 'multi_json', '>= 1.11.0'
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
# Extra guard-rspec options. These were previously built but never handed to
# `guard`, so `failed_mode: :focus` had no effect.
# NOTE(review): confirm that focusing on failed specs is the desired behavior.
rspec_opts = {
  failed_mode: :focus
}

# Re-run specs when a spec file, or the lib file it covers, changes.
guard :rspec, cmd: "bundle exec rspec", **rspec_opts do
  watch(%r{^spec/.+_spec\.rb$})
  watch(%r{^lib/(.+)\.rb$})           { |m| "spec/#{m[1]}_spec.rb" }
  watch(%r{^lib/soulheart/(.+)\.rb$}) { |m| "spec/soulheart/#{m[1]}_spec.rb" }
end
data/LICENSE.md ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011-2015 Eric Waller, Seth Herr, and Soulheart contributors
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,203 @@
1
+ Soulheart [![Build Status](https://travis-ci.org/sethherr/soulheart.svg)](https://travis-ci.org/sethherr/soulheart) [![Code Climate](https://codeclimate.com/github/sethherr/soulheart/badges/gpa.svg)](https://codeclimate.com/github/sethherr/soulheart) [![Test Coverage](https://codeclimate.com/github/sethherr/soulheart/badges/coverage.svg)](https://codeclimate.com/github/sethherr/soulheart/coverage)
2
+ ========
3
+
4
+ This is an updated fork of [Seatgeek/Soulmate](https://github.com/seatgeek/soulmate) to address a few issues - namely [CORS support](../../issues/2), [minimum entry length](../../issues/3) and [playing better with Selectize & Select2](../../issues/4) - also the future.
5
+
6
+ Since [Seatgeek no longer uses Soulmate](https://news.ycombinator.com/item?id=9317891), and this is a pretty large rewrite that doesn't emphasize backward compatibility, it's a new project and gem.
7
+
8
+ ========
9
+
10
+ **Every item has to have a unique name**
11
+
12
+ Loading data:
13
+
14
+ Pushing new options via rake task. Use a gist url! Do it with .csvs!
15
+
16
+ Structuring data
17
+
18
+ The only required attribute is term, which is the search term to go by. So this would be a valid thing:
19
+
20
+ ```javascript
21
+ [
22
+ { "term": "Something sweet" },
23
+ { "term": "The color blue" }
24
+ ]
25
+ ```
26
+
27
+ Here are all the possible (non-required) values:
28
+
29
+
30
+ | Key | Type | Default |
31
+ | ----------- | ----- | ---------- |
32
+ | `priority` | integer | 100 |
33
+ | `types` | string | 'default' |
34
+ | `data` | hash | {} |
35
+
36
+ e.g.
37
+
38
+ ```javascript
39
+ [
40
+ {
41
+ "term": "Something sweet",
42
+ "priority": 99,
43
+ "types": "niceness", // A set type for searching by type
44
+ "data": { // Any key value pair you want, this is the return value
45
+ "id": 99,
46
+ "url": "http://example.com",
47
+ "whatever": "The color purple"
48
+ }
49
+ },
50
+ { "term": "The color blue" }
51
+ ]
52
+ ```
53
+
54
+ *If you set `term` in `data`, it will respond with that rather than the term it searches by. I haven't figured out a use case for this yet, but I'm sure one exists.*
55
+
56
+ ======
57
+
58
+ I'm testing with:
59
+
60
+ - ruby >= 2.1
61
+ - redis >= 3
62
+
63
+ To test, run `bundle exec guard`. It will live reload the files you change.
64
+
65
+
66
+ ======
67
+
68
+ :x::o::x::o::x::o: *Soulmate's README follows ([issue for making new documentation](../../issues/1))*
69
+
70
+ Soulmate is a tool to help solve the common problem of developing a fast autocomplete feature. It uses Redis's sorted sets to build an index of partially completed words and the corresponding top matching items, and provides a simple sinatra app to query them. Soulmate finishes your sentences.
71
+
72
+ Soulmate was designed to be simple and fast, and offers the following:
73
+
74
+ * Provide suggestions for multiple types of items in a single query (at SeatGeek we're autocompleting for performers, events, and venues)
75
+ * Results are ordered by a user-specified score
76
+ * Arbitrary metadata for each item (at SeatGeek we're storing both a url and a subtitle)
77
+
78
+ An item is a simple JSON object that looks like:
79
+
80
+ {
81
+ "id": 3,
82
+ "term": "Citi Field",
83
+ "score": 81,
84
+ "data": {
85
+ "url": "/citi-field-tickets/",
86
+ "subtitle": "Flushing, NY"
87
+ }
88
+ }
89
+
90
+ Where `id` is a unique identifier (within the specific type), `term` is the phrase you wish to provide completions for, `score` is a user-specified ranking metric (redis will order things lexicographically for items with the same score), and `data` is an optional container for metadata you'd like to return when this item is matched (at SeatGeek we're including a url for the item as well as a subtitle for when we present it in an autocomplete dropdown).
91
+
92
+ Getting Started
93
+ ---------------
94
+
95
+ As always, kick things off with a `gem install`:
96
+
97
+ gem install soulmate
98
+
99
+ ### Loading Items
100
+
101
+ You can load data into Soulmate by piping items in the JSON lines format into `soulmate load TYPE`.
102
+
103
+ Here's a sample `venues.json` (one JSON item per line):
104
+
105
+ {"id":1,"term":"Dodger Stadium","score":85,"data":{"url":"\/dodger-stadium-tickets\/","subtitle":"Los Angeles, CA"}}
106
+ {"id":28,"term":"Angel Stadium","score":85,"data":{"url":"\/angel-stadium-tickets\/","subtitle":"Anaheim, CA"}}
107
+ {"id":30,"term":"Chase Field ","score":85,"data":{"url":"\/chase-field-tickets\/","subtitle":"Phoenix, AZ"}}
108
+ {"id":29,"term":"Sun Life Stadium","score":84,"data":{"url":"\/sun-life-stadium-tickets\/","subtitle":"Miami, FL"}}
109
+ {"id":2,"term":"Turner Field","score":83,"data":{"url":"\/turner-field-tickets\/","subtitle":"Atlanta, GA"}}
110
+
111
+ And here's the load command (Soulmate assumes redis is running locally on the default port, or you can specify a redis connection string with the `--redis` argument):
112
+
113
+ $ soulmate load venue --redis=redis://localhost:6379/0 < venues.json
114
+
115
+ You can also provide an array of strings under the `aliases` key that will also be added to the index for this item.
116
+
117
+ ### Querying for Data
118
+
119
+ Once it's loaded, we can query this data by starting `soulmate-web`:
120
+
121
+ $ soulmate-web --foreground --no-launch --redis=redis://localhost:6379/0
122
+
123
+ And viewing the service in your browser: http://localhost:5678/search?types[]=venue&term=stad. You should see something like:
124
+
125
+ {
126
+ "term": "stad",
127
+ "results": {
128
+ "venue": [
129
+ {
130
+ "id": 28,
131
+ "term": "Angel Stadium",
132
+ "score": 85,
133
+ "data": {
134
+ "url": "/angel-stadium-tickets/",
135
+ "subtitle": "Anaheim, CA"
136
+ }
137
+ },
138
+ {
139
+ "id": 1,
140
+ "term": "Dodger Stadium",
141
+ "score": 85,
142
+ "data": {
143
+ "url": "/dodger-stadium-tickets/",
144
+ "subtitle": "Los Angeles, CA"
145
+ }
146
+ },
147
+ {
148
+ "id": 29,
149
+ "term": "Sun Life Stadium",
150
+ "score": 84,
151
+ "data": {
152
+ "url": "/sun-life-stadium-tickets/",
153
+ "subtitle": "Miami, FL"
154
+ }
155
+ }
156
+ ]
157
+ }
158
+ }
159
+
160
+ The `/search` method supports multiple `types` as well as an optional `limit`. For example: `http://localhost:5678/search?types[]=event&types[]=venue&types[]=performer&limit=3&term=yank`. You can also add the `callback` parameter to enable JSONP output.
161
+
162
+ ### Mounting soulmate into a rails app
163
+
164
+ If you are integrating Soulmate into a rails app, an alternative to launching a separate 'soulmate-web' server is to mount the sinatra app inside of rails.
165
+
166
+ Add this to routes.rb:
167
+
168
+ mount Soulmate::Server, :at => "/sm"
169
+
170
+ Add this to gemfile:
171
+
172
+ gem 'rack-contrib'
173
+ gem 'soulmate', :require => 'soulmate/server'
174
+
175
+ Then you can query soulmate at the /sm url, for example: http://localhost:3000/sm/search?types[]=venues&limit=6&term=kitten
176
+
177
+ You can also config your redis instance:
178
+
179
+ # config/initializers/soulmate.rb
180
+
181
+ Soulmate.redis = 'redis://127.0.0.1:6379/0'
182
+ # or you can assign an existing instance of Redis, Redis::Namespace, etc.
183
+ # Soulmate.redis = $redis
184
+
185
+ ### Rendering an autocompleter
186
+
187
+ Soulmate doesn't include any client-side code necessary to render an autocompleter, but Mitch Crowe put together a pretty cool looking jquery plugin designed for exactly that: <a href="https://github.com/mcrowe/soulmate.js">soulmate.js</a>.
188
+
189
+ Contributing to soulmate
190
+ ------------------------
191
+
192
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
193
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
194
+ * Fork the project
195
+ * Start a feature/bugfix branch
196
+ * Commit and push until you are happy with your contribution
197
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if changing them is otherwise necessary, that is fine, but please isolate those changes in their own commit so I can cherry-pick around it.
198
+
199
+ Copyright
200
+ ---------
201
+
202
+ Copyright (c) 2011 Eric Waller. See LICENSE.md for further details.
203
+
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
#!/usr/bin/env rake
# Rake tasks for Soulheart: start/stop a Redis server configured from
# test/test.conf, and run the RSpec suite.

require "bundler/gem_helper"

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'
require 'fileutils' # FileUtils is used by the :start and :stop tasks below

REDIS_DIR = File.expand_path(File.join("..", "test"), __FILE__)
REDIS_CNF = File.join(REDIS_DIR, "test.conf")
REDIS_PID = File.join(REDIS_DIR, "db", "redis.pid")
# Optional directory containing the redis-server binary; nil means "use PATH".
REDIS_LOCATION = ENV['REDIS_LOCATION']

# NOTE(review): no :rcov task is defined in this Rakefile; confirm it is
# provided elsewhere, otherwise this task cannot run.
desc "Run rcov and manage server start/stop"
task :rcoverage => [:start, :rcov, :stop]

desc "Start the Redis server"
task :start do
  redis_running =
    begin
      # Signal 0 delivers nothing; it raises ESRCH when the pid is not alive.
      File.exist?(REDIS_PID) && Process.kill(0, File.read(REDIS_PID).to_i)
    rescue Errno::ESRCH
      # Stale pid file left behind by a server that is no longer running.
      FileUtils.rm REDIS_PID
      false
    end

  next if redis_running

  redis_server = REDIS_LOCATION ? "#{REDIS_LOCATION}/redis-server" : "redis-server"
  system "#{redis_server} #{REDIS_CNF}"
end

desc "Stop the Redis server"
task :stop do
  if File.exist?(REDIS_PID)
    begin
      Process.kill "INT", File.read(REDIS_PID).to_i
    rescue Errno::ESRCH
      # The recorded process is already gone; still clean up the pid file.
    end
    # rm_f: the pid file may already have been removed by the time we get here.
    FileUtils.rm_f REDIS_PID
  end
end

require "rspec/core/rake_task"

desc "Run all specs"
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = %w(--color)
  t.verbose = false
end

task :default => :spec
data/bin/soulheart ADDED
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ begin
5
+ require 'redis'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'redis'
9
+ end
10
+ require 'soulheart'
11
+ require 'optparse'
12
+ require 'tempfile'
13
+
14
# Command-line option parser for the soulheart executable.
# Options configure Soulheart (redis connection, stop words) or set BATCH_SIZE;
# the command itself is left in ARGV for the dispatcher to read after parsing.
parser = OptionParser.new do |opts|
  opts.banner = "Usage: soulheart [options] COMMAND"

  opts.separator ""
  opts.separator "Options:"

  opts.on("-r", "--redis [HOST:PORT]", "Redis connection string") do |host|
    Soulheart.redis = host
  end

  # FILE is mandatory: without it there is nothing to read the words from.
  opts.on("-s", "--stop-words FILE", "Path to file containing a list of stop words") do |fn|
    Soulheart.stop_words = File.readlines(fn).map(&:strip).reject(&:empty?)
  end

  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end

  # The switch must declare its argument (SIZE) and be coerced to Integer;
  # otherwise the block receives `true` and BATCH_SIZE.times cannot work.
  opts.on("-b", "--batch-size SIZE", Integer, "Number of lines to read at a time") do |size|
    # A batch size of zero or less would never consume any input.
    raise OptionParser::InvalidArgument, "must be a positive integer" unless size > 0
    BATCH_SIZE = size
  end

  opts.separator ""
  opts.separator "Commands:"
  opts.separator " generate TYPE FILE Replaces collection specified by TYPE with items read from FILE in the JSON lines format, imported via redis-cli --pipe."
  opts.separator " load FILE Loads items read from FILE in the JSON lines format."
  opts.separator " add TYPE Adds items to collection specified by TYPE read from stdin in the JSON lines format."
  opts.separator " remove TYPE Removes items from collection specified by TYPE read from stdin in the JSON lines format. Items only require an 'id', all other fields are ignored."
  opts.separator " query TYPE QUERY Queries for items from collection specified by TYPE."
end
46
+
47
# Rebuilds the index for +type+ from a JSON-lines +file+ by writing Redis
# protocol commands to a temp file and piping it through `redis-cli --pipe`.
#
# type - collection name used to build the "soulheart-index:" and
#        "soulheart-data:" keys.
# file - path to a file with one JSON object per line; each object must have
#        "id", "score" and "term", and may have "aliases".
#
# Prints progress to stdout. Prints a message and returns if +file+ is missing.
# Raises KeyError if a line has no "id" or "score".
def generate(type, file)
  include Soulheart::Helpers

  unless File.exist?(file)
    puts "Couldn't open file: #{file}"
    return
  end

  temp = Tempfile.new("soulheart")
  begin
    start_time = Time.now.to_i
    base = "soulheart-index:#{type}"
    database = "soulheart-data:#{type}"

    # cleanup: the per-prefix sorted sets are written below under
    # "#{base}:#{prefix}", so that is the key that has to be deleted.
    Soulheart.redis.smembers(base).each do |prefix|
      temp << gen_redis_proto("DEL", base + ":" + prefix)
    end
    temp << gen_redis_proto("DEL", base)

    File.foreach(file) do |raw|
      line = raw.chomp
      json = MultiJson.decode(line)
      # Read id/score from the decoded object rather than regex-matching the
      # raw text, so whitespace after the colon or a nested "id" cannot
      # mislead us. Both are stringified for the protocol writer.
      id = json.fetch("id").to_s
      score = json.fetch("score").to_s
      temp << gen_redis_proto("HSET", database, id, line)
      # Array() accepts aliases given as an array of strings or a single
      # string, and contributes nothing when the key is absent.
      phrase = [json["term"], *Array(json["aliases"])].join(" ")
      prefixes_for_phrase(phrase).each do |p|
        temp << gen_redis_proto("SADD", base, p)
        temp << gen_redis_proto("ZADD", base + ":" + p, score, id)
      end
    end

    # Push buffered writes to disk before redis-cli reads the file.
    temp.flush
    puts "Converted in #{Time.now.to_i - start_time} second(s)"
    puts "Importing into redis ..."
    `time redis-cli --pipe < #{temp.path}`
  ensure
    temp.close
  end
end
97
+
98
# Loads items from a JSON-lines +file+ through Soulheart::Loader in batches of
# BATCH_SIZE lines, printing progress to stdout.
#
# file - path to a file with one JSON object per line.
#
# Prints a message and returns if +file+ does not exist.
#
# NOTE(review): defined at the top level, this shadows Kernel#load for the
# rest of the script; the name is kept because the command dispatcher calls it.
def load(file)
  unless File.exist?(file)
    puts "Couldn't open file: #{file}"
    return
  end

  start_time = Time.now.to_i
  loader = Soulheart::Loader.new

  puts "Loading items in batches of #{BATCH_SIZE} ..."
  count = 0
  # Block form closes the handle even if decoding or loading raises.
  File.open(file) do |f|
    until f.eof?
      lines = []
      BATCH_SIZE.times do
        break if f.eof?
        lines << MultiJson.decode(f.gets)
        count += 1
      end
      loader.load(lines)
      puts "Loaded #{count} items ..." unless f.eof?
    end
  end
  puts "Loaded a total of #{count} items in #{Time.now.to_i - start_time} second(s)"
end
125
+
126
# Reads JSON-lines items from stdin and adds each one to the collection
# +type+ through Soulheart::Loader, then prints how many were read.
def add(type)
  puts "Adding items of type #{type}..."
  loader = Soulheart::Loader.new(type)
  # Decode every line up front so a malformed line aborts before any add.
  decoded = []
  $stdin.read.split("\n").each { |json_line| decoded << MultiJson.decode(json_line) }
  decoded.each { |entry| loader.add(entry) }
  puts "Loaded a total of #{decoded.size} items"
end
135
+
136
# Reads JSON-lines items from stdin and removes each one from the collection
# +type+ through Soulheart::Loader, then prints how many were read.
def remove(type)
  puts "Removing items of type #{type}..."
  loader = Soulheart::Loader.new(type)
  # Decode every line up front so a malformed line aborts before any removal.
  decoded = []
  $stdin.read.split("\n").each { |json_line| decoded << MultiJson.decode(json_line) }
  decoded.each { |entry| loader.remove(entry) }
  puts "Removed a total of #{decoded.size} items"
end
145
+
146
# Looks up +query+ in the collection +type+ with Soulheart::Matcher and prints
# each match as JSON, followed by the number of matches.
def query(type, query)
  puts "> Querying '#{type}' for '#{query}'"
  found = Soulheart::Matcher.new(type).matches_for_term(query, :limit => 0)
  found.each { |match| puts MultiJson.encode(match) }
  puts "> Found #{found.size} matches"
end
155
+
156
# Encodes one Redis command in the unified request protocol: an array header
# ("*<argc>") followed by each argument as a length-prefixed bulk string.
#
# cmd - the command name followed by its arguments. Non-string arguments
#       (e.g. Integer scores or ids) are converted with #to_s.
#
# Returns the encoded command as a String.
# Raises ArgumentError if any argument is nil.
def gen_redis_proto(*cmd)
  proto = "*#{cmd.length}\r\n"
  cmd.each do |arg|
    raise ArgumentError, "Redis command arguments must not be nil" if arg.nil?

    bulk = arg.to_s
    # The length prefix is a byte count, not a character count.
    proto << "$#{bulk.bytesize}\r\n" << bulk << "\r\n"
  end
  proto
end
164
+
165
# Parse options first: parse! strips them from ARGV, leaving only the command
# and its positional arguments.
parser.parse!
BATCH_SIZE ||= 1000

# Dispatch on the command word; anything unrecognized prints the help text.
command, first_arg, second_arg = ARGV
case command
when 'generate' then generate(first_arg, second_arg)
when 'load'     then load(first_arg)
when 'add'      then add(first_arg)
when 'remove'   then remove(first_arg)
when 'query'    then query(first_arg, second_arg)
else
  puts parser.help
end