soulheart 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/soulheart-web ADDED
@@ -0,0 +1,28 @@
1
#!/usr/bin/env ruby

# Command-line launcher for the Soulheart web server.
# Wraps Soulheart::Server in a Vegas runner and registers the
# Soulheart-specific options (--redis and --stop-words).

$LOAD_PATH.unshift File.expand_path('../lib', File.dirname(__FILE__))
begin
  require 'vegas'
rescue LoadError
  # Fall back to RubyGems when vegas is not directly on the load path.
  require 'rubygems'
  require 'vegas'
end
require 'soulheart/server'

Vegas::Runner.new(Soulheart::Server, 'soulheart-web', {
  # No-op hook; the original body only contained commented-out config loading.
  :before_run => lambda { |_v| }
}) do |runner, opts, _app|
  # The argument is mandatory: with an optional "[HOST:PORT]" a bare "-r"
  # would hand nil to the block.
  opts.on('-r', '--redis HOST:PORT', 'Redis connection string') do |host|
    runner.logger.info "Using Redis connection string '#{host}'"
    # Soulheart.redis= stores the string and later parses it as a URL,
    # so add the scheme when only HOST:PORT was supplied.
    Soulheart.redis = host.include?('://') ? host : "redis://#{host}"
  end

  # Mandatory argument for the same reason: File I/O on nil raises TypeError.
  opts.on('-s', '--stop-words FILE', 'Path to file containing a list of stop words') do |fn|
    # One stop word per line; blank lines are ignored.
    Soulheart.stop_words = File.readlines(fn).map(&:strip).reject(&:empty?)
  end
end
data/lib/soulheart.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'multi_json'
2
+
3
+ require 'soulheart/version'
4
+ require 'soulheart/helpers'
5
+ require 'soulheart/base'
6
+ require 'soulheart/matcher'
7
+ require 'soulheart/loader'
8
+ require 'soulheart/config'
9
+
10
# Top-level namespace. Extending Config exposes its methods
# (redis, redis=, stop_words, stop_words=) directly on Soulheart.
module Soulheart
  extend Soulheart::Config
end
@@ -0,0 +1,54 @@
1
module Soulheart
  # Common parent of Loader and Matcher: gives access to the Redis connection
  # and builds the Redis key names used by both.
  class Base
    include Helpers

    attr_accessor :type

    # The Redis connection configured on the Soulheart module.
    def redis
      Soulheart.redis
    end

    # Number of seconds a cached result set is kept (10 minutes).
    def cache_length
      10 * 60
    end

    # Key prefix; a separate prefix is used when RACK_ENV is 'test'.
    def base_id
      ENV['RACK_ENV'] == 'test' ? 'soulheart_test:' : 'soulheart:'
    end

    # Rebuilds the set of category-combination names in Redis.
    #
    # Every non-empty combination of the normalized, sorted category names is
    # joined without a separator; the combination containing every category
    # is stored as 'all'.
    #
    # Returns the array of combination names ([] when there are no categories).
    def set_category_combos_array
      redis.del category_combos_id
      categories = redis.smembers(categories_id).map { |c| normalize(c) }.uniq.sort
      # Nothing to combine: avoid calling replace on nil / SADD with no members.
      return [] if categories.empty?

      combos = 1.upto(categories.size).flat_map do |n|
        categories.combination(n).map { |members| members.join('') }
      end
      # The last entry is the combination of all categories.
      combos[-1] = 'all'
      redis.sadd category_combos_id, combos
      combos
    end

    # Key of the set holding the category-combination names.
    def category_combos_id
      "#{base_id}category_combos:"
    end

    # Key of the set holding the category names.
    def categories_id
      "#{base_id}categories:"
    end

    # Key prefix for the index of a single category or combination name.
    def category_id(name = 'all')
      "#{categories_id}#{name}:"
    end

    # Key of the sorted set used when a search has no query terms.
    def no_query_id(category = category_id)
      "all:#{category}"
    end

    # Key of the hash that maps a term to its JSON-encoded data.
    def results_hashes_id
      "#{base_id}database:"
    end

    # Key prefix for cached result sets. base_id already ends with ':', so the
    # key contains '::'; left as-is so existing cache keys stay valid.
    def cache_id(type = 'all')
      "#{base_id}:cache:#{type}:"
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'uri'
2
+ require 'redis'
3
+
4
module Soulheart
  # Configuration mixin: Redis connection handling and the stop-word list.
  module Config
    # Frozen so the shared default cannot be mutated through the accessor.
    DEFAULT_STOP_WORDS = %w[vs at the].freeze
    DEFAULT_REDIS_URL = 'redis://127.0.0.1:6379/0'.freeze

    # Accepts:
    #   1. A Redis URL String 'redis://host:port/db' (a bare 'host:port' is
    #      also accepted and treated as 'redis://host:port')
    #   2. An existing instance of Redis, Redis::Namespace, etc.
    def redis=(server)
      if server.is_a?(String)
        # Drop any existing connection so the next access rebuilds it.
        @redis = nil
        @redis_url = server
      else
        @redis = server
      end

      redis
    end

    # Returns the current Redis connection. If none has been created, builds
    # one from the configured URL, ENV["REDIS_URL"], or the local default.
    def redis
      @redis ||= begin
        url = URI(redis_url)
        ::Redis.new({
          # driver: :hiredis,
          host: url.host,
          port: url.port || 6379,
          # Path is '/<db>'; strip the leading slash (nil when no path given).
          db: url.path.to_s[1..-1],
          password: url.password
        })
      end
    end

    # Current stop words; starts as a private copy of the defaults.
    def stop_words
      @stop_words ||= DEFAULT_STOP_WORDS.dup
    end

    # Replaces the stop words; accepts a single word or (nested) arrays.
    def stop_words=(arr)
      @stop_words = Array(arr).flatten
    end

    private

    # The URL string to connect to, with a redis:// scheme added when the
    # configured value has none (URI would otherwise misparse 'host:port').
    def redis_url
      raw = @redis_url || ENV['REDIS_URL'] || DEFAULT_REDIS_URL
      raw =~ %r{\A[a-z][a-z0-9+.\-]*://}i ? raw : "redis://#{raw}"
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+
3
module Soulheart
  # Text helpers mixed into Base and Server.
  module Helpers
    # True when the receiver responds to empty? and is empty.
    def blank?
      respond_to?(:empty?) ? !!empty? : !self
    end

    # Lower-cases +str+, removes every character that is not a word character
    # or a space, and trims surrounding whitespace.
    # nil and non-String values are converted with to_s first.
    def normalize(str)
      # Letter, Mark, Number, Connector_Punctuation (Chinese, Japanese, etc.)
      str.to_s.downcase.gsub(/[^\p{Word} ]/, '').strip
    end

    # Returns the unique leading substrings (length 1..n) of every word in
    # the normalized +phrase+, skipping words listed in Soulheart.stop_words.
    def prefixes_for_phrase(phrase)
      words = normalize(phrase).split(' ').reject do |word|
        Soulheart.stop_words.include?(word)
      end
      words.flat_map { |word| (1..word.length).map { |len| word[0, len] } }.uniq
    end
  end
end
@@ -0,0 +1,106 @@
1
module Soulheart
  # Writes autocomplete items into Redis: the per-category prefix indexes,
  # the query-less sets, and the term => data hash.
  class Loader < Base
    # Default item structure for +text+ in +category+ ('default' when nil).
    def default_items_hash(text, category)
      category ||= 'default'
      {
        'category' => normalize(category),
        'priority' => 100,
        'term' => normalize(text),
        'aliases' => [],
        'data' => {
          'text' => text,
          'category' => category
        }
      }
    end

    # Removes the set of known categories.
    def delete_categories
      redis.del categories_id
    end

    # Replaces the set of known categories with +categories+.
    def reset_categories(categories)
      delete_categories
      # SADD needs at least one member.
      redis.sadd(categories_id, categories) unless Array(categories).empty?
    end

    # Deletes the indexes and stored data written by add_item.
    def delete_data
      prefixes = redis.smembers(base_id)
      scopes = index_scopes
      redis.pipelined do
        # add_item writes "<scope><prefix>" and the query-less set per scope,
        # so those are the keys that have to go.
        scopes.each do |scope|
          redis.del(no_query_id(scope))
          prefixes.each { |p| redis.del("#{scope}#{p}") }
        end
        redis.del(base_id)
      end

      # Redis can continue serving cached requests while the reload is
      # occurring. Some requests may be cached incorrectly as empty set (for
      # requests which come in after the above delete, but before the loading
      # completes). But everything will work itself out as soon as the cache
      # expires again.

      # delete the data stored for each term
      redis.del(results_hashes_id)
    end

    # Replaces all stored data with +items+ (an array of item hashes).
    # Each hash in +items+ is replaced in place by its cleaned form.
    # Returns the array of category-combination names.
    def load(items)
      delete_data
      # Replace with item return so we know we have the normalized category
      items.each { |item| item.replace(add_item(item)) }
      combos = set_category_combos_array
      category_combinations.each do |name, members|
        scope = category_id(name)
        items.each do |item|
          # Exact membership test; substring matching on the joined name
          # would put "manufacturer" items into "frame manufacturer".
          next unless name == 'all' || members.include?(item['category'])
          add_item(item, scope, cleaned: true)
        end
      end
      combos
    end

    # Builds the stored form of +item+ without modifying the hash passed in.
    # Submitted keys override the defaults; submitted 'data' is merged into
    # the default data.
    #
    # Raises ArgumentError when +item+ has no 'text'.
    def clean(item)
      raise ArgumentError, "Items must have text" unless item["text"]
      rest = item.dup
      text = rest.delete('text')
      category = rest.delete('category')
      data = rest.delete('data')
      cleaned = default_items_hash(text, category)
      cleaned['data'].merge!(data) if data
      cleaned.merge(rest)
    end

    # Indexes +item+ under +category_base_id+.
    #
    # With cleaned: false the item is cleaned first, its category is
    # registered, its data is stored and its prefixes are remembered; the
    # index scope defaults to the item's own category. With cleaned: true
    # only the index entries for the given scope are written.
    #
    # Returns the (cleaned) item.
    def add_item(item, category_base_id = nil, cleaned: false)
      unless cleaned
        item = clean(item)
        category_base_id ||= category_id(item['category'])
        # SADD ignores members that already exist, so no lookup is needed.
        redis.sadd categories_id, item['category']
      end
      redis.pipelined do
        # Add to master set for queryless searches
        redis.zadd(no_query_id(category_base_id), item["priority"], item["term"])
        # store the raw data in a separate key to reduce memory usage
        redis.hset(results_hashes_id, item['term'], MultiJson.encode(item['data'])) unless cleaned
        phrase = ([item["term"]] + (item["aliases"] || [])).join(' ')
        prefixes_for_phrase(phrase).each do |p|
          redis.sadd(base_id, p) unless cleaned # remember prefix in a master set
          # store the normalized term in the index for this scope
          redis.zadd("#{category_base_id}#{p}", item["priority"], item["term"])
        end
      end
      item
    end

    # Removes an item from the stored data and from every index scope.
    # +item+ needs a 'term' (or a 'text' to derive it from); 'aliases' on
    # +item+ are used to find alias prefixes, since aliases are not stored.
    def remove(item)
      term = item['term'] || (item['text'] && normalize(item['text']))
      return unless term && redis.hexists(results_hashes_id, term)

      phrase = ([term] + (item['aliases'] || [])).join(' ')
      prefixes = prefixes_for_phrase(phrase)
      scopes = index_scopes
      # undo the operations done in add_item; the master prefix set is left
      # alone because other terms may share the same prefixes
      redis.pipelined do
        redis.hdel(results_hashes_id, term)
        scopes.each do |scope|
          redis.zrem(no_query_id(scope), term)
          prefixes.each { |p| redis.zrem("#{scope}#{p}", term) }
        end
      end
    end

    private

    # Key prefixes of every index scope currently recorded in Redis: each
    # category, each category combination, and 'all'.
    def index_scopes
      names = redis.smembers(categories_id) + redis.smembers(category_combos_id) + ['all']
      names.map { |name| category_id(normalize(name)) }.uniq
    end

    # Pairs of [combination name, member categories], named the same way as
    # set_category_combos_array (joined without separator, full set => 'all').
    def category_combinations
      categories = redis.smembers(categories_id).map { |c| normalize(c) }.uniq.sort
      1.upto(categories.size).flat_map do |n|
        categories.combination(n).map do |members|
          [members.size == categories.size ? 'all' : members.join(''), members]
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module Soulheart
  # Looks up stored items matching a query, optionally limited to categories.
  class Matcher < Base
    # +params+ uses string keys; see default_params_hash for the known keys.
    def initialize(params = {})
      @opts = self.class.default_params_hash.merge params
      clean_opts
    end

    # Defaults: first page (0-based), 5 results per page, all categories,
    # empty query, caching enabled.
    def self.default_params_hash
      {
        'page' => 0,
        'per_page' => 5,
        'categories' => [],
        'query' => '',
        'cache' => true
      }
    end

    # Normalizes @opts in place: 'categories' becomes a sorted array of
    # unique normalized names (a String is split on ',' or '+'), and
    # 'query' becomes an array of normalized words. Returns @opts.
    def clean_opts
      categories = @opts['categories']
      categories = categories.to_s.split(/,|\+/) unless categories.is_a?(Array)
      categories = categories.map { |s| normalize(s) }.reject(&:empty?).uniq.sort
      # As many categories as exist in total is treated as "all categories".
      if !categories.empty? && categories.length == redis.scard(categories_id)
        categories = []
      end
      @opts['categories'] = categories
      # to_s guards against a nil query; stop words are not removed here.
      @opts['query'] = normalize(@opts['query'].to_s).split(' ')
      @opts
    end

    # Combination name for the selected categories ('all' when none).
    def categories_string
      @opts['categories'].empty? ? 'all' : @opts['categories'].join('')
    end

    def category_id_from_opts
      category_id(categories_string)
    end

    def cache_id_from_opts
      "#{cache_id(categories_string)}#{@opts['query'].join(':')}:"
    end

    # Keys to intersect for this search under the scope prefix +cid+.
    def interkeys_from_opts(cid)
      # If there isn't a query, we use a special key in redis
      @opts['query'].empty? ? [no_query_id(cid)] : @opts['query'].map { |w| "#{cid}#{w}" }
    end

    # Returns the decoded data of the matches on the requested page,
    # highest score first.
    def matches
      cachekey = cache_id_from_opts
      cid = category_id_from_opts

      unless @opts['cache'] && cached?(cachekey)
        redis.zinterstore(cachekey, interkeys_from_opts(cid))
        redis.expire(cachekey, cache_length) # cache_length is set in base.rb
      end

      page = @opts['page'].to_i
      per_page = @opts['per_page'].to_i
      # ZREVRANGE takes inclusive start/stop indexes, so the stop index has
      # to move with the page as well.
      first = page * per_page
      terms = redis.zrevrange(cachekey, first, first + per_page - 1)
      return [] if terms.empty?

      # compact: handle cached terms whose data has since been deleted
      redis.hmget(results_hashes_id, *terms).compact.map { |r| MultiJson.decode(r) }
    end

    private

    # Whether +key+ exists; accepts both a boolean and a count reply.
    def cached?(key)
      found = redis.exists(key)
      found && found != 0
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'sinatra/base'
2
+ require 'soulheart'
3
+ require 'rack/contrib'
4
+
5
+ module Soulheart
6
+
7
+ class Server < Sinatra::Base
8
+ include Helpers
9
+
10
+ before do
11
+ content_type 'application/json', :charset => 'utf-8'
12
+ headers['Access-Control-Allow-Origin'] = '*'
13
+ headers['Access-Control-Allow-Methods'] = 'POST, PUT, GET, OPTIONS'
14
+ headers['Access-Control-Request-Method'] = '*'
15
+ headers['Access-Control-Allow-Headers'] = 'Origin, X-Requested-With, Content-Type, Accept, Authorization'
16
+ end
17
+
18
+ get '/' do
19
+ matches = Matcher.new(params).matches
20
+ MultiJson.encode({ results: matches })
21
+ end
22
+
23
+ get '/status' do
24
+ MultiJson.encode({ soulheart: Soulheart::VERSION, :status => "ok" })
25
+ end
26
+
27
+ not_found do
28
+ content_type 'application/json', :charset => 'utf-8'
29
+ MultiJson.encode({ :error => "not found" })
30
+ end
31
+
32
+ end
33
+ end
@@ -0,0 +1,3 @@
1
module Soulheart
  # Gem version; frozen so the constant cannot be modified in place.
  VERSION = "0.0.1".freeze
end
data/logo.png ADDED
Binary file
data/soulheart.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/soulheart/version', __FILE__)
3
+
4
Gem::Specification.new do |gem|
  gem.name          = "soulheart"
  gem.version       = Soulheart::VERSION
  gem.authors       = ["Seth Herr"]
  gem.email         = ["seth.william.herr@gmail.com"]
  gem.description   = gem.summary = "Simple, fast autocomplete server for Ruby and Rails"
  gem.homepage      = "https://github.com/sethherr/soulheart"
  gem.license       = "MIT"
  gem.executables   = ["soulheart", "soulheart-web"]
  # Everything tracked by git except paths starting with "test".
  gem.files         = `git ls-files | grep -Ev '^(test)'`.split("\n")
  gem.require_paths = ["lib"]

  # Runtime dependencies: every library the code under lib/ and bin/ requires.
  gem.add_dependency 'hiredis', '~> 0.4.5'
  gem.add_dependency 'redis', '>= 3.0.6'
  gem.add_dependency 'vegas', '>= 0.1.0'
  gem.add_dependency 'json'
  # lib/soulheart.rb requires multi_json, so it must be installed with the gem.
  gem.add_dependency 'multi_json'
  gem.add_dependency 'sinatra'
  # soulheart/server requires rack/contrib at load time, so this cannot be a
  # development-only dependency.
  gem.add_dependency 'rack-contrib'

  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rubocop'
end
@@ -0,0 +1,15 @@
1
+ {"text":"Steel","category":"Frame Materials" }
2
+ {"text":"Brompton Bicycle","priority":51,"category":"Frame Manufacturer","data":{"id":8,"url":"http://www.brompton.com"}}
3
+ {"text":"Jamis","priority":75,"category":"Frame Manufacturer","data":{"id":2222,"url":"http://jamisbikes.com"}}
4
+ {"text":"Surly","priority":102,"category":"Frame Manufacturer"}
5
+ {"text":"Jagwire","priority":40,"category":"Manufacturer"}
6
+ {"text":"Jannd","priority":41,"category":"Manufacturer"}
7
+ {"text":"Sram","priority":50,"category":"Manufacturer","data":{"id":8,"url":"http://sram.com"}}
8
+ {"text":"Brooks England LTD.","priority":50,"category":"Manufacturer","data":{"id":200,"url":"http://www.brooksengland.com/"}}
9
+ {"text":"Dodger Stadium","priority":84,"data":{"id":1,"url":"\/dodger-stadium-tickets\/","subtitle":"Los Angeles, CA"},"aliases":["Chavez Ravine"]}
10
+ {"text":"Angel Stadium","priority":90,"data":{"id":28,"url":"\/angel-stadium-tickets\/","subtitle":"Anaheim, CA"},"aliases":["Edison International Field of Anaheim"]}
11
+ {"text":"Chase Field ","priority":80,"data":{"id":30,"url":"\/chase-field-tickets\/","subtitle":"Phoenix, AZ"},"aliases":["Bank One Ballpark", "Bank One Stadium"]}
12
+ {"text":"Sun Life Stadium","priority":75,"data":{"id":29,"url":"\/sun-life-stadium-tickets\/","subtitle":"Miami, FL"},"aliases":["Dolphins Stadium","Land Shark Stadium"]}
13
+ {"text":"Turner Field","priority":50,"data":{"id":2,"url":"\/turner-field-tickets\/","subtitle":"Atlanta, GA"}}
14
+ {"text":"Citi Field","priority":92,"data":{"id":3,"url":"\/citi-field-tickets\/","subtitle":"Atlanta, GA"},"aliases":["Shea Stadium"]}
15
+ {"text":"中国佛山 李小龙","priority":94,"data":{"id":8,"url":"\/Bruce Lee\/","subtitle":"Chinese Foshan"},"aliases":["Li XiaoLong"]}
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+
3
describe Soulheart::Loader do

  describe '#clean' do
    it "sets the default category, priority and normalizes term" do
      item = { 'text' => '  FooBar' }
      result = Soulheart::Loader.new.clean(item)
      expect(result['priority']).to eq(100)
      expect(result['term']).to eq('foobar')
      expect(result['category']).to eq('default')
      expect(result['data']['text']).to eq('  FooBar')
    end

    it "doesn't overwrite the submitted params" do
      item = {
        'text' => 'Cool ',
        'priority' => 50,
        'category' => 'Gooble',
        'data' => {
          'id' => 199
        }
      }
      result = Soulheart::Loader.new.clean(item)
      expect(result['term']).to eq('cool')
      expect(result['priority']).to eq(50)
      expect(result['data']['text']).to eq('Cool ')
      expect(result['data']['id']).to eq(199)
      expect(result['category']).to eq('gooble')
      expect(result['data']['category']).to eq('Gooble')
    end

    it "raises argument error if text isn't passed" do
      expect {
        Soulheart::Loader.new.clean({ 'name' => 'stuff' })
      }.to raise_error(/must have/i)
    end
  end

  describe '#add_item' do
    it "adds an item, adds prefix scopes, adds category" do
      item = {
        'text' => 'Brompton Bicycle',
        'priority' => 50,
        'category' => 'Gooble',
        'data' => {
          'id' => 199
        }
      }
      loader = Soulheart::Loader.new
      redis = loader.redis
      redis.del loader.results_hashes_id
      loader.add_item(item)
      target = "{\"text\":\"Brompton Bicycle\",\"category\":\"Gooble\",\"id\":199}"
      result = redis.hget(loader.results_hashes_id, 'brompton bicycle')
      expect(result).to eq(target)
      # category_id already ends with ':', so the prefix follows it directly
      prefixed = redis.zrevrange "#{loader.category_id('gooble')}brom", 0, -1
      expect(prefixed[0]).to eq('brompton bicycle')
      expect(redis.smembers(loader.categories_id)).to include('gooble')
    end
  end

  describe '#load' do
    it "stores terms by priority and adds categories for each possible category combination" do
      # The fixture holds one JSON document per line.
      items = File.foreach('spec/fixtures/multiple_categories.json').map { |l| MultiJson.decode(l) }
      loader = Soulheart::Loader.new
      redis = loader.redis
      loader.delete_categories
      loader.load(items)

      # Combination names are the sorted category names joined without a separator.
      cat_prefixed = redis.zrevrange "#{loader.category_id('frame manufacturermanufacturer')}brom", 0, -1
      expect(cat_prefixed.count).to eq(1)
      expect(redis.smembers(loader.categories_id).count).to be > 3

      prefixed = redis.zrevrange "#{loader.category_id('all')}bro", 0, -1
      expect(prefixed.count).to eq(2)
      expect(prefixed[0]).to eq('brompton bicycle')
    end

    it "stores terms by priority and doesn't add run categories if none are present" do
      items = [
        { 'text' => 'cool thing', 'category' => 'AWESOME' },
        { 'text' => 'Sweet', 'category' => ' awesome' }
      ]
      loader = Soulheart::Loader.new
      redis = loader.redis
      loader.delete_categories
      loader.load(items)
      expect(redis.smembers(loader.category_combos_id).count).to eq(1)
    end
  end

end