soulheart 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/soulheart-web ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ begin
5
+ require 'vegas'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'vegas'
9
+ end
10
+ require 'soulheart/server'
11
+
12
+
13
# Boots Soulheart::Server through Vegas and registers the command line
# options that configure Soulheart before the server starts.
Vegas::Runner.new(Soulheart::Server, 'soulheart-web', {
  # Intentionally a no-op: the commented-out lines are a placeholder for
  # loading a config file before the server runs.
  :before_run => lambda { |v|
    # path = (ENV['RESQUECONFIG'] || v.args.first)
    # load path.to_s.strip if path
  }
}) do |runner, opts, _app|
  # The argument is declared optional ("[HOST:PORT]"), so OptionParser yields
  # nil when the flag is given without a value.
  opts.on('-r', '--redis [HOST:PORT]', 'Redis connection string') do |host|
    if host.nil? || host.strip.empty?
      runner.logger.warn '--redis given without a value; keeping the default Redis connection'
      next
    end

    # Soulheart.redis= parses strings as URLs; a bare HOST:PORT has no scheme
    # and would be misread, so turn it into a redis:// URL first.
    host = host.strip
    url = host.include?('://') ? host : "redis://#{host}"
    runner.logger.info "Using Redis connection string '#{url}'"
    Soulheart.redis = url
  end

  # One stop word per line; surrounding whitespace and blank lines are dropped.
  opts.on('-s', '--stop-words [FILE]', 'Path to file containing a list of stop words') do |fn|
    if fn.nil? || fn.strip.empty?
      runner.logger.warn '--stop-words given without a file; keeping the default stop words'
      next
    end

    Soulheart.stop_words = File.readlines(fn).map(&:strip).reject(&:empty?)
  end
end
data/lib/soulheart.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'multi_json'
2
+
3
+ require 'soulheart/version'
4
+ require 'soulheart/helpers'
5
+ require 'soulheart/base'
6
+ require 'soulheart/matcher'
7
+ require 'soulheart/loader'
8
+ require 'soulheart/config'
9
+
10
+ module Soulheart # Top-level namespace of the gem.
11
+ extend Config # Makes Config's methods callable on the module itself: Soulheart.redis, Soulheart.redis=, Soulheart.stop_words, Soulheart.stop_words=.
12
+ end
@@ -0,0 +1,54 @@
1
module Soulheart
  # Shared behaviour for the classes that talk to Redis: access to the
  # configured connection, the Redis key naming scheme, and maintenance of the
  # set of category combinations.
  class Base
    include Helpers

    # Not read by any method in this class; exposed for subclasses/callers.
    attr_accessor :type

    # @return [Object] the connection configured on the Soulheart module
    def redis
      Soulheart.redis
    end

    # @return [Integer] lifetime of cached result sets, in seconds (10 minutes)
    def cache_length
      10 * 60 # Setting to 10 minutes, but making it possible to edit down the line
    end

    # @return [String] key prefix; a separate prefix is used when RACK_ENV is
    #   "test" so test data never shares keys with real data
    def base_id
      ENV['RACK_ENV'] != 'test' ? "soulheart:" : "soulheart_test:"
    end

    # Rebuilds the set stored at #category_combos_id from the categories stored
    # at #categories_id.
    #
    # Every non-empty combination of the normalized, de-duplicated, sorted
    # category names is joined without a separator; the final entry (the
    # combination containing every category) is replaced by the literal 'all'.
    #
    # @return [Array<String>] the stored combination names, or an empty array
    #   when no categories exist (nothing is written in that case)
    def set_category_combos_array
      redis.del category_combos_id
      categories = redis.smembers(categories_id).map { |c| normalize(c) }.uniq.sort
      # Without this guard the code below would call a method on nil
      # (there is no last combination) and SADD an empty member list.
      return [] if categories.empty?

      combos = 1.upto(categories.size).flat_map do |n|
        categories.combination(n).map { |names| names.join('') }
      end
      combos[-1] = 'all'
      redis.sadd category_combos_id, combos
      combos
    end

    # @return [String] key of the set of category combination names
    def category_combos_id
      "#{base_id}category_combos:"
    end

    # @return [String] key of the set of category names
    def categories_id
      "#{base_id}categories:"
    end

    # @param name [String] category (or combination) name
    # @return [String] key prefix under which that category's indexes live
    def category_id(name='all')
      "#{categories_id}#{name}:"
    end

    # @param category [String] a key prefix as returned by #category_id
    # @return [String] key of the sorted set used when there is no query text
    def no_query_id(category=category_id)
      "all:#{category}"
    end

    # @return [String] key of the hash holding each term's JSON payload
    def results_hashes_id
      "#{base_id}database:"
    end

    # NOTE(review): base_id already ends with ':', so these keys contain '::'.
    # Left unchanged so existing cache keys keep their names.
    #
    # @param type [String] category combination name
    # @return [String] key prefix for cached result sets
    def cache_id(type='all')
      "#{base_id}:cache:#{type}:"
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'uri'
2
+ require 'redis'
3
+
4
module Soulheart
  # Configuration mixin: holds the Redis connection and the stop word list for
  # whatever object extends it.
  module Config
    DEFAULT_STOP_WORDS = %w[vs at the].freeze
    DEFAULT_REDIS_URL = 'redis://127.0.0.1:6379/0'.freeze

    # Accepts:
    #   1. A Redis URL String 'redis://host:port/db' (a bare 'host:port' is
    #      also understood)
    #   2. An existing instance of Redis, Redis::Namespace, etc.
    #
    # A String only records the address and drops the current connection so
    # that the next call to #redis reconnects; any other object is used as the
    # connection directly.
    def redis=(server)
      if server.is_a?(String)
        @redis = nil
        @redis_url = server
      else
        @redis = server
      end

      redis
    end

    # Returns the current Redis connection. If none has been created, will
    # create a new one from, in order of preference, the address given to
    # #redis=, ENV["REDIS_URL"], or DEFAULT_REDIS_URL.
    def redis
      @redis ||= ::Redis.new(redis_connection_options)
    end

    # @return [Array<String>] words ignored when building prefixes. The
    #   default list is copied so callers can mutate the result without
    #   touching DEFAULT_STOP_WORDS.
    def stop_words
      @stop_words ||= DEFAULT_STOP_WORDS.dup
    end

    # @param arr [Array, String, nil] replacement list; nil becomes an empty
    #   list, a single value a one-element list, nested arrays are flattened
    def stop_words=(arr)
      @stop_words = Array(arr).flatten
    end

    private

    # Builds the option hash handed to Redis.new.
    #
    # @return [Hash] :host, :port, :db and :password taken from the address
    def redis_connection_options
      address = @redis_url || ENV['REDIS_URL'] || DEFAULT_REDIS_URL
      # Without a scheme, URI reads "host:port" as "scheme:opaque" (or rejects
      # it outright), so the host would be lost.
      address = "redis://#{address}" unless address.include?('://')
      url = URI(address)
      {
        # driver: :hiredis,
        host: url.host,
        port: url.port,
        db: url.path[1..-1], # strips the leading '/'
        password: url.password
      }
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+
3
module Soulheart
  # Text helpers shared by the loader, matcher and server.
  module Helpers
    # @return [Boolean] true when the receiver responds to #empty? and is
    #   empty; otherwise the negation of the receiver itself
    def blank?
      respond_to?(:empty?) ? !!empty? : !self
    end

    # Lower-cases the input and removes everything except word characters
    # (Letter, Mark, Number, Connector_Punctuation - so Chinese, Japanese,
    # etc. survive) and spaces, then trims surrounding whitespace.
    #
    # @param str [#to_s] text to normalize; nil is treated as an empty string
    # @return [String]
    def normalize(str)
      str.to_s.downcase.gsub(/[^\p{Word} ]/, '').strip
    end

    # Lists every leading substring of every word in the phrase, skipping
    # words found in Soulheart.stop_words.
    #
    # @example
    #   prefixes_for_phrase('The Cat') # => ["c", "ca", "cat"]
    #
    # @param phrase [#to_s] text to index
    # @return [Array<String>] unique prefixes, in order of first appearance
    def prefixes_for_phrase(phrase)
      words = normalize(phrase).split(' ').reject do |word|
        Soulheart.stop_words.include?(word)
      end
      words.flat_map { |word| (1..word.length).map { |len| word[0, len] } }.uniq
    end
  end
end
@@ -0,0 +1,106 @@
1
module Soulheart
  # Writes autocomplete data to Redis.
  #
  # Keys written per item (see #add_item):
  #   * no_query_id(<category prefix>)   sorted set of terms, used when a
  #                                      search has no query text
  #   * "<category prefix><prefix>"      sorted set of terms per word prefix
  #   * results_hashes_id                hash of term => JSON payload
  #   * base_id                          set of every prefix ever indexed
  #   * categories_id                    set of category names
  class Loader < Base
    # Builds the default record for an item.
    #
    # @param text [String] display text; its normalized form becomes the term
    # @param category [String, nil] category name, 'default' when nil
    # @return [Hash] record with 'category', 'priority', 'term', 'aliases'
    #   and 'data' keys
    def default_items_hash(text, category)
      category ||= 'default'
      {
        'category' => normalize(category),
        'priority' => 100,
        'term' => normalize(text),
        'aliases' => [],
        'data' => {
          'text' => text,
          'category' => category
        }
      }
    end

    # Removes the set of known category names.
    def delete_categories
      redis.del categories_id
    end

    # Replaces the set of known category names.
    #
    # @param categories [Array<String>, String] new category names
    def reset_categories(categories)
      delete_categories
      redis.sadd categories_id, categories
    end

    # Deletes every index written by #add_item, the prefix set and the stored
    # payloads.
    #
    # Index keys are "<category prefix><prefix>", so each known category and
    # category combination has to be visited; deleting "#{base_id}:<prefix>"
    # would touch keys that are never written.
    def delete_data
      prefixes = redis.smembers(base_id)
      category_keys = indexed_category_names.map { |name| category_id(name) }
      redis.pipelined do |pipeline|
        category_keys.each do |cid|
          pipeline.del(no_query_id(cid))
          prefixes.each { |prefix| pipeline.del("#{cid}#{prefix}") }
        end
        pipeline.del(base_id)
      end

      # Redis can continue serving cached requests for this type while the reload is
      # occurring. Some requests may be cached incorrectly as empty set (for requests
      # which come in after the above delete, but before the loading completes). But
      # everything will work itself out as soon as the cache expires again.

      # delete the data stored for this type
      redis.del(results_hashes_id)
    end

    # Replaces all stored data with the given items.
    #
    # Each item is first indexed under its own category (and replaced in place
    # by its cleaned form), then indexed again under every category
    # combination that contains its category, and under 'all'.
    #
    # @param items [Array<Hash>] raw items; each must have a 'text' key
    # @return [Array<String>] the category combination names
    def load(items)
      delete_data
      # Replace with item return so we know we have category_id
      items.each { |item| item.replace(add_item(item)) }

      combos = set_category_combos_array
      members_by_combo = category_combo_members
      combos.each do |combo|
        combo_base_id = category_id(combo)
        members = members_by_combo.fetch(combo, [])
        items.each do |item|
          # Compare against the combination's actual members: a substring test
          # on the joined name would put 'manufacturer' items into
          # 'frame manufacturer'.
          next unless combo == 'all' || members.include?(item['category'])
          add_item(item, combo_base_id, cleaned: true)
        end
      end
    end

    # Turns a raw item into a full record without modifying the argument.
    #
    # 'text' and 'category' feed #default_items_hash, 'data' is merged into
    # the default payload, and every remaining key overrides the defaults.
    #
    # @param item [Hash] raw item with string keys
    # @return [Hash] cleaned record
    # @raise [ArgumentError] when the item has no 'text'
    def clean(item)
      raise ArgumentError, "Items must have text" unless item["text"]

      item = item.dup
      record = default_items_hash(item.delete('text'), item.delete('category'))
      extra_data = item.delete('data')
      record['data'].merge!(extra_data) if extra_data
      record.merge(item)
    end

    # Indexes one item.
    #
    # @param item [Hash] raw item, or an already cleaned record when
    #   +cleaned+ is true
    # @param category_base_id [String, nil] key prefix to index under;
    #   defaults to the item's own category when the item is cleaned here
    # @param cleaned [Boolean] true when the item is already a cleaned record
    #   whose payload and prefixes are stored, so only the category index is
    #   written
    # @return [Hash] the cleaned record
    def add_item(item, category_base_id=nil, cleaned: false)
      unless cleaned
        item = clean(item)
        category_base_id ||= category_id(item['category'])
        # SADD ignores members that already exist, so no lookup is needed.
        redis.sadd categories_id, item['category']
      end

      phrase = ([item["term"]] + (item["aliases"] || [])).join(' ')
      prefixes = prefixes_for_phrase(phrase)
      redis.pipelined do |pipeline|
        # Add to master set for queryless searches
        pipeline.zadd(no_query_id(category_base_id), item["priority"], item["term"])
        # store the raw data in a separate key to reduce memory usage, if it's cleaned it's done
        pipeline.hset(results_hashes_id, item['term'], MultiJson.encode(item['data'])) unless cleaned
        prefixes.each do |prefix|
          pipeline.sadd(base_id, prefix) unless cleaned # remember prefix in a master set
          # store the normalized term in the index for each of the categories
          pipeline.zadd("#{category_base_id}#{prefix}", item["priority"], item["term"])
        end
      end
      item
    end

    # Removes a term from the payload hash and from every category index.
    #
    # Only the payload is stored per term, so aliases must be supplied in
    # +item+ for their prefixes to be cleaned up as well. Prefixes are left in
    # the master prefix set because other terms may share them.
    #
    # @param item [Hash] must have a 'term' (normalized) or a 'text'; may have
    #   'aliases'
    # @return [Array, nil] the pipeline replies, or nil when the term is not
    #   stored
    def remove(item)
      term = item['term'] || normalize(item['text'].to_s)
      return unless redis.hget(results_hashes_id, term)

      phrase = ([term] + Array(item['aliases'])).join(' ')
      prefixes = prefixes_for_phrase(phrase)
      category_keys = indexed_category_names.map { |name| category_id(name) }
      # undo the operations done in add
      redis.pipelined do |pipeline|
        pipeline.hdel(results_hashes_id, term)
        category_keys.each do |cid|
          pipeline.zrem(no_query_id(cid), term)
          prefixes.each { |prefix| pipeline.zrem("#{cid}#{prefix}", term) }
        end
      end
    end

    private

    # Every name that may have been used as a category key prefix: known
    # categories, stored category combinations, and 'all'.
    def indexed_category_names
      stored = redis.smembers(categories_id) + redis.smembers(category_combos_id)
      (stored.map { |name| normalize(name) } + ['all']).uniq
    end

    # Maps each joined combination name to the categories it is made of.
    # Names that collide after joining share one index key, so their members
    # are merged.
    def category_combo_members
      categories = redis.smembers(categories_id).map { |c| normalize(c) }.uniq.sort
      1.upto(categories.size).each_with_object({}) do |size, lookup|
        categories.combination(size) do |members|
          name = members.join('')
          lookup[name] = (lookup[name] || []) | members
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module Soulheart
  # Answers an autocomplete query from the indexes stored in Redis.
  class Matcher < Base
    # @param params [Hash] string-keyed options; see .default_params_hash for
    #   the recognised keys and their defaults
    def initialize(params={})
      @opts = self.class.default_params_hash.merge params
      clean_opts
    end

    # @return [Hash] default options: first page (0), 5 results per page, no
    #   category filter, empty query, caching enabled
    def self.default_params_hash
      {
        'page' => 0,
        'per_page' => 5,
        'categories' => [],
        'query' => '',
        'cache' => true
      }
    end

    # Normalizes the options in place.
    #
    # 'categories' may be an Array or a String separated by ',' or '+'; it
    # ends up as a sorted array of unique normalized names, or empty when it
    # is missing or names as many categories as exist. 'query' becomes an
    # array of normalized words.
    #
    # @return [Hash] the cleaned options
    def clean_opts
      categories = @opts['categories'] || []
      unless categories == '' || categories == []
        categories = categories.split(/,|\+/) unless categories.kind_of?(Array)
        categories = categories.map { |s| normalize(s) }.uniq.sort
        categories = [] if categories.length == redis.scard(categories_id)
      end
      @opts['categories'] = Array(categories)
      @opts['query'] = normalize(@opts['query'].to_s).split(' ')
      # .reject{ |i| i && i.length > 0 } .split(' ').reject{ Soulmate.stop_words.include?(w) }
      @opts
    end

    # @return [String] 'all' without a category filter, otherwise the
    #   category names joined without a separator
    def categories_string
      @opts['categories'].empty? ? 'all' : @opts['categories'].join('')
    end

    # @return [String] key prefix of the index for the selected categories
    def category_id_from_opts
      category_id(categories_string)
    end

    # @return [String] key under which this query's result set is cached
    def cache_id_from_opts
      "#{cache_id(categories_string)}#{@opts['query'].join(':')}:"
    end

    # @param cid [String] key prefix as returned by #category_id_from_opts
    # @return [Array<String>] keys of the sorted sets to intersect
    def interkeys_from_opts(cid)
      # If there isn't a query, we use a special key in redis
      @opts['query'].empty? ? [no_query_id(cid)] : @opts['query'].map { |w| "#{cid}#{w}" }
    end

    # Runs the query.
    #
    # The intersection of the per-word indexes is stored under the cache key
    # (with a #cache_length expiry) unless a cached copy exists and caching is
    # enabled; the requested page is then read from it, highest score first.
    #
    # @return [Array<Hash>] decoded payloads for the requested page; empty
    #   when nothing matches or 'per_page' is not positive
    def matches
      cachekey = cache_id_from_opts
      cid = category_id_from_opts

      unless @opts['cache'] && cached?(cachekey)
        redis.zinterstore(cachekey, interkeys_from_opts(cid))
        redis.expire(cachekey, cache_length) # cache_length is set in base.rb
      end

      page = [@opts['page'].to_i, 0].max
      per_page = @opts['per_page'].to_i
      # A non-positive page size would produce a negative stop index, which
      # ZREVRANGE counts from the end of the set.
      return [] if per_page < 1

      # ZREVRANGE takes inclusive start and stop ranks, so the stop has to be
      # offset by the start of the page.
      start = page * per_page
      terms = redis.zrevrange(cachekey, start, start + per_page - 1)
      return [] if terms.empty?

      # compact handles cached results for terms which have since been deleted
      redis.hmget(results_hashes_id, *terms).compact.map { |json| MultiJson.decode(json) }
    end

    private

    # EXISTS is reported as a boolean by some client versions and as a count
    # by others; accept both.
    def cached?(key)
      found = redis.exists(key)
      found == true || (found.is_a?(Integer) && found > 0)
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'sinatra/base'
2
+ require 'soulheart'
3
+ require 'rack/contrib'
4
+
5
module Soulheart
  # Sinatra application exposing the matcher as a JSON API.
  class Server < Sinatra::Base
    include Helpers

    # Every response is JSON and carries the cross-origin headers below.
    before do
      content_type 'application/json', charset: 'utf-8'
      headers(
        'Access-Control-Allow-Origin' => '*',
        'Access-Control-Allow-Methods' => 'POST, PUT, GET, OPTIONS',
        'Access-Control-Request-Method' => '*',
        'Access-Control-Allow-Headers' => 'Origin, X-Requested-With, Content-Type, Accept, Authorization'
      )
    end

    # Search endpoint: the request parameters are handed to Matcher as-is.
    get '/' do
      found = Matcher.new(params).matches
      MultiJson.encode(results: found)
    end

    # Health check reporting the gem version.
    get '/status' do
      MultiJson.encode(soulheart: Soulheart::VERSION, status: 'ok')
    end

    # JSON body for unknown routes.
    not_found do
      content_type 'application/json', charset: 'utf-8'
      MultiJson.encode(error: 'not found')
    end
  end
end
@@ -0,0 +1,3 @@
1
module Soulheart
  # Gem version; frozen so it cannot be modified in place at runtime.
  VERSION = '0.0.1'.freeze
end
data/logo.png ADDED
Binary file
data/soulheart.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/soulheart/version', __FILE__)
3
+
4
Gem::Specification.new do |gem|
  gem.name          = "soulheart"
  gem.version       = Soulheart::VERSION
  gem.authors       = ["Seth Herr"]
  gem.email         = ["seth.william.herr@gmail.com"]
  gem.description   = gem.summary = "Simple, fast autocomplete server for Ruby and Rails"
  gem.homepage      = "https://github.com/sethherr/soulheart"
  gem.license       = "MIT"
  # NOTE(review): confirm that bin/soulheart ships with the gem; only
  # bin/soulheart-web is visible alongside this file.
  gem.executables   = ["soulheart", "soulheart-web"]
  gem.files         = `git ls-files | grep -Ev '^(test)'`.split("\n")
  gem.require_paths = ["lib"]

  # Runtime dependencies: everything lib/ and bin/ require when loaded.
  gem.add_dependency 'hiredis', '~> 0.4.5'
  gem.add_dependency 'redis', '>= 3.0.6'
  gem.add_dependency 'vegas', '>= 0.1.0'
  gem.add_dependency 'json'
  gem.add_dependency 'multi_json'   # required by lib/soulheart.rb
  gem.add_dependency 'sinatra'
  gem.add_dependency 'rack-contrib' # required by lib/soulheart/server.rb at load time

  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rubocop'
end
@@ -0,0 +1,15 @@
1
+ {"text":"Steel","category":"Frame Materials" }
2
+ {"text":"Brompton Bicycle","priority":51,"category":"Frame Manufacturer","data":{"id":8,"url":"http://www.brompton.com"}}
3
+ {"text":"Jamis","priority":75,"category":"Frame Manufacturer","data":{"id":2222,"url":"http://jamisbikes.com"}}
4
+ {"text":"Surly","priority":102,"category":"Frame Manufacturer"}
5
+ {"text":"Jagwire","priority":40,"category":"Manufacturer"}
6
+ {"text":"Jannd","priority":41,"category":"Manufacturer"}
7
+ {"text":"Sram","priority":50,"category":"Manufacturer","data":{"id":8,"url":"http://sram.com"}}
8
+ {"text":"Brooks England LTD.","priority":50,"category":"Manufacturer","data":{"id":200,"url":"http://www.brooksengland.com/"}}
9
+ {"text":"Dodger Stadium","priority":84,"data":{"id":1,"url":"\/dodger-stadium-tickets\/","subtitle":"Los Angeles, CA"},"aliases":["Chavez Ravine"]}
10
+ {"text":"Angel Stadium","priority":90,"data":{"id":28,"url":"\/angel-stadium-tickets\/","subtitle":"Anaheim, CA"},"aliases":["Edison International Field of Anaheim"]}
11
+ {"text":"Chase Field ","priority":80,"data":{"id":30,"url":"\/chase-field-tickets\/","subtitle":"Phoenix, AZ"},"aliases":["Bank One Ballpark", "Bank One Stadium"]}
12
+ {"text":"Sun Life Stadium","priority":75,"data":{"id":29,"url":"\/sun-life-stadium-tickets\/","subtitle":"Miami, FL"},"aliases":["Dolphins Stadium","Land Shark Stadium"]}
13
+ {"text":"Turner Field","priority":50,"data":{"id":2,"url":"\/turner-field-tickets\/","subtitle":"Atlanta, GA"}}
14
+ {"text":"Citi Field","priority":92,"data":{"id":3,"url":"\/citi-field-tickets\/","subtitle":"Atlanta, GA"},"aliases":["Shea Stadium"]}
15
+ {"text":"中国佛山 李小龙","priority":94,"data":{"id":8,"url":"\/Bruce Lee\/","subtitle":"Chinese Foshan"},"aliases":["Li XiaoLong"]}
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+
3
# Specs for Soulheart::Loader. They run against the Redis connection returned
# by Soulheart.redis.
describe Soulheart::Loader do

  describe :clean_data do
    it "sets the default category, priority and normalizes term" do
      item = { 'text' => '  FooBar' }
      result = Soulheart::Loader.new.clean(item)
      expect(result['priority']).to eq(100)
      expect(result['term']).to eq('foobar')
      expect(result['category']).to eq('default')
      expect(result['data']['text']).to eq('  FooBar')
    end

    it "doesn't overwrite the submitted params" do
      item = {
        'text' => 'Cool ',
        'priority' => 50,
        'category' => 'Gooble',
        'data' => {
          'id' => 199
        }
      }
      result = Soulheart::Loader.new.clean(item)
      expect(result['term']).to eq('cool')
      expect(result['priority']).to eq(50)
      expect(result['data']['text']).to eq('Cool ')
      expect(result['data']['id']).to eq(199)
      expect(result['category']).to eq('gooble')
      expect(result['data']['category']).to eq('Gooble')
    end

    it "raises argument error if text is not passed" do
      expect{
        Soulheart::Loader.new.clean({'name' => 'stuff'})
      }.to raise_error(/must have/i)
    end
  end

  describe :add_item do
    it "adds an item, adds prefix scopes, adds category" do
      item = {
        'text' => 'Brompton Bicycle',
        'priority' => 50,
        'category' => 'Gooble',
        'data' => {
          'id' => 199
        }
      }
      loader = Soulheart::Loader.new
      redis = loader.redis
      redis.del loader.results_hashes_id
      loader.add_item(item)

      target = "{\"text\":\"Brompton Bicycle\",\"category\":\"Gooble\",\"id\":199}"
      result = redis.hget(loader.results_hashes_id, 'brompton bicycle')
      expect(result).to eq(target)
      # category_id already ends with ':', so the prefix is appended directly.
      prefixed = redis.zrevrange "#{loader.category_id('gooble')}brom", 0, -1
      expect(prefixed[0]).to eq('brompton bicycle')
      expect(redis.smembers(loader.categories_id)).to include('gooble')
    end
  end

  describe :store_terms do
    it "stores terms by priority and adds categories for each possible category combination" do
      items = []
      file = File.read('spec/fixtures/multiple_categories.json')
      file.each_line { |l| items << MultiJson.decode(l) }
      loader = Soulheart::Loader.new
      redis = loader.redis
      loader.delete_categories
      loader.load(items)

      # Combination names are the sorted category names joined without a separator.
      cat_prefixed = redis.zrevrange "#{loader.category_id('frame manufacturermanufacturer')}brom", 0, -1
      expect(cat_prefixed.count).to eq(1)
      expect(redis.smembers(loader.categories_id).count).to be > 3

      prefixed = redis.zrevrange "#{loader.category_id('all')}bro", 0, -1
      expect(prefixed.count).to eq(2)
      expect(prefixed[0]).to eq('brompton bicycle')
    end

    it "stores terms by priority and doesn't add run categories if none are present" do
      items = [
        {'text' => 'cool thing', 'category' => 'AWESOME'},
        {'text' => 'Sweet', 'category' => ' awesome'}
      ]
      loader = Soulheart::Loader.new
      redis = loader.redis
      loader.delete_categories
      loader.load(items)
      expect(redis.smembers(loader.category_combos_id).count).to eq(1)
    end
  end

end