suggester 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +18 -0
- data/Gemfile.lock +43 -0
- data/README.md +127 -0
- data/Rakefile +70 -0
- data/VERSION +1 -0
- data/bin/suggester_server +18 -0
- data/config/database.yml.example +6 -0
- data/lib/array_bsearch.rb +82 -0
- data/lib/suggester.rb +3 -0
- data/lib/suggester/client.rb +109 -0
- data/lib/suggester/handlers.rb +15 -0
- data/lib/suggester/handlers/active_record.rb +51 -0
- data/lib/suggester/handlers/base.rb +107 -0
- data/lib/suggester/handlers/helpers/normalization.rb +3 -0
- data/lib/suggester/handlers/helpers/refresh.rb +37 -0
- data/lib/suggester/handlers/marshal.rb +22 -0
- data/lib/suggester/handlers/yaml.rb +22 -0
- data/lib/suggester/server.rb +130 -0
- data/pkg/suggester-0.0.1.gem +0 -0
- data/pkg/suggester-0.0.2.gem +0 -0
- data/suggester.gemspec +92 -0
- data/test/fixtures/books.sql +15 -0
- data/test/fixtures/marshal.marshal +0 -0
- data/test/fixtures/yaml.yml +25 -0
- data/test/functional/basic_test.rb +98 -0
- data/test/test_helper.rb +26 -0
- data/test/unit/active_record_handler_test.rb +74 -0
- data/test/unit/marshal_handler_test.rb +65 -0
- data/test/unit/yaml_handler_test.rb +66 -0
- metadata +178 -0
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
source :rubygems
|
2
|
+
gem "sinatra"
|
3
|
+
gem "json"
|
4
|
+
gem "vegas"
|
5
|
+
|
6
|
+
group :development do
|
7
|
+
gem "bundler", "~> 1.0.0"
|
8
|
+
gem "jeweler", "~> 1.5.1"
|
9
|
+
gem "rcov", ">= 0"
|
10
|
+
end
|
11
|
+
|
12
|
+
group :test do
|
13
|
+
gem "shoulda", ">= 0"
|
14
|
+
gem "mocha", ">= 0"
|
15
|
+
gem "activerecord", "=2.3.9"
|
16
|
+
gem "rack-test"
|
17
|
+
gem "mysql"
|
18
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activerecord (2.3.9)
|
5
|
+
activesupport (= 2.3.9)
|
6
|
+
activesupport (2.3.9)
|
7
|
+
git (1.2.5)
|
8
|
+
jeweler (1.5.1)
|
9
|
+
bundler (~> 1.0.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rake
|
12
|
+
json (1.4.6)
|
13
|
+
mocha (0.9.8)
|
14
|
+
rake
|
15
|
+
mysql (2.8.1)
|
16
|
+
rack (1.2.1)
|
17
|
+
rack-test (0.5.6)
|
18
|
+
rack (>= 1.0)
|
19
|
+
rake (0.8.7)
|
20
|
+
rcov (0.9.9)
|
21
|
+
shoulda (2.11.3)
|
22
|
+
sinatra (1.1.0)
|
23
|
+
rack (~> 1.1)
|
24
|
+
tilt (~> 1.1)
|
25
|
+
tilt (1.1)
|
26
|
+
vegas (0.1.8)
|
27
|
+
rack (>= 1.0.0)
|
28
|
+
|
29
|
+
PLATFORMS
|
30
|
+
ruby
|
31
|
+
|
32
|
+
DEPENDENCIES
|
33
|
+
activerecord (= 2.3.9)
|
34
|
+
bundler (~> 1.0.0)
|
35
|
+
jeweler (~> 1.5.1)
|
36
|
+
json
|
37
|
+
mocha
|
38
|
+
mysql
|
39
|
+
rack-test
|
40
|
+
rcov
|
41
|
+
shoulda
|
42
|
+
sinatra
|
43
|
+
vegas
|
data/README.md
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
Suggester
|
2
|
+
=========
|
3
|
+
|
4
|
+
Suggester is an extensible, cache-based auto-suggest server and client for ruby.
|
5
|
+
Suggester also supports replication and scheduled refreshing.
|
6
|
+
|
7
|
+
To use, you provide a configuration ruby file which registers "handlers". Handlers
|
8
|
+
can be any Ruby object that responds to `find`, `match`, and `refresh`. We've included
|
9
|
+
3 basic handlers which are easily extensible:
|
10
|
+
|
11
|
+
1. Suggester::Handlers::ActiveRecord - pulls all records using an ActiveRecord class
|
12
|
+
2. Suggester::Handlers::Marshal - deserializes a Marshaled blob from a file or url
|
13
|
+
3. Suggester::Handlers::Yaml - deserializes YAML from a file or url
|
14
|
+
|
15
|
+
All 3 of these supplied handlers store their data in a sorted array. We do a binary
|
16
|
+
search to find results.
|
17
|
+
|
18
|
+
Documentation & Requirements
|
19
|
+
----------------------------
|
20
|
+
|
21
|
+
Suggester requires the following gems:
|
22
|
+
|
23
|
+
* Sinatra
|
24
|
+
* Vegas
|
25
|
+
* JSON
|
26
|
+
|
27
|
+
Server
|
28
|
+
------
|
29
|
+
|
30
|
+
### Examples
|
31
|
+
|
32
|
+
Basic usage:
|
33
|
+
|
34
|
+
suggester_server <config_file.rb>
|
35
|
+
|
36
|
+
Config file example (config_file.rb):
|
37
|
+
|
38
|
+
# assuming that Book is an ActiveRecord class
|
39
|
+
Suggester::Server.add_handler("books", Suggester::Handlers::ActiveRecord.new(:class => Book))
|
40
|
+
|
41
|
+
# using the Marshal handler
|
42
|
+
Suggester::Server.add_handler("cars", Suggester::Handlers::Marshal.new(:file => "/path/to/file.marshal"))
|
43
|
+
|
44
|
+
# using the Yaml handler
|
45
|
+
Suggester::Server.add_handler("dogs", Suggester::Handlers::Yaml.new(:file => "/path/to/file.yml"))
|
46
|
+
|
47
|
+
### Web Interface
|
48
|
+
|
49
|
+
### Creating Custom Handlers
|
50
|
+
|
51
|
+
All Handler classes must respond to 2 public instance methods:
|
52
|
+
|
53
|
+
1. find(name, options = {})
|
54
|
+
2. match(name, options = {})
|
55
|
+
|
56
|
+
These methods should return an array of data that matches the supplied name (search term).
|
57
|
+
|
58
|
+
### Refreshing Handlers
|
59
|
+
|
60
|
+
Upon server start, we spawn a thread that refreshes handler caches. We do this to prevent blocking
|
61
|
+
the webserver while a cache reloads. Since suggester server should never modify its data source,
|
62
|
+
we shouldn't have to worry much about concurrency. Note that results returned may represent an older
|
63
|
+
state of the cache, but never an in-between cache state.
|
64
|
+
|
65
|
+
There are 2 ways to refresh a handler.
|
66
|
+
|
67
|
+
The first way is to make a GET request to `/<handler_name>/refresh`. This call forces the server
|
68
|
+
to reload the specified cache as soon as possible.
|
69
|
+
|
70
|
+
The second way is to schedule recurring refreshes. When using any handler that inherits from
|
71
|
+
Suggester::Handlers::Base, you may supply a `:refresh_interval` option (in minutes). The
|
72
|
+
server will automatically refresh that cache every `:refresh_interval` minutes.
|
73
|
+
|
74
|
+
### Replication
|
75
|
+
|
76
|
+
An easy way to set up replication is through configuration and the Marshal or Yaml handlers are
|
77
|
+
best for the job. Because the Marshal and Yaml handlers can load from a local file or a url,
|
78
|
+
you can point the replicant handler to a master server's dump output.
|
79
|
+
|
80
|
+
Example Master Configuration:
|
81
|
+
|
82
|
+
Suggester::Server.add_handler("books", Suggester::Handlers::ActiveRecord.new(:class => Book))
|
83
|
+
Suggester::Server.add_handler("authors", Suggester::Handlers::ActiveRecord.new(:class => Book, :name_field => :author))
|
84
|
+
|
85
|
+
Replicant Configuration:
|
86
|
+
|
87
|
+
Suggester::Server.add_handler("books", Suggester::Handlers::Marshal.new(:file => "http://<master:port>/books/dump.marshal"))
|
88
|
+
Suggester::Server.add_handler("authors", Suggester::Handlers::Yaml.new(:file => "http://<master:port>/authors/dump.yml"))
|
89
|
+
|
90
|
+
Note that this method of replication uses the pull model -- the replicant must determine when to
|
91
|
+
pull from the master.
|
92
|
+
|
93
|
+
Client
|
94
|
+
------
|
95
|
+
|
96
|
+
### Examples
|
97
|
+
|
98
|
+
Basic usage:
|
99
|
+
|
100
|
+
require 'rubygems'
|
101
|
+
require 'suggester'
|
102
|
+
require 'suggester/client'
|
103
|
+
|
104
|
+
# create an instance of the client pointing to a server at localhost:5050
|
105
|
+
client = Suggester::Client.new(:host => 'localhost', :port => 5050)
|
106
|
+
|
107
|
+
# find begins-with matches
|
108
|
+
client.find("books", "A Tale of Two")
|
109
|
+
=> [{:id => 1, :display_name => "A Tale of Two Cities"}]
|
110
|
+
client.find("books", "The ")
|
111
|
+
=> [{:id => 4, :display_name => "The Origin of Species"},
|
112
|
+
{:id => 5, :display_name => "The Catcher in the Rye"},
|
113
|
+
{:id => 6, :display_name => "The Lord of the Rings"}]
|
114
|
+
client.find("books", "Non-existent")
|
115
|
+
=> []
|
116
|
+
|
117
|
+
# find exact matches
|
118
|
+
client.match("books", "a tale of two cities")
|
119
|
+
=> [{:id => 1, :display_name => "A Tale of Two Cities"}]
|
120
|
+
client.match("books", "A Tale of Two")
|
121
|
+
=> []
|
122
|
+
client.match("books", "The")
|
123
|
+
=> []
|
124
|
+
|
125
|
+
# tell the server to refresh its handler
|
126
|
+
client.refresh("books")
|
127
|
+
=> true
|
data/Rakefile
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# Rakefile for the suggester gem: packaging (Jeweler), tests, coverage (rcov),
# RDoc generation and a helper task that loads the ActiveRecord test fixtures.
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "suggester"
  gem.homepage = "http://github.com/chingor13/suggester"
  gem.license = "MIT"
  gem.summary = %Q{Extensible, cache-based auto-suggest server for ruby.}
  gem.description = %Q{Extensible, cache-based auto-suggest server for ruby.  Includes refresh and replication support out of the box.}
  gem.email = "ching.jeff@gmail.com"
  gem.authors = ["Jeff Ching"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  gem.add_runtime_dependency 'jabber4r', '> 0.1'
  #  gem.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

task :default => :test

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline so it does not end up in the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "suggester #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

namespace :db do
  desc 'setup test activerecord databases'
  task :prepare do
    # Loading the test helper is expected to define RUN_AR_TESTS and to set up
    # the ActiveRecord connection used below (TODO confirm against test_helper.rb).
    load File.join(File.dirname(__FILE__), 'test', 'test_helper.rb')
    if RUN_AR_TESTS
      fixture_sql = File.join(File.dirname(__FILE__), 'test', 'fixtures', 'books.sql')
      # File.read closes the file handle; the previous open(...).read leaked it.
      File.read(fixture_sql).split(";").each do |statement|
        sql = statement.strip
        # Skip the blank fragments left between/after the ';' separators.
        next if sql.empty?
        ActiveRecord::Base.connection.execute(sql)
      end
    end
  end
end
|
70
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line launcher: boots Suggester::Server through Vegas after loading
# the user's configuration file.

# Make the gem's own lib directory loadable when run from a checkout.
lib_dir = File.expand_path(File.dirname(__FILE__) + '/../lib')
$LOAD_PATH.unshift(lib_dir)

begin
  require 'vegas'
rescue LoadError
  # Fall back to RubyGems when vegas is not already on the load path.
  require 'rubygems'
  require 'vegas'
end
require 'suggester'
require 'suggester/server'

# The configuration file path comes from the Suggester_CONFIG environment
# variable, or else from the first command-line argument; nothing is loaded
# when neither is given.
load_config = lambda do |runner|
  config_path = ENV['Suggester_CONFIG'] || runner.args.first
  load config_path.to_s.strip if config_path
end

Vegas::Runner.new(Suggester::Server, 'suggester_server', :before_run => load_config)
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Binary-search extensions for Array.
#
# Every method takes an optional index +range+ to restrict the search and a
# block that is yielded an element and must return a negative number, zero or
# a positive number (like <=>) when the element sorts before, at, or after the
# target. The array must already be sorted consistently with that block.
class Array
  #
  # The binary search algorithm is extracted from Jon Bentley's
  # Programming Pearls 2nd ed. p.93
  #

  #
  # Return the lower boundary. (inside)
  #
  # This is the index of the first element in +range+ for which the block
  # returns a value >= 0, or the exclusive end of +range+ when there is none.
  #
  def bsearch_lower_boundary(range = 0...length, &block)
    lower = range.first - 1
    upper = range.exclude_end? ? range.last : range.last + 1
    while lower + 1 != upper
      mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
      if yield(self[mid]) < 0
        lower = mid
      else
        upper = mid
      end
    end
    upper
  end

  #
  # This method searches the FIRST occurrence which satisfies a
  # condition given by a block in binary fashion and return the
  # index of the first occurrence. Return nil if not found.
  #
  # The result is always inside +range+: a match that sits just past the end
  # of the requested range is reported as nil.
  #
  def bsearch_first(range = 0...length, &block)
    boundary = bsearch_lower_boundary(range, &block)
    limit = range.exclude_end? ? range.last : range.last + 1
    # boundary == limit means every element in the range sorts before the
    # target; self[boundary] is then outside the range and must not be tested.
    return nil if boundary >= limit || boundary >= length
    yield(self[boundary]) == 0 ? boundary : nil
  end

  # NOTE: this alias replaces the Array#bsearch that Ruby 2.0+ ships with,
  # which has different semantics (it returns an element, not an index).
  alias bsearch bsearch_first

  #
  # Return the upper boundary. (outside)
  #
  # This is one past the index of the last element in +range+ for which the
  # block returns a value <= 0, or the start of +range+ when there is none.
  #
  def bsearch_upper_boundary(range = 0...length, &block)
    lower = range.first - 1
    upper = range.exclude_end? ? range.last : range.last + 1
    while lower + 1 != upper
      mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
      if yield(self[mid]) <= 0
        lower = mid
      else
        upper = mid
      end
    end
    lower + 1 # outside of the matching range.
  end

  #
  # This method searches the LAST occurrence which satisfies a
  # condition given by a block in binary fashion and return the
  # index of the last occurrence. Return nil if not found.
  #
  # The result is always inside +range+: a match that sits just before the
  # start of the requested range is reported as nil.
  #
  def bsearch_last(range = 0...length, &block)
    # `- 1' for canceling `lower + 1' in bsearch_upper_boundary.
    boundary = bsearch_upper_boundary(range, &block) - 1

    # boundary < range.first means every element in the range sorts after the
    # target; self[boundary] is then outside the range and must not be tested.
    return nil if boundary < 0 || boundary < range.first
    yield(self[boundary]) == 0 ? boundary : nil
  end

  #
  # Return the search result as a Range object.
  #
  # The range is exclusive (lower...upper) and empty when nothing matches.
  #
  def bsearch_range(range = 0...length, &block)
    lower = bsearch_lower_boundary(range, &block)
    upper = bsearch_upper_boundary(range, &block)
    lower...upper
  end
end
|
82
|
+
|
data/lib/suggester.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
# This is a simple client to access a Suggester::Server instance.
|
2
|
+
#
|
3
|
+
# Author:: Jeff Ching
|
4
|
+
# Copyright:: Copyright (c) 2010
|
5
|
+
# License::   Distributed under the same terms as Ruby
|
6
|
+
require 'open-uri'
|
7
|
+
require 'cgi'
|
8
|
+
require 'json'
|
9
|
+
require 'timeout'
|
10
|
+
|
11
|
+
module Suggester
  # Simple client to access a Suggester::Server instance over HTTP.
  #
  # Requests never raise: timeouts and other errors are reported to the
  # optional logger and surface to the caller as an empty array.
  class Client
    attr_accessor :host, :port
    attr_accessor :logger
    # Maximum number of seconds to wait for a single request.
    attr_accessor :timeout

    # server uses sinatra with default port 17500
    DEFAULT_HOST = "localhost"
    DEFAULT_PORT = 17500
    # Seconds to wait for a response unless <tt>:timeout</tt> is supplied.
    DEFAULT_TIMEOUT = 0.5

    # Create an instance of the client
    #
    # ==== Parameters
    #
    # * <tt>options</tt> - Hash that may contain <tt>:host</tt> (default
    #   "localhost"), <tt>:port</tt> (default 17500), <tt>:logger</tt> (any object
    #   responding to +error+; default none) and <tt>:timeout</tt> (seconds,
    #   default 0.5).
    def initialize(options = {})
      @host    = options[:host] || DEFAULT_HOST
      @port    = options[:port] || DEFAULT_PORT
      @logger  = options[:logger]
      @timeout = options[:timeout] || DEFAULT_TIMEOUT
    end

    # Match returns an array of data returned that is an exact match for the query
    # string provided
    #
    # ==== Parameters
    #
    # * <tt>type</tt> - The name of the handler like "book"
    # * <tt>query</tt> - The search term you are matching on
    # * <tt>options</tt> - Hash of optional parameters which are passed as query
    #   parameters.  Includes the <tt>:limit</tt> option.
    #
    # ==== Examples
    #
    #   # find exact matches
    #   client = Suggester::Client.new
    #   client.match("book", "A Tale of Two Cities")   # returns all books that match "A Tale of Two Cities"
    #   client.match("book", "A Tale of Two Cities", :limit => 1) # return the first match for "A Tale of Two Cities"
    def match(type, query, options = {})
      fetch_response(build_url(type, "match", query, options))
    end

    # Find returns an array of data returned for records that start with query
    # string provided
    #
    # ==== Parameters
    #
    # * <tt>type</tt> - The name of the handler like "book"
    # * <tt>query</tt> - The search term you are searching on
    # * <tt>options</tt> - Hash of optional parameters which are passed as query
    #   parameters.  Includes the <tt>:limit</tt> option.
    #
    # ==== Examples
    #
    #   # find begins-with matches
    #   client = Suggester::Client.new
    #   client.find("book", "A Tale")   # returns all books that start with "A Tale"
    #   client.find("book", "A Tale", :limit => 1) # return the first book that starts with "A Tale"
    def find(type, query, options = {})
      fetch_response(build_url(type, "find", query, options))
    end

    # Refresh tells the server to force a reload of its cached data
    #
    # Returns true only when the parsed response is the string "OK"; any
    # other response, or a failed request, yields false.
    #
    # ==== Parameters
    #
    # * <tt>type</tt> - The name of the handler to refresh
    def refresh(type)
      fetch_response("http://#{@host}:#{@port}/#{type}/refresh") == "OK"
    end

    private

    # Fetches +url+ and returns the JSON-decoded body. Returns [] when the
    # request times out, fails, or the body is not valid JSON; the problem is
    # sent to the logger (if one is set) instead of being raised.
    def fetch_response(url) #:nodoc:
      response = []
      begin
        Timeout.timeout(@timeout) do
          open_url(url) { |content| response = JSON.parse(content.read) }
        end
      rescue Timeout::Error => e
        @logger.error("Timeout: #{e}\n#{url}") if @logger
      rescue => e
        # Join the backtrace so it is logged one frame per line.
        trace = Array(e.backtrace).join("\n")
        @logger.error("Error: #{e}\nURL: #{url}\nStack Trace:\n#{trace}") if @logger
      end
      response
    end

    # Opens +url+ with open-uri and yields the response IO.
    # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used
    # whenever it is publicly available, with Kernel#open as the fallback.
    def open_url(url, &block) #:nodoc:
      if URI.respond_to?(:open)
        URI.open(url, &block)
      else
        open(url, &block)
      end
    end

    # Builds "http://host:port/<type>/<method>/<escaped query>.json", followed
    # by the CGI-escaped +options+ as a query string when any are given.
    def build_url(type, method, query, options) #:nodoc:
      url = "http://#{@host}:#{@port}/#{type}/#{method}/#{CGI.escape(query)}.json"
      return url if options.empty?

      params = options.map { |key, value| "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}" }
      "#{url}?#{params.join('&')}"
    end
  end
end
|