suggester 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +18 -0
- data/Gemfile.lock +43 -0
- data/README.md +127 -0
- data/Rakefile +70 -0
- data/VERSION +1 -0
- data/bin/suggester_server +18 -0
- data/config/database.yml.example +6 -0
- data/lib/array_bsearch.rb +82 -0
- data/lib/suggester.rb +3 -0
- data/lib/suggester/client.rb +109 -0
- data/lib/suggester/handlers.rb +15 -0
- data/lib/suggester/handlers/active_record.rb +51 -0
- data/lib/suggester/handlers/base.rb +107 -0
- data/lib/suggester/handlers/helpers/normalization.rb +3 -0
- data/lib/suggester/handlers/helpers/refresh.rb +37 -0
- data/lib/suggester/handlers/marshal.rb +22 -0
- data/lib/suggester/handlers/yaml.rb +22 -0
- data/lib/suggester/server.rb +130 -0
- data/pkg/suggester-0.0.1.gem +0 -0
- data/pkg/suggester-0.0.2.gem +0 -0
- data/suggester.gemspec +92 -0
- data/test/fixtures/books.sql +15 -0
- data/test/fixtures/marshal.marshal +0 -0
- data/test/fixtures/yaml.yml +25 -0
- data/test/functional/basic_test.rb +98 -0
- data/test/test_helper.rb +26 -0
- data/test/unit/active_record_handler_test.rb +74 -0
- data/test/unit/marshal_handler_test.rb +65 -0
- data/test/unit/yaml_handler_test.rb +66 -0
- metadata +178 -0
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
source :rubygems
|
2
|
+
gem "sinatra"
|
3
|
+
gem "json"
|
4
|
+
gem "vegas"
|
5
|
+
|
6
|
+
group :development do
|
7
|
+
gem "bundler", "~> 1.0.0"
|
8
|
+
gem "jeweler", "~> 1.5.1"
|
9
|
+
gem "rcov", ">= 0"
|
10
|
+
end
|
11
|
+
|
12
|
+
group :test do
|
13
|
+
gem "shoulda", ">= 0"
|
14
|
+
gem "mocha", ">= 0"
|
15
|
+
gem "activerecord", "=2.3.9"
|
16
|
+
gem "rack-test"
|
17
|
+
gem "mysql"
|
18
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activerecord (2.3.9)
|
5
|
+
activesupport (= 2.3.9)
|
6
|
+
activesupport (2.3.9)
|
7
|
+
git (1.2.5)
|
8
|
+
jeweler (1.5.1)
|
9
|
+
bundler (~> 1.0.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rake
|
12
|
+
json (1.4.6)
|
13
|
+
mocha (0.9.8)
|
14
|
+
rake
|
15
|
+
mysql (2.8.1)
|
16
|
+
rack (1.2.1)
|
17
|
+
rack-test (0.5.6)
|
18
|
+
rack (>= 1.0)
|
19
|
+
rake (0.8.7)
|
20
|
+
rcov (0.9.9)
|
21
|
+
shoulda (2.11.3)
|
22
|
+
sinatra (1.1.0)
|
23
|
+
rack (~> 1.1)
|
24
|
+
tilt (~> 1.1)
|
25
|
+
tilt (1.1)
|
26
|
+
vegas (0.1.8)
|
27
|
+
rack (>= 1.0.0)
|
28
|
+
|
29
|
+
PLATFORMS
|
30
|
+
ruby
|
31
|
+
|
32
|
+
DEPENDENCIES
|
33
|
+
activerecord (= 2.3.9)
|
34
|
+
bundler (~> 1.0.0)
|
35
|
+
jeweler (~> 1.5.1)
|
36
|
+
json
|
37
|
+
mocha
|
38
|
+
mysql
|
39
|
+
rack-test
|
40
|
+
rcov
|
41
|
+
shoulda
|
42
|
+
sinatra
|
43
|
+
vegas
|
data/README.md
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
Suggester
|
2
|
+
=========
|
3
|
+
|
4
|
+
Suggester is an extensible, cache-based auto-suggest server and client for ruby.
|
5
|
+
Suggester also supports replication and scheduled refreshing.
|
6
|
+
|
7
|
+
To use, you provide a configuration ruby file which registers "handlers". Handlers
|
8
|
+
can be any Ruby object that responds to `find`, `match`, and `refresh`. We've included
|
9
|
+
3 basic handlers which are easily extensible:
|
10
|
+
|
11
|
+
1. Suggester::Handlers::ActiveRecord - pulls all records using an ActiveRecord class
|
12
|
+
2. Suggester::Handlers::Marshal - deserializes a Marshaled blob from a file or url
|
13
|
+
3. Suggester::Handlers::Yaml - deserializes YAML from a file or url
|
14
|
+
|
15
|
+
All 3 of these supplied handlers store their data in a sorted array. We do a binary
|
16
|
+
search to find results.
|
17
|
+
|
18
|
+
Documentation & Requirements
|
19
|
+
----------------------------
|
20
|
+
|
21
|
+
Suggester requires the following gems:
|
22
|
+
|
23
|
+
* Sinatra
|
24
|
+
* Vegas
|
25
|
+
* JSON
|
26
|
+
|
27
|
+
Server
|
28
|
+
------
|
29
|
+
|
30
|
+
### Examples
|
31
|
+
|
32
|
+
Basic usage:
|
33
|
+
|
34
|
+
suggester_server <config_file.rb>
|
35
|
+
|
36
|
+
Config file example (config_file.rb):
|
37
|
+
|
38
|
+
# assuming that Book is an ActiveRecord class
|
39
|
+
Suggester::Server.add_handler("books", Suggester::Handlers::ActiveRecord.new(:class => Book))
|
40
|
+
|
41
|
+
# using the Marshal handler
|
42
|
+
Suggester::Server.add_handler("cars", Suggester::Handlers::Marshal.new(:file => "/path/to/file.marshal"))
|
43
|
+
|
44
|
+
# using the Yaml handler
|
45
|
+
Suggester::Server.add_handler("dogs", Suggester::Handlers::Yaml.new(:file => "/path/to/file.yml"))
|
46
|
+
|
47
|
+
### Web Interface
|
48
|
+
|
49
|
+
### Creating Custom Handlers
|
50
|
+
|
51
|
+
All Handler classes must respond to 2 public instance methods:
|
52
|
+
|
53
|
+
1. find(name, options = {})
|
54
|
+
2. match(name, options = {})
|
55
|
+
|
56
|
+
These methods should return an array of data that matches the supplied name (search term).
|
57
|
+
|
58
|
+
### Refreshing Handlers
|
59
|
+
|
60
|
+
Upon server start, we spawn a thread that refreshes handler caches. We do this to prevent blocking
|
61
|
+
the webserver while a cache reloads. Since suggester server should never modify its data source,
|
62
|
+
we shouldn't have to worry much about concurrency. Note that results returned may represent an older
|
63
|
+
state of the cache, but never an in-between cache state.
|
64
|
+
|
65
|
+
There are 2 ways to refresh a handler.
|
66
|
+
|
67
|
+
The first way is to make a GET request to `/<handler_name>/refresh`. This call forces the server
|
68
|
+
to reload the specified cache as soon as possible.
|
69
|
+
|
70
|
+
The second way is to schedule recurring refreshes. When using any handler that inherits from
|
71
|
+
Suggester::Handlers::Base, you may supply a `:refresh_interval` option (in minutes). The
|
72
|
+
server will automatically refresh that cache every `:refresh_interval` minutes.
|
73
|
+
|
74
|
+
### Replication
|
75
|
+
|
76
|
+
An easy way to set up replication is through configuration; the Marshal and Yaml handlers are
|
77
|
+
best for the job. Because the Marshal and Yaml handlers can load from a local file or a url,
|
78
|
+
you can point the replicant handler to a master server's dump output.
|
79
|
+
|
80
|
+
Example Master Configuration:
|
81
|
+
|
82
|
+
Suggester::Server.add_handler("books", Suggester::Handlers::ActiveRecord.new(:class => Book))
|
83
|
+
Suggester::Server.add_handler("authors", Suggester::Handlers::ActiveRecord.new(:class => Book, :name_field => :author))
|
84
|
+
|
85
|
+
Replicant Configuration:
|
86
|
+
|
87
|
+
Suggester::Server.add_handler("books", Suggester::Handlers::Marshal.new(:file => "http://<master:port>/books/dump.marshal"))
|
88
|
+
Suggester::Server.add_handler("authors", Suggester::Handlers::Yaml.new(:file => "http://<master:port>/authors/dump.yml"))
|
89
|
+
|
90
|
+
Note that this method of replication uses the pull model -- the replicant must determine when to
|
91
|
+
pull from the master.
|
92
|
+
|
93
|
+
Client
|
94
|
+
------
|
95
|
+
|
96
|
+
### Examples
|
97
|
+
|
98
|
+
Basic usage:
|
99
|
+
|
100
|
+
require 'rubygems'
|
101
|
+
require 'suggester'
|
102
|
+
require 'suggester/client'
|
103
|
+
|
104
|
+
# create an instance of the client pointing to a server at localhost:5050
|
105
|
+
client = Suggester::Client.new(:host => 'localhost', :port => 5050)
|
106
|
+
|
107
|
+
# find begins-with matches
|
108
|
+
client.find("books", "A Tale of Two")
|
109
|
+
=> [{:id => 1, :display_name => "A Tale of Two Cities"}]
|
110
|
+
client.find("books", "The ")
|
111
|
+
=> [{:id => 4, :display_name => "The Origin of Species"},
|
112
|
+
{:id => 5, :display_name => "The Catcher in the Rye"},
|
113
|
+
{:id => 6, :display_name => "The Lord of the Rings"}]
|
114
|
+
client.find("books", "Non-existent")
|
115
|
+
=> []
|
116
|
+
|
117
|
+
# find exact matches
|
118
|
+
client.match("books", "a tale of two cities")
|
119
|
+
=> [{:id => 1, :display_name => "A Tale of Two Cities"}]
|
120
|
+
client.match("books", "A Tale of Two")
|
121
|
+
=> []
|
122
|
+
client.match("books", "The")
|
123
|
+
=> []
|
124
|
+
|
125
|
+
# tell the server to refresh its handler
|
126
|
+
client.refresh("books")
|
127
|
+
=> true
|
data/Rakefile
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups. When a gem is missing,
# report it and exit with the status code Bundler attaches to the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "suggester"
  gem.homepage = "http://github.com/chingor13/suggester"
  gem.license = "MIT"
  gem.summary = %Q{Extensible, cache-based auto-suggest server for ruby.}
  gem.description = %Q{Extensible, cache-based auto-suggest server for ruby.  Includes refresh and replication support out of the box.}
  gem.email = "ching.jeff@gmail.com"
  gem.authors = ["Jeff Ching"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  gem.add_runtime_dependency 'jabber4r', '> 0.1'
  #  gem.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/*_test.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# `rake rcov` runs the same test files under rcov.
require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

task :default => :test

# `rake rdoc` builds API docs from the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "suggester #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

namespace :db do
  desc 'setup test activerecord databases'
  task :prepare do
    load File.join(File.dirname(__FILE__), 'test', 'test_helper.rb')
    # NOTE(review): RUN_AR_TESTS and the ActiveRecord connection are presumably
    # set up by test_helper.rb, which is loaded just above -- confirm there.
    if RUN_AR_TESTS
      fixture_sql = File.join(File.dirname(__FILE__), 'test', 'fixtures', 'books.sql')
      # File.read closes the fixture file once it has been read; a bare
      # open(...).read would leave the handle open until garbage collection.
      File.read(fixture_sql).split(";").each do |statement|
        sql = statement.strip
        # Splitting on ";" leaves blank fragments (e.g. after the final
        # statement); skip those instead of sending them to the database.
        ActiveRecord::Base.connection.execute(sql) unless sql.empty?
      end
    end
  end
end
|
70
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line launcher: boots Suggester::Server through Vegas under the
# application name 'suggester_server'.

# Make the gem's own lib/ directory take precedence on the load path.
lib_dir = File.expand_path('../lib', File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir)

begin
  require 'vegas'
rescue LoadError
  # vegas was not directly loadable; retry with RubyGems activated.
  require 'rubygems'
  require 'vegas'
end
require 'suggester'
require 'suggester/server'

# Hook run by Vegas before the server starts: loads the handler configuration
# file named by the Suggester_CONFIG environment variable, falling back to the
# first command-line argument. Nothing is loaded when neither is present.
load_config = lambda do |runner|
  config_path = ENV['Suggester_CONFIG'] || runner.args.first
  load config_path.to_s.strip if config_path
end

Vegas::Runner.new(Suggester::Server, 'suggester_server', :before_run => load_config)
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Binary-search helpers for arrays that are already sorted.
#
# Every method takes an optional index range to search within (defaults to the
# whole array) and a block. The block receives an element and must return a
# negative number when the element sorts before the target, zero when it
# matches, and a positive number when it sorts after -- i.e. the result of
# <tt>element <=> target</tt>.
class Array
  #
  # The binary search algorithm is extracted from Jon Bentley's
  # Programming Pearls 2nd ed. p.93
  #

  #
  # Return the lower boundary. (inside)
  #
  # This is the index of the first element in +range+ for which the block
  # returns a value >= 0, or the (exclusive) end of +range+ when there is none.
  #
  def bsearch_lower_boundary(range = 0...length, &block)
    lower = range.first - 1
    upper = range.exclude_end? ? range.last : range.last + 1
    while lower + 1 != upper
      mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
      if yield(self[mid]) < 0
        lower = mid
      else
        upper = mid
      end
    end
    upper
  end

  #
  # This method searches the FIRST occurrence which satisfies a
  # condition given by a block in binary fashion and return the
  # index of the first occurrence. Return nil if not found.
  #
  def bsearch_first(range = 0...length, &block)
    boundary = bsearch_lower_boundary(range, &block)

    # The lower boundary equals the exclusive end of the searched range when
    # nothing inside it qualifies. Stop there (and at the end of the array) so
    # an element lying just past the requested range is never reported.
    range_end = range.exclude_end? ? range.last : range.last + 1
    limit = range_end < length ? range_end : length

    return nil if boundary >= limit || yield(self[boundary]) != 0
    boundary
  end

  alias bsearch bsearch_first

  #
  # Return the upper boundary. (outside)
  #
  # This is one past the index of the last element in +range+ for which the
  # block returns a value <= 0, or the start of +range+ when there is none.
  #
  def bsearch_upper_boundary(range = 0...length, &block)
    lower = range.first - 1
    upper = range.exclude_end? ? range.last : range.last + 1
    while lower + 1 != upper
      mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
      if yield(self[mid]) <= 0
        lower = mid
      else
        upper = mid
      end
    end
    lower + 1 # outside of the matching range.
  end

  #
  # This method searches the LAST occurrence which satisfies a
  # condition given by a block in binary fashion and return the
  # index of the last occurrence. Return nil if not found.
  #
  def bsearch_last(range = 0...length, &block)
    # `- 1' for canceling `lower + 1' in bsearch_upper_boundary.
    boundary = bsearch_upper_boundary(range, &block) - 1

    # A boundary before the start of the range means nothing inside the range
    # qualified; do not report an element lying just before the range.
    return nil if boundary < 0 || boundary < range.first
    return nil if yield(self[boundary]) != 0
    boundary
  end

  #
  # Return the search result as a Range object.
  #
  # The range is exclusive (lower...upper) and is empty when nothing matches.
  #
  def bsearch_range(range = 0...length, &block)
    lower = bsearch_lower_boundary(range, &block)
    upper = bsearch_upper_boundary(range, &block)
    lower...upper
  end
end
|
82
|
+
|
data/lib/suggester.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
# This is a simple client to access a Suggester::Server instance.
|
2
|
+
#
|
3
|
+
# Author:: Jeff Ching
|
4
|
+
# Copyright:: Copyright (c) 2010
|
5
|
+
# License::   Distributed under the same terms as Ruby
|
6
|
+
require 'open-uri'
|
7
|
+
require 'cgi'
|
8
|
+
require 'json'
|
9
|
+
require 'timeout'
|
10
|
+
|
11
|
+
module Suggester
  # Simple client to access a Suggester::Server instance
  #
  # Requests are plain HTTP GETs whose bodies are parsed as JSON. Failures
  # (timeouts, connection errors, unparsable bodies) are logged to the optional
  # logger and otherwise swallowed, so lookups return an empty array instead of
  # raising.
  class Client
    attr_accessor :host, :port
    attr_accessor :logger
    attr_accessor :timeout

    # server uses sinatra with default port 17500
    DEFAULT_HOST = "localhost"
    DEFAULT_PORT = 17500
    # seconds to wait for a response before giving up
    DEFAULT_TIMEOUT = 0.5

    # Create an instance of the client
    #
    # ==== Parameters
    #
    # * <tt>options</tt> - Hash of settings:
    #   <tt>:host</tt> (default "localhost"), <tt>:port</tt> (default 17500),
    #   <tt>:timeout</tt> in seconds (default 0.5) and <tt>:logger</tt>, an
    #   optional object responding to <tt>error</tt>.
    def initialize(options = {})
      @host    = options[:host] || DEFAULT_HOST
      @port    = options[:port] || DEFAULT_PORT
      @timeout = options[:timeout] || DEFAULT_TIMEOUT
      @logger  = options[:logger]
    end

    # Match returns an array of data returned that is an exact match for the query
    # string provided
    #
    # ==== Parameters
    #
    # * <tt>type</tt> - The name of the handler like "book"
    # * <tt>query</tt> - The search term you are matching on
    # * <tt>options</tt> - Hash of optional parameters which are passed as query
    #                      parameters.  Includes the <tt>:limit</tt> option.
    #
    # ==== Examples
    #
    #   # find exact matches
    #   client = Suggester::Client.new
    #   client.match("book", "A Tale of Two Cities")    # returns all books that match "A Tale of Two Cities"
    #   client.match("book", "A Tale of Two Cities", :limit => 1) # return the first match for "A Tale of Two Cities"
    def match(type, query, options = {})
      fetch_response(build_url(type, "match", query, options))
    end

    # Find returns an array of data returned for records that start with query
    # string provided
    #
    # ==== Parameters
    #
    # * <tt>type</tt> - The name of the handler like "book"
    # * <tt>query</tt> - The search term you are searching on
    # * <tt>options</tt> - Hash of optional parameters which are passed as query
    #                      parameters.  Includes the <tt>:limit</tt> option.
    #
    # ==== Examples
    #
    #   # find begins-with matches
    #   client = Suggester::Client.new
    #   client.find("book", "A Tale")    # returns all books that start with "A Tale"
    #   client.find("book", "A Tale", :limit => 1) # return the first book that starts with "A Tale"
    def find(type, query, options = {})
      fetch_response(build_url(type, "find", query, options))
    end

    # Refresh tells the server to force a reload of its cached data
    #
    # Returns true only when the parsed response is the string "OK"; any other
    # response, or a failed request, yields false.
    #
    # ==== Parameters
    #
    # * <tt>type</tt> - The name of the handler to refresh
    def refresh(type)
      url = "http://#{@host}:#{@port}/#{type}/refresh"
      response = fetch_response(url)
      return response.is_a?(String) && response == "OK"
    end

    private

    # GET +url+ and return the JSON-decoded body. Returns an empty array when
    # the request times out or fails for any other reason; the failure is
    # reported to the logger when one is configured.
    def fetch_response(url) #:nodoc:
      response = []
      begin
        Timeout::timeout(@timeout) do
          open_url(url) do |content|
            response = JSON.parse(content.read)
          end
        end
      rescue Timeout::Error => e
        @logger.error("Timeout: #{e}\n#{url}") if @logger
      rescue => e
        # Join the backtrace so each frame is logged on its own line.
        @logger.error("Error: #{e}\nURL: #{url}\nStack Trace:\n#{Array(e.backtrace).join("\n")}") if @logger
      end
      response
    end

    # Open +url+ through open-uri. Newer Rubies expose this as URI.open and no
    # longer let Kernel#open fetch URLs; older Rubies only offer the patched
    # Kernel#open, so fall back to it when URI.open is not publicly available.
    def open_url(url, &block) #:nodoc:
      if URI.respond_to?(:open)
        URI.open(url, &block)
      else
        open(url, &block)
      end
    end

    # Build the request URL for +method+ ("find" or "match") on handler +type+.
    # The query and every option key/value are CGI-escaped; options become the
    # query string.
    def build_url(type, method, query, options) #:nodoc:
      url = "http://#{@host}:#{@port}/#{type}/#{method}/#{CGI::escape(query)}.json"
      unless options.empty?
        s = []
        options.each do |k,v|
          s << "#{CGI::escape(k.to_s)}=#{CGI::escape(v.to_s)}"
        end
        url = url + "?" + s.join("&")
      end
      url
    end
  end
end
|