fuzzy_search 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.md +54 -0
- data/Rakefile +79 -0
- data/lib/fuzzy_model_extensions.rb +112 -0
- data/lib/fuzzy_search.rb +17 -0
- data/lib/fuzzy_search_trigram.rb +2 -0
- data/lib/fuzzy_search_ver.rb +3 -0
- data/rails_generators/fuzzy_search_setup/fuzzy_search_setup_generator.rb +8 -0
- data/rails_generators/fuzzy_search_setup/templates/create_fuzzy_search_trigrams.rb +15 -0
- data/test/app_root/app/models/email.rb +3 -0
- data/test/app_root/app/models/person.rb +3 -0
- data/test/app_root/config/boot.rb +115 -0
- data/test/app_root/config/database.yml +6 -0
- data/test/app_root/config/environment.rb +9 -0
- data/test/app_root/config/environments/test.rb +20 -0
- data/test/app_root/config/routes.rb +4 -0
- data/test/app_root/db/migrate/20100529235049_create_tables.rb +18 -0
- data/test/app_root/db/migrate/20111013132330_create_fuzzy_search_trigrams.rb +15 -0
- data/test/app_root/vendor/plugins/fuzzy_search/init.rb +2 -0
- data/test/factories.rb +13 -0
- data/test/test.watchr +6 -0
- data/test/test_helper.rb +69 -0
- data/test/unit/fuzzy_search_test.rb +108 -0
- metadata +102 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Kristian Meier, (c) 2011 David Simon
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# fuzzy_search
|
2
|
+
|
3
|
+
Search through your models while tolerating slight mis-spellings. If you have a Person in your database named O'Reilly, you want your users to be able to find it even if they type "OReilly" or "O'Rielly".
|
4
|
+
|
5
|
+
This gem is not as powerful as dedicated search tools like Solr, but it's much quicker and easier to set up. It uses your regular database for indexing, rather than an external service that has to be maintained separately.
|
6
|
+
|
7
|
+
Currently only Rails 2 is supported. I welcome any contributions that resolve this!
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add `fuzzy_search` to your Rails project's Gemfile, and do the usual `bundle install` dance.
|
12
|
+
|
13
|
+
Then, run the generator and migrate to create the search table:
|
14
|
+
|
15
|
+
$ ./script/generate fuzzy_search_setup
|
16
|
+
$ rake db:migrate
|
17
|
+
|
18
|
+
## Example
|
19
|
+
|
20
|
+
To allow a model to be searched, specify which columns are to be indexed:
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
class Person < ActiveRecord::Base
|
24
|
+
# ...
|
25
|
+
fuzzy_searchable_on :first_name, :last_name
|
26
|
+
# ...
|
27
|
+
end
|
28
|
+
```
|
29
|
+
Now, the gem will update the index whenever a Person is saved. To index all the existing records in a model, do this:
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
Person.rebuild_fuzzy_search_index!
|
33
|
+
```
|
34
|
+
|
35
|
+
The fuzzy_search method returns arrays:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
people = Person.fuzzy_search "OReilly"
|
39
|
+
```
|
40
|
+
|
41
|
+
Fuzzy search works on scopes too, including named_scopes and on-the-fly scopes:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
people = Person.scoped({:conditions => ["state='active'"]}).fuzzy_search("OReilly")
|
45
|
+
```
|
46
|
+
|
47
|
+
## Licence and credits
|
48
|
+
|
49
|
+
This gem is based on the rails-fuzzy-search plugin by iulianu
|
50
|
+
(https://github.com/iulianu/rails-fuzzy-search), which was in
|
51
|
+
turn based on the act_as_fuzzy_search plugin for DataMapper
|
52
|
+
by mkristian (http://github.com/mkristian/kristians_rails_plugins).
|
53
|
+
|
54
|
+
This gem is available under the MIT Licence.
|
data/Rakefile
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/testtask'
|
4
|
+
require 'rdoc/task'
|
5
|
+
require 'rubygems/package_task'
|
6
|
+
|
7
|
+
def common_test_settings(t)
|
8
|
+
t.libs << 'lib'
|
9
|
+
t.libs << 'test'
|
10
|
+
t.pattern = 'test/**/*_test.rb'
|
11
|
+
t.verbose = true
|
12
|
+
end
|
13
|
+
|
14
|
+
desc 'Default: run unit tests.'
|
15
|
+
task :default => :test
|
16
|
+
|
17
|
+
desc 'Test fuzzy_search.'
|
18
|
+
Rake::TestTask.new(:test) do |t|
|
19
|
+
common_test_settings(t)
|
20
|
+
end
|
21
|
+
|
22
|
+
desc 'Run tests automatically as files change'
|
23
|
+
task :watchr do |t|
|
24
|
+
exec 'watchr test/test.watchr'
|
25
|
+
end
|
26
|
+
|
27
|
+
desc 'Generate documentation for fuzzy_search.'
|
28
|
+
# RDoc task: builds the API documentation into the rdoc/ directory from the
# project README and every Ruby file under lib/.
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'FuzzySearch'
  rdoc.options << '--line-numbers' << '--inline-source'
  # The README shipped with the gem is README.md; a bare 'README' pattern
  # matches no file, so the README would be left out of the docs.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
35
|
+
|
36
|
+
begin
|
37
|
+
require 'rcov/rcovtask'
|
38
|
+
|
39
|
+
Rcov::RcovTask.new(:rcov) do |t|
|
40
|
+
common_test_settings(t)
|
41
|
+
t.rcov_opts << '-o coverage -x "/ruby/,/gems/,/test/,/migrate/"'
|
42
|
+
end
|
43
|
+
rescue LoadError
|
44
|
+
# Rcov wasn't available
|
45
|
+
end
|
46
|
+
|
47
|
+
begin
|
48
|
+
require 'ruby-prof/task'
|
49
|
+
|
50
|
+
RubyProf::ProfileTask.new(:profile) do |t|
|
51
|
+
common_test_settings(t)
|
52
|
+
t.output_dir = "#{File.dirname(__FILE__)}/profile"
|
53
|
+
t.printer = :call_tree
|
54
|
+
t.min_percent = 10
|
55
|
+
end
|
56
|
+
rescue LoadError
|
57
|
+
# Ruby-prof wasn't available
|
58
|
+
end
|
59
|
+
|
60
|
+
require 'lib/fuzzy_search_ver'
|
61
|
+
gemspec = Gem::Specification.new do |s|
|
62
|
+
s.name = "fuzzy_search"
|
63
|
+
s.version = FuzzySearch::VERSION
|
64
|
+
s.authors = ["Kristian Meier", "David Mike Simon"]
|
65
|
+
s.email = "david.mike.simon@gmail.com"
|
66
|
+
s.homepage = "http://github.com/DavidMikeSimon/fuzzy_search"
|
67
|
+
s.summary = "Search ActiveRecord models for strings similar to a query string"
|
68
|
+
s.description = "Implements fuzzy searching for ActiveRecord, using your database's own indexing instead of depending on external tools."
|
69
|
+
|
70
|
+
s.files = `git ls-files .`.split("\n") - [".gitignore"]
|
71
|
+
s.platform = Gem::Platform::RUBY
|
72
|
+
s.require_path = 'lib'
|
73
|
+
s.rubyforge_project = '[none]'
|
74
|
+
|
75
|
+
s.add_dependency('ar-extensions', '0.9.5')
|
76
|
+
end
|
77
|
+
|
78
|
+
Gem::PackageTask.new(gemspec) do |pkg|
|
79
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module FuzzySearch
|
2
|
+
module ModelExtensions
|
3
|
+
def self.included(base)
|
4
|
+
base.extend ClassMethods
|
5
|
+
|
6
|
+
base.write_inheritable_attribute :fuzzy_search_properties, []
|
7
|
+
base.class_inheritable_reader :fuzzy_search_properties
|
8
|
+
|
9
|
+
base.write_inheritable_attribute :fuzzy_search_threshold, 5
|
10
|
+
base.class_inheritable_reader :fuzzy_search_threshold
|
11
|
+
end
|
12
|
+
|
13
|
+
module ClassMethods
|
14
|
+
def fuzzy_searchable_on(*properties)
|
15
|
+
# TODO: Complain if fuzzy_searchable_on is called more than once
|
16
|
+
write_inheritable_attribute :fuzzy_search_properties, properties
|
17
|
+
has_many :fuzzy_search_trigrams, :as => :rec, :dependent => :destroy
|
18
|
+
after_save :update_fuzzy_search_trigrams!
|
19
|
+
named_scope :fuzzy_search_scope, lambda { |words| generate_fuzzy_search_scope_params(words) }
|
20
|
+
extend WordNormalizerClassMethod unless respond_to? :normalize
|
21
|
+
include InstanceMethods
|
22
|
+
end
|
23
|
+
|
24
|
+
def fuzzy_search(words)
|
25
|
+
# TODO: If fuzzy_search_scope doesn't exist, provide a useful error
|
26
|
+
fuzzy_search_scope(words).all
|
27
|
+
end
|
28
|
+
|
29
|
+
# Rebuilds the trigram index for every record of this model.
#
# All index rows whose rec_type is this model's class name are removed first,
# then each record regenerates its own trigrams.
#
# This method is called on the model class itself, so the model's name must
# be read with `name`. `self.class.name` would evaluate to "Class", match no
# rows, and leave trigrams of records that no longer exist in the index.
def rebuild_fuzzy_search_index!
  FuzzySearchTrigram.delete_all(:rec_type => name)
  all.each do |rec|
    rec.update_fuzzy_search_trigrams!
  end
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def generate_fuzzy_search_scope_params(words)
|
39
|
+
no_results = {:conditions => "0 = 1"}
|
40
|
+
return no_results unless words != nil
|
41
|
+
words = words.strip.to_s.split(/[\s\-]+/) unless words.instance_of? Array
|
42
|
+
return no_results unless words.size > 0
|
43
|
+
|
44
|
+
trigrams = []
|
45
|
+
words.each do |w|
|
46
|
+
word = ' ' + normalize(w) + ' '
|
47
|
+
word_as_chars = word.mb_chars
|
48
|
+
trigrams << (0..word_as_chars.length-3).collect {|idx| word_as_chars[idx,3].to_s}
|
49
|
+
end
|
50
|
+
trigrams = trigrams.flatten.uniq
|
51
|
+
|
52
|
+
# Transform the list of columns in the searchable entity into
|
53
|
+
# a SQL fragment like:
|
54
|
+
# "table_name.id, table_name.field1, table_name.field2, ..."
|
55
|
+
entity_fields = columns.map {|col| table_name + "." + col.name}.join(", ")
|
56
|
+
|
57
|
+
# The SQL expression for calculating fuzzy_score
|
58
|
+
# Has to be used multiple times because some databases (i.e. Postgres) do not support HAVING on named SELECT fields
|
59
|
+
# TODO: See if we can't get the count(*) out of here, that's a non-trivial operation in some databases
|
60
|
+
fuzzy_score_expr = "(((count(*)*100.0)/#{trigrams.size}) + " +
|
61
|
+
"((count(*)*100.0)/(SELECT count(*) FROM fuzzy_search_trigrams WHERE rec_id = #{table_name}.#{primary_key} AND rec_type = '#{name}')))/2.0"
|
62
|
+
|
63
|
+
# TODO: Optimize this query. In a large trigram table, this is going to go through a lot of dead ends.
|
64
|
+
# Maybe I need to just bite the bullet and learn how to do procedures? That would break cross-database compatibility, though...
|
65
|
+
return {
|
66
|
+
:select => "#{fuzzy_score_expr} AS fuzzy_score, #{entity_fields}",
|
67
|
+
:joins => ["LEFT OUTER JOIN fuzzy_search_trigrams ON fuzzy_search_trigrams.rec_id = #{table_name}.#{primary_key}"],
|
68
|
+
:conditions => ["fuzzy_search_trigrams.token IN (?) AND rec_type = '#{name}'", trigrams],
|
69
|
+
:group => "#{table_name}.#{primary_key}",
|
70
|
+
:order => "fuzzy_score DESC",
|
71
|
+
:having => "#{fuzzy_score_expr} >= #{fuzzy_search_threshold}"
|
72
|
+
}
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
module WordNormalizerClassMethod
|
77
|
+
def normalize(word)
|
78
|
+
word.mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/n,'').downcase.to_s
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
module InstanceMethods
|
83
|
+
def update_fuzzy_search_trigrams!
|
84
|
+
FuzzySearchTrigram.delete_all(:rec_id => self.id, :rec_type => self.class.name)
|
85
|
+
|
86
|
+
# to avoid double entries
|
87
|
+
tokens = []
|
88
|
+
self.class.fuzzy_search_properties.each do |prop|
|
89
|
+
prop_value = send(prop)
|
90
|
+
next if prop_value.nil?
|
91
|
+
# split the property into words (which are separated by whitespaces)
|
92
|
+
# and generate the trigrams for each word
|
93
|
+
prop_value.to_s.split(/[\s\-]+/).each do |p|
|
94
|
+
# put a space in front and at the end to emphasize the endings
|
95
|
+
word = ' ' + self.class.normalize(p) + ' '
|
96
|
+
word_as_chars = word.mb_chars
|
97
|
+
(0..word_as_chars.length - 3).each do |idx|
|
98
|
+
token = word_as_chars[idx, 3].to_s
|
99
|
+
tokens << token unless tokens.member?(token)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
FuzzySearchTrigram.import(
|
105
|
+
[:token, :rec_id, :rec_type],
|
106
|
+
tokens.map{|t| [t, self.id, self.class.name]},
|
107
|
+
:validate => false
|
108
|
+
)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
data/lib/fuzzy_search.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'ar-extensions' # External dependency
|
2
|
+
# Monkey patch a bug in ar-extensions which breaks postgres compatibility
|
3
|
+
module ActiveRecord # :nodoc:
|
4
|
+
module ConnectionAdapters # :nodoc:
|
5
|
+
class AbstractAdapter # :nodoc:
|
6
|
+
def next_value_for_sequence(sequence_name)
|
7
|
+
%{nextval('#{sequence_name}')}
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'fuzzy_model_extensions'
|
14
|
+
require 'fuzzy_search_trigram'
|
15
|
+
require 'fuzzy_search_ver'
|
16
|
+
|
17
|
+
ActiveRecord::Base.send(:include, FuzzySearch::ModelExtensions)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class CreateFuzzySearchTrigrams < ActiveRecord::Migration
|
2
|
+
def self.up
|
3
|
+
create_table :fuzzy_search_trigrams, :id => false do |t|
|
4
|
+
t.column :token, :string, :limit => 3
|
5
|
+
t.column :rec_type, :string
|
6
|
+
t.column :rec_id, :integer
|
7
|
+
end
|
8
|
+
|
9
|
+
add_index :fuzzy_search_trigrams, [:rec_type, :token]
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.down
|
13
|
+
drop_table :fuzzy_search_trigrams
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# Allow customization of the rails framework path
|
2
|
+
RAILS_FRAMEWORK_ROOT = (ENV['RAILS_FRAMEWORK_ROOT'] || "#{File.dirname(__FILE__)}/../../../../../../vendor/rails") unless defined?(RAILS_FRAMEWORK_ROOT)
|
3
|
+
|
4
|
+
# Don't change this file!
|
5
|
+
# Configure your app in config/environment.rb and config/environments/*.rb
|
6
|
+
|
7
|
+
RAILS_ROOT = "#{File.dirname(__FILE__)}/.." unless defined?(RAILS_ROOT)
|
8
|
+
|
9
|
+
module Rails
|
10
|
+
class << self
|
11
|
+
def boot!
|
12
|
+
unless booted?
|
13
|
+
preinitialize
|
14
|
+
pick_boot.run
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def booted?
|
19
|
+
defined? Rails::Initializer
|
20
|
+
end
|
21
|
+
|
22
|
+
def pick_boot
|
23
|
+
(vendor_rails? ? VendorBoot : GemBoot).new
|
24
|
+
end
|
25
|
+
|
26
|
+
def vendor_rails?
|
27
|
+
File.exist?(RAILS_FRAMEWORK_ROOT)
|
28
|
+
end
|
29
|
+
|
30
|
+
def preinitialize
|
31
|
+
load(preinitializer_path) if File.exist?(preinitializer_path)
|
32
|
+
end
|
33
|
+
|
34
|
+
def preinitializer_path
|
35
|
+
"#{RAILS_ROOT}/config/preinitializer.rb"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
class Boot
|
40
|
+
def run
|
41
|
+
load_initializer
|
42
|
+
Rails::Initializer.run(:set_load_path)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
class VendorBoot < Boot
|
47
|
+
def load_initializer
|
48
|
+
require "#{RAILS_FRAMEWORK_ROOT}/railties/lib/initializer"
|
49
|
+
Rails::Initializer.run(:install_gem_spec_stubs)
|
50
|
+
Rails::GemDependency.add_frozen_gem_path
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class GemBoot < Boot
|
55
|
+
def load_initializer
|
56
|
+
self.class.load_rubygems
|
57
|
+
load_rails_gem
|
58
|
+
require 'initializer'
|
59
|
+
end
|
60
|
+
|
61
|
+
def load_rails_gem
|
62
|
+
if version = self.class.gem_version
|
63
|
+
gem 'rails', version
|
64
|
+
else
|
65
|
+
gem 'rails'
|
66
|
+
end
|
67
|
+
rescue Gem::LoadError => load_error
|
68
|
+
$stderr.puts %(Missing the Rails #{version} gem. Please `gem install -v=#{version} rails`, update your RAILS_GEM_VERSION setting in config/environment.rb for the Rails version you do have installed, or comment out RAILS_GEM_VERSION to use the latest version installed.)
|
69
|
+
exit 1
|
70
|
+
end
|
71
|
+
|
72
|
+
class << self
|
73
|
+
def rubygems_version
|
74
|
+
Gem::RubyGemsVersion rescue nil
|
75
|
+
end
|
76
|
+
|
77
|
+
def gem_version
|
78
|
+
if defined? RAILS_GEM_VERSION
|
79
|
+
RAILS_GEM_VERSION
|
80
|
+
elsif ENV.include?('RAILS_GEM_VERSION')
|
81
|
+
ENV['RAILS_GEM_VERSION']
|
82
|
+
else
|
83
|
+
parse_gem_version(read_environment_rb)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def load_rubygems
|
88
|
+
require 'rubygems'
|
89
|
+
min_version = '1.3.1'
|
90
|
+
unless rubygems_version >= min_version
|
91
|
+
$stderr.puts %Q(Rails requires RubyGems >= #{min_version} (you have #{rubygems_version}). Please `gem update --system` and try again.)
|
92
|
+
exit 1
|
93
|
+
end
|
94
|
+
|
95
|
+
rescue LoadError
|
96
|
+
$stderr.puts %Q(Rails requires RubyGems >= #{min_version}. Please install RubyGems and try again: http://rubygems.rubyforge.org)
|
97
|
+
exit 1
|
98
|
+
end
|
99
|
+
|
100
|
+
def parse_gem_version(text)
|
101
|
+
$1 if text =~ /^[^#]*RAILS_GEM_VERSION\s*=\s*["']([!~<>=]*\s*[\d.]+)["']/
|
102
|
+
end
|
103
|
+
|
104
|
+
private
|
105
|
+
def read_environment_rb
|
106
|
+
environment_rb = "#{RAILS_ROOT}/config/environment.rb"
|
107
|
+
environment_rb = "#{HELPER_RAILS_ROOT}/config/environment.rb" unless File.exists?(environment_rb)
|
108
|
+
File.read(environment_rb)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
# All that for this:
|
115
|
+
Rails.boot!
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'boot')
|
2
|
+
|
3
|
+
RAILS_GEM_VERSION = '2.3.12'
|
4
|
+
|
5
|
+
Rails::Initializer.run do |config|
|
6
|
+
config.cache_classes = false
|
7
|
+
config.whiny_nils = true
|
8
|
+
config.action_controller.session = {:key => 'rails_session', :secret => 'd229e4d22437432705ab3985d4d246'}
|
9
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
config.cache_classes = true
|
2
|
+
|
3
|
+
# Log error messages when you accidentally call methods on nil.
|
4
|
+
config.whiny_nils = true
|
5
|
+
|
6
|
+
# Show full error reports and disable caching
|
7
|
+
config.action_controller.consider_all_requests_local = true
|
8
|
+
config.action_controller.perform_caching = false
|
9
|
+
config.action_view.cache_template_loading = true
|
10
|
+
|
11
|
+
# Disable request forgery protection in test environment
|
12
|
+
config.action_controller.allow_forgery_protection = false
|
13
|
+
|
14
|
+
# Tell Action Mailer not to deliver emails to the real world.
|
15
|
+
# The :test delivery method accumulates sent emails in the
|
16
|
+
# ActionMailer::Base.deliveries array.
|
17
|
+
config.action_mailer.delivery_method = :test
|
18
|
+
|
19
|
+
# Avoid excessively huge log files
|
20
|
+
config.log_level = :error
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class CreateTables < ActiveRecord::Migration
|
2
|
+
def self.up
|
3
|
+
create_table :emails do |t|
|
4
|
+
t.string :address
|
5
|
+
end
|
6
|
+
|
7
|
+
create_table :people do |t|
|
8
|
+
t.string :first_name
|
9
|
+
t.string :last_name
|
10
|
+
t.string :hobby
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.down
|
15
|
+
drop_table :emails
|
16
|
+
drop_table :people
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class CreateFuzzySearchTrigrams < ActiveRecord::Migration
|
2
|
+
def self.up
|
3
|
+
create_table :fuzzy_search_trigrams, :id => false do |t|
|
4
|
+
t.column :token, :string, :limit => 3
|
5
|
+
t.column :rec_type, :string
|
6
|
+
t.column :rec_id, :integer
|
7
|
+
end
|
8
|
+
|
9
|
+
add_index :fuzzy_search_trigrams, [:rec_type, :token]
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.down
|
13
|
+
drop_table :fuzzy_search_trigrams
|
14
|
+
end
|
15
|
+
end
|
data/test/factories.rb
ADDED
data/test/test.watchr
ADDED
data/test/test_helper.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
ENV['RAILS_ENV'] = 'test'
|
2
|
+
|
3
|
+
prev_dir = Dir.getwd
|
4
|
+
begin
|
5
|
+
Dir.chdir("#{File.dirname(__FILE__)}/..")
|
6
|
+
|
7
|
+
begin
|
8
|
+
# Used when running test files directly
|
9
|
+
$LOAD_PATH << "#{File.dirname(__FILE__)}/../lib"
|
10
|
+
require "#{File.dirname(__FILE__)}/app_root/config/environment"
|
11
|
+
rescue LoadError
|
12
|
+
# This is needed for root-level rake task 'test'
|
13
|
+
require "app_root/config/environment"
|
14
|
+
end
|
15
|
+
ensure
|
16
|
+
Dir.chdir(prev_dir)
|
17
|
+
end
|
18
|
+
|
19
|
+
require 'rubygems'
|
20
|
+
require 'minitest/autorun'
|
21
|
+
require 'redgreen'
|
22
|
+
require 'pp'
|
23
|
+
|
24
|
+
# Set default string encoding to unicode
|
25
|
+
$KCODE = 'u'
|
26
|
+
|
27
|
+
module MiniTest
  # Shortens a backtrace for display in test output.
  #
  # Unless the FULL_BACKTRACE environment variable is set, frames whose path
  # contains "/ruby/" or "/gems/" are dropped. The remaining frames then have
  # a leading path removed, using the first rule that applies:
  #
  # 1. the directory prefix shared by all frames that do not start with "./"
  #    (only computed when there are at least two such frames),
  # 2. a leading "./",
  # 3. the current working directory.
  #
  # backtrace - Array of String frames.
  #
  # Returns a new Array of shortened String frames.
  def self.filter_backtrace(backtrace)
    backtrace = backtrace.select do |e|
      ENV['FULL_BACKTRACE'] || !(e.include?("/ruby/") || e.include?("/gems/"))
    end

    absolute = backtrace.reject { |e| e.start_with?("./") }
    common_prefix = nil
    if absolute.size > 1
      # Work on a copy: chop! mutates the string in place.
      common_prefix = String.new(absolute.first)
      absolute.each do |elem|
        # Terminates because the empty string is a prefix of every string.
        common_prefix.chop! until elem.start_with?(common_prefix)
      end

      # Cut the prefix back to a directory boundary so that stripping it
      # never removes part of a file name (e.g. two frames in the same file
      # would otherwise be reduced to bare line numbers).
      slash = common_prefix.rindex("/")
      common_prefix = slash ? common_prefix[0..slash] : nil
    end

    backtrace.map do |element|
      # The explicit parentheses matter: `start_with? a && b` parses as
      # `start_with?(a && b)` and would pass a boolean instead of the prefix.
      if common_prefix && element.start_with?(common_prefix) && common_prefix.size < element.size
        element[common_prefix.size, element.size]
      elsif element.start_with?("./")
        element[2, element.size]
      elsif element.start_with?(Dir.getwd)
        element[Dir.getwd.size + 1, element.size]
      else
        element
      end
    end
  end
end
|
62
|
+
|
63
|
+
require 'factories'
|
64
|
+
MiniTest::Unit::TestCase.send(:include, Factory::Syntax::Methods)
|
65
|
+
|
66
|
+
MiniTest::Unit::TestCase.add_setup_hook do
|
67
|
+
ActiveRecord::Migration.verbose = false
|
68
|
+
ActiveRecord::Migrator.migrate("#{Rails.root}/db/migrate") # Migrations in the test app
|
69
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
|
2
|
+
|
3
|
+
describe "fuzzy_search" do
|
4
|
+
before do
|
5
|
+
create(:person, :last_name => "meier", :first_name => "kristian")
|
6
|
+
create(:person, :last_name => "meyer", :first_name => "christian", :hobby => "Bicycling")
|
7
|
+
create(:person, :last_name => "mayr", :first_name => "Chris")
|
8
|
+
create(:person, :last_name => "maier", :first_name => "christoph", :hobby => "Bicycling")
|
9
|
+
create(:person, :last_name => "mueller", :first_name => "andreas")
|
10
|
+
create(:person, :last_name => "öther", :first_name => "name")
|
11
|
+
create(:person, :last_name => "yet another", :first_name => "name")
|
12
|
+
|
13
|
+
create(:email, :address => "öscar@web.oa")
|
14
|
+
create(:email, :address => "david.mike.simon@gmail.com")
|
15
|
+
create(:email, :address => "billg@microsoft.com")
|
16
|
+
end
|
17
|
+
|
18
|
+
after do
|
19
|
+
Person.delete_all
|
20
|
+
Email.delete_all
|
21
|
+
FuzzySearchTrigram.delete_all
|
22
|
+
end
|
23
|
+
|
24
|
+
it "can search for records with similar strings to a query" do
|
25
|
+
assert_equal 3, Person.fuzzy_search("meyr").size
|
26
|
+
assert_equal 1, Person.fuzzy_search("myr").size
|
27
|
+
end
|
28
|
+
|
29
|
+
it "can search on multiple columns" do
|
30
|
+
result = Person.fuzzy_search("kristin meiar")
|
31
|
+
assert_equal "kristian", result[0].first_name
|
32
|
+
assert_equal "meier", result[0].last_name
|
33
|
+
end
|
34
|
+
|
35
|
+
it "sorts results by their fuzzy match score" do
|
36
|
+
result = Person.fuzzy_search("kristian meier")
|
37
|
+
assert_equal 100, result[0].fuzzy_score
|
38
|
+
prior = 100
|
39
|
+
(1..3).each do |idx|
|
40
|
+
assert result[idx].fuzzy_score <= prior
|
41
|
+
prior = result[idx].fuzzy_score
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
it "returns an empty result set when given an empty query string" do
|
46
|
+
assert_empty Person.fuzzy_search("")
|
47
|
+
assert_empty Person.fuzzy_search(nil)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "updates the search index automatically when a new record is saved" do
|
51
|
+
assert_empty Person.fuzzy_search("Dave")
|
52
|
+
create(:person, :first_name => "David", :last_name => "Simon")
|
53
|
+
refute_empty Person.fuzzy_search("Dave")
|
54
|
+
end
|
55
|
+
|
56
|
+
it "updates the search index automatically when a record is updated" do
|
57
|
+
assert_empty Person.fuzzy_search("Obama")
|
58
|
+
refute_empty Person.fuzzy_search("yet")
|
59
|
+
|
60
|
+
p = Person.find_by_last_name("yet another")
|
61
|
+
p.last_name = "Obama"
|
62
|
+
p.save!
|
63
|
+
|
64
|
+
refute_empty Person.fuzzy_search("Obama")
|
65
|
+
assert_empty Person.fuzzy_search("yet")
|
66
|
+
end
|
67
|
+
|
68
|
+
it "destroys search index entries when a record is destroyed" do
|
69
|
+
size = Person.fuzzy_search("other").size
|
70
|
+
assert size > 0
|
71
|
+
Person.destroy_all(:last_name => "öther")
|
72
|
+
assert_equal size, Person.fuzzy_search("other").size + 1
|
73
|
+
end
|
74
|
+
|
75
|
+
it "only finds records of the ActiveRecord model you're searching on" do
|
76
|
+
refute_empty Person.fuzzy_search("meier")
|
77
|
+
assert_empty Email.fuzzy_search("meier")
|
78
|
+
|
79
|
+
assert_empty Person.fuzzy_search("oscar")
|
80
|
+
refute_empty Email.fuzzy_search("oscar")
|
81
|
+
end
|
82
|
+
|
83
|
+
it "can normalize strings" do
|
84
|
+
assert_equal("aaaaaa", Person.normalize("ÀÁÂÃÄÅ"))
|
85
|
+
end
|
86
|
+
|
87
|
+
it "normalizes strings before searching on them" do
|
88
|
+
assert_equal 1, Person.fuzzy_search("Müll").size
|
89
|
+
assert_equal 1, Email.fuzzy_search("öscar").size
|
90
|
+
end
|
91
|
+
|
92
|
+
it "normalizes record strings before indexing them" do
|
93
|
+
assert_equal 1, Email.fuzzy_search("oscar").size
|
94
|
+
end
|
95
|
+
|
96
|
+
it "can search through a scope" do
|
97
|
+
scope = Person.scoped({:conditions => {:hobby => "Bicycling"}})
|
98
|
+
assert_equal 4, Person.fuzzy_search("chris").size
|
99
|
+
assert_equal 2, scope.fuzzy_search("chris").size
|
100
|
+
end
|
101
|
+
|
102
|
+
it "can rebuild the search index from scratch" do
|
103
|
+
FuzzySearchTrigram.delete_all
|
104
|
+
assert_empty Person.fuzzy_search("chris")
|
105
|
+
Person.rebuild_fuzzy_search_index!
|
106
|
+
refute_empty Person.fuzzy_search("chris")
|
107
|
+
end
|
108
|
+
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fuzzy_search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 13
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
version: "0.3"
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Kristian Meier
|
13
|
+
- David Mike Simon
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-10-13 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: ar-extensions
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - "="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 49
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
- 9
|
32
|
+
- 5
|
33
|
+
version: 0.9.5
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
description: Implements fuzzy searching for ActiveRecord, using your database's own indexing instead of depending on external tools.
|
37
|
+
email: david.mike.simon@gmail.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
files:
|
45
|
+
- MIT-LICENSE
|
46
|
+
- README.md
|
47
|
+
- Rakefile
|
48
|
+
- lib/fuzzy_model_extensions.rb
|
49
|
+
- lib/fuzzy_search.rb
|
50
|
+
- lib/fuzzy_search_trigram.rb
|
51
|
+
- lib/fuzzy_search_ver.rb
|
52
|
+
- rails_generators/fuzzy_search_setup/fuzzy_search_setup_generator.rb
|
53
|
+
- rails_generators/fuzzy_search_setup/templates/create_fuzzy_search_trigrams.rb
|
54
|
+
- test/app_root/app/models/email.rb
|
55
|
+
- test/app_root/app/models/person.rb
|
56
|
+
- test/app_root/config/boot.rb
|
57
|
+
- test/app_root/config/database.yml
|
58
|
+
- test/app_root/config/environment.rb
|
59
|
+
- test/app_root/config/environments/test.rb
|
60
|
+
- test/app_root/config/routes.rb
|
61
|
+
- test/app_root/db/migrate/20100529235049_create_tables.rb
|
62
|
+
- test/app_root/db/migrate/20111013132330_create_fuzzy_search_trigrams.rb
|
63
|
+
- test/app_root/vendor/plugins/fuzzy_search/init.rb
|
64
|
+
- test/factories.rb
|
65
|
+
- test/test.watchr
|
66
|
+
- test/test_helper.rb
|
67
|
+
- test/unit/fuzzy_search_test.rb
|
68
|
+
homepage: http://github.com/DavidMikeSimon/fuzzy_search
|
69
|
+
licenses: []
|
70
|
+
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
|
74
|
+
require_paths:
|
75
|
+
- lib
|
76
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
hash: 3
|
82
|
+
segments:
|
83
|
+
- 0
|
84
|
+
version: "0"
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
none: false
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
hash: 3
|
91
|
+
segments:
|
92
|
+
- 0
|
93
|
+
version: "0"
|
94
|
+
requirements: []
|
95
|
+
|
96
|
+
rubyforge_project: "[none]"
|
97
|
+
rubygems_version: 1.8.6
|
98
|
+
signing_key:
|
99
|
+
specification_version: 3
|
100
|
+
summary: Search ActiveRecord models for strings similar to a query string
|
101
|
+
test_files: []
|
102
|
+
|