fuzzily_reloaded 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/fuzzily.rb ADDED
@@ -0,0 +1,7 @@
1
+ require "fuzzily/version"
2
+ require "fuzzily/searchable"
3
+ require "fuzzily/migration"
4
+ require "fuzzily/model"
5
+ require "active_record"
6
+
7
# Mix Fuzzily::Searchable into ActiveRecord::Base so its `included` hook runs
# for the base class of every model.
ActiveRecord::Base.include(Fuzzily::Searchable)
@@ -0,0 +1,43 @@
1
+ require "active_record"
2
+
3
module Fuzzily
  # Extension for migration classes that creates and drops the trigram table.
  #
  # Extending an object with this module defines the following directly on
  # that object's singleton class:
  #
  # * trigrams_table_name / trigrams_table_name= (defaults to :trigrams)
  # * trigrams_owner_id_column_type / trigrams_owner_id_column_type=
  #   (defaults to :integer)
  # * up and down
  module Migration
    def self.extended(by)
      class << by
        attr_writer :trigrams_table_name, :trigrams_owner_id_column_type

        # Name of the table to create; memoizes the :trigrams default.
        def trigrams_table_name
          @trigrams_table_name ||= :trigrams
        end

        # Column type used for owner_id; memoizes the :integer default.
        def trigrams_owner_id_column_type
          @trigrams_owner_id_column_type ||= :integer
        end

        # Creates the trigram table and its two indexes.
        def up
          table = trigrams_table_name

          create_table(table) do |t|
            t.string :trigram, limit: 3
            t.integer :score, limit: 2
            # The owner_id column type is configurable, hence the dynamic call.
            t.send trigrams_owner_id_column_type, :owner_id
            t.string :owner_type
            t.string :fuzzy_field
          end

          # owner_id goes first as we'll GROUP BY that
          add_index table, %i[owner_id owner_type fuzzy_field trigram score], name: :index_for_match
          add_index table, %i[owner_id owner_type], name: :index_by_owner
        end

        # Drops the trigram table.
        def down
          drop_table trigrams_table_name
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Fuzzily
  # Behaviour for the ActiveRecord model that stores trigram rows.
  #
  # Needs fields: trigram, owner_type, owner_id, score, fuzzy_field
  # Needs index on [owner_type, trigram] and [owner_type, owner_id]
  module Model
    # Wires the including class up as a trigram model: adds the class methods,
    # the polymorphic owner association, the validations and the query scopes.
    #
    # The class-level setup runs only once per class; a class variable marks
    # that it has already happened, so including the module again is harmless.
    #
    # Raises a RuntimeError when the including class does not descend from
    # ActiveRecord::Base.
    def self.included(by)
      unless by.ancestors.include?(ActiveRecord::Base)
        raise "Not included in an ActiveRecord subclass"
      end

      by.extend(ClassMethods)
      return if by.class_variable_defined?(:@@fuzzily_trigram_model)

      by.class_eval do
        belongs_to :owner, polymorphic: true
        validates :owner, :score, :fuzzy_field, presence: true
        validates :trigram,
                  length: { is: 3 },
                  uniqueness: {
                    case_sensitive: true,
                    scope: [:owner_type, :owner_id, :fuzzy_field],
                  }

        _add_fuzzy_scopes
        class_variable_set(:@@fuzzily_trigram_model, true)
      end
    end

    module ClassMethods
      # Query for the owners whose stored trigrams overlap those of +text+.
      def matches_for(text)
        wanted = Fuzzily::String.new(text).trigrams
        _matches_for_trigrams(wanted)
      end

      private

      # One row per owner: the number of matching trigrams (as `matches`) and
      # the highest score among them, ordered by most matches first and then
      # by lowest score.
      def _matches_for_trigrams(trigrams)
        per_owner = self
          .select("owner_id, owner_type, count(*) AS matches, MAX(score) AS score")
          .group("owner_id, owner_type")

        per_owner.order("matches DESC, score ASC").with_trigram(trigrams)
      end

      # Declares the scopes used to narrow a trigram query.
      def _add_fuzzy_scopes
        # Accepts either a class (its name is used) or the type name itself.
        scope :for_model, ->(model) { where(owner_type: model.kind_of?(Class) ? model.name : model) }
        scope :for_field, ->(field_name) { where(fuzzy_field: field_name) }
        scope :with_trigram, ->(trigrams) { where(trigram: trigrams) }
      end
    end
  end
end
@@ -0,0 +1,177 @@
1
+ require "fuzzily/trigram"
2
+ require "ostruct"
3
+
4
module Fuzzily
  # Adds trigram-based fuzzy search to the class it is included into.
  #
  # The `included` hook extends the host with the class methods matching the
  # running ActiveRecord major version (5 or 6). On any other major version
  # nothing is extended.
  module Searchable
    def self.included(by)
      case ActiveRecord::VERSION::MAJOR
      when 5 then by.extend Rails5ClassMethods
      when 6 then by.extend Rails6ClassMethods
      end
    end

    private

    # Rebuilds the trigram rows for one fuzzy field of this record.
    #
    # Deletes everything reachable through the field's trigram association,
    # then saves one row per scored trigram of the field's current value.
    #
    # @param config [OpenStruct] per-field settings built by
    #   ClassMethods#make_field_fuzzily_searchable
    def _update_fuzzy!(config)
      send(config.trigram_association).delete_all
      Fuzzily::String.new(send(config.field)).scored_trigrams.each do |trigram, score|
        send(config.trigram_association).build.tap do |record|
          record.score       = score
          record.trigram     = trigram
          record.fuzzy_field = config.field.to_s
          record.save!
        end
      end
    end

    module ClassMethods
      # fuzzily_searchable <field> [, <field>...] [, <options>]
      #
      # Options read here: :class_name (trigram model name, default
      # "Trigram") and :async (default false).
      def fuzzily_searchable(*fields)
        options = fields.last.kind_of?(Hash) ? fields.pop : {}

        fields.each do |field|
          make_field_fuzzily_searchable(field, options)
        end
      end

      private

      # Finds the records whose trigrams for the configured field best match
      # +pattern+, in the order produced by the trigram query.
      #
      # @param config [OpenStruct] per-field settings
      # @param pattern [String] text to search for
      # @param options [Hash] :limit (default 10; an explicit nil is passed
      #   through as given) and :offset (default 0). The hash is only read,
      #   never modified.
      # @return [Array] matching records; trigram rows whose owner could not
      #   be loaded are skipped
      def _find_by_fuzzy(config, pattern, options = {})
        # Work on locals so the caller's hash is left exactly as it was passed.
        limit  = options.fetch(:limit, 10)
        offset = options[:offset] || 0

        trigrams = config.trigram_class_name.constantize
          .limit(limit)
          .offset(offset)
          .for_model(name)
          .for_field(config.field.to_s)
          .matches_for(pattern)
        records = _load_for_ids(trigrams.map(&:owner_id))
        # order records as per trigram query (no portable way to do this in SQL)
        trigrams.map { |t| records[t.owner_id] }.compact
      end

      # Loads the records with the given ids and indexes them by id.
      #
      # @return [Hash] id => record
      def _load_for_ids(ids)
        where(id: ids).each_with_object({}) do |record, by_id|
          by_id[record.id] = record
        end
      end

      # Re-creates the trigram rows of the configured field for every record,
      # 100 records at a time. Each batch deletes its old rows and inserts the
      # new ones inside one transaction on the trigram model.
      def _bulk_update_fuzzy(config)
        trigram_class = config.trigram_class_name.constantize
        supports_bulk_inserts =
          connection.class.name !~ /sqlite/i ||
          (
            defined?(SQLite3::SQLITE_VERSION) &&
            Gem::Version.new(SQLite3::SQLITE_VERSION) >= Gem::Version.new("3.7.11")
          )

        # The statement prefix is the same for every batch, so build it once.
        # take care of quoting
        c = trigram_class.connection
        insert_sql = %Q{
          INSERT INTO %s (%s, %s, %s, %s, %s)
          VALUES
        } % [
          c.quote_table_name(trigram_class.table_name),
          c.quote_column_name("owner_type"),
          c.quote_column_name("owner_id"),
          c.quote_column_name("fuzzy_field"),
          c.quote_column_name("score"),
          c.quote_column_name("trigram")
        ]

        _with_included_trigrams(config).find_in_batches(batch_size: 100) do |batch|
          inserts = []
          batch.each do |record|
            data = Fuzzily::String.new(record.send(config.field))
            data.scored_trigrams.each do |trigram, score|
              inserts << sanitize_sql_array(["(?,?,?,?,?)", name, record.id, config.field.to_s, score, trigram])
            end
          end

          trigram_class.transaction do
            batch.each { |record| record.send(config.trigram_association).delete_all }
            # Finish the block normally with `next` so the deletes above are
            # committed. `break` would leave `transaction` non-locally, which
            # ActiveRecord 6.1 deprecates.
            next if inserts.empty?

            if supports_bulk_inserts
              trigram_class.connection.insert(insert_sql + inserts.join(", "))
            else
              inserts.each do |insert|
                trigram_class.connection.insert(insert_sql + insert)
              end
            end
          end
        end
      end

      # Sets up fuzzy search for one field: the trigram association, the
      # find_by_fuzzy_<field> and bulk_update_fuzzy_<field> class methods, the
      # update_fuzzy_<field>! instance method and an after_save callback that
      # refreshes trigrams when the field's saved value changed.
      #
      # Does nothing (and returns nil) when the field was already set up;
      # otherwise returns the class.
      def make_field_fuzzily_searchable(field, options = {})
        return if class_variable_defined?(:"@@fuzzily_searchable_#{field}")

        config = OpenStruct.new(
          field:                  field,
          trigram_class_name:     options.fetch(:class_name, "Trigram"),
          trigram_association:    :"trigrams_for_#{field}",
          update_trigrams_method: :"update_fuzzy_#{field}!",
          async:                  options.fetch(:async, false)
        )

        _add_trigram_association(config)

        define_singleton_method(:"find_by_fuzzy_#{field}") do |*args|
          _find_by_fuzzy(config, *args)
        end

        define_singleton_method(:"bulk_update_fuzzy_#{field}") do
          _bulk_update_fuzzy(config)
        end

        define_method(config.update_trigrams_method) do
          # With async: true the work goes through `delay` when the record
          # responds to it; otherwise it runs inline.
          if config.async && respond_to?(:delay)
            delay._update_fuzzy!(config)
          else
            _update_fuzzy!(config)
          end
        end

        after_save do |record|
          next unless record.send(:"saved_change_to_#{field}?")

          record.send(config.update_trigrams_method)
        end

        class_variable_set(:"@@fuzzily_searchable_#{field}", true)
        self
      end
    end

    module Rails5ClassMethods
      include ClassMethods

      private

      # Declares the has_many association holding this field's trigram rows,
      # restricted to rows whose fuzzy_field is the field's name.
      def _add_trigram_association(config)
        has_many config.trigram_association,
          lambda { where(fuzzy_field: config.field.to_s) },
          class_name: config.trigram_class_name,
          as: :owner,
          dependent: :delete_all,
          autosave: true
      end

      # Relation over all records with the field's trigram association
      # included.
      def _with_included_trigrams(config)
        includes(config.trigram_association)
      end
    end

    module Rails6ClassMethods
      include Rails5ClassMethods
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require "active_support/core_ext/string/multibyte"
2
+ require "delegate"
3
module Fuzzily
  # Wraps a value and breaks its text into trigrams.
  class String < SimpleDelegator
    # Unique three-character windows of the normalized text, in order of
    # first appearance.
    #
    # @return [Array] trigram strings; empty when the wrapped object is blank
    def trigrams
      return [] if __getobj__.blank?

      normalize.each_char.each_cons(3).map(&:join).uniq
    end

    # Pairs every trigram with the length of the wrapped value.
    #
    # @return [Array<Array>] [trigram, length] pairs
    def scored_trigrams
      trigrams.map { |t| [t, length] }
    end

    protected

    # Remove accents, downcase, replace spaces and word start with "*".
    #
    # Returns one normalized string: the text is decomposed (NFKD) and
    # downcased, non-ASCII characters are dropped, every run of characters
    # other than a-z and digits becomes a single "*", and the result is
    # wrapped in a leading "**" and a trailing "*".
    def normalize
      ascii = to_s.unicode_normalize(:nfkd).downcase.gsub(/[^\x00-\x7F]/, "")
      words = ascii.gsub(/[^a-z\d]/, " ").gsub(/\s+/, "*")
      # No whitespace is left in `words`, so it is a single line and the
      # markers can simply be added around it.
      "**#{words}*"
    end
  end
end
@@ -0,0 +1,3 @@
1
module Fuzzily
  # Version string of this gem.
  VERSION = "1.0.0"
end
@@ -0,0 +1 @@
1
+ require "fuzzily"
@@ -0,0 +1,41 @@
1
+ require "spec_helper"
2
+
3
# Exercises Fuzzily::Migration through an anonymous migration class that is
# extended with the module.
describe Fuzzily::Migration do
  subject { Class.new(ActiveRecord::Migration[5.1]).extend(described_class) }

  # Runs the block inside ActiveRecord::Migration.suppress_messages.
  def silently(&block)
    ActiveRecord::Migration.suppress_messages(&block)
  end

  it "is a proper migration" do
    expect(subject.ancestors).to include(ActiveRecord::Migration[5.1])
  end

  it "applies cleanly" do
    silently { subject.up }
  end

  it "rolls back cleanly" do
    silently do
      subject.up
      subject.down
    end
  end

  it "has a customizable table name" do
    silently do
      subject.trigrams_table_name = :foobars
      subject.up

      insert = "INSERT INTO foobars (score) VALUES (1)"
      expect { ActiveRecord::Base.connection.execute(insert) }.to_not raise_error
    end
  end

  it "results in a functional model" do
    silently do
      subject.up

      model_class = Class.new(ActiveRecord::Base) { self.table_name = "trigrams" }
      model_class.create(trigram: "abc")
      expect(model_class.count).to eq 1
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require "spec_helper"
2
+
3
# Exercises Fuzzily::Model on an anonymous ActiveRecord class backed by the
# trigrams table.
describe Fuzzily::Model do
  subject do
    Class.new(ActiveRecord::Base).tap do |model|
      model.table_name = :trigrams
      # The class is anonymous, so give it a name explicitly.
      model.define_singleton_method(:name) { "MyModel" }
    end
  end

  before(:each) { prepare_trigrams_table }

  it "can be included into an ActiveRecord model" do
    subject.send(:include, described_class)
  end

  it "can be included twice" do
    2.times { subject.send(:include, described_class) }
  end

  context "(derived model instance)" do
    before { prepare_owners_table }
    let(:model) { subject.send(:include, described_class) }

    # Stores one trigram row (score 1, field :name) per entry for +owner+.
    def store_trigrams(owner, trigrams)
      trigrams.each do |trigram|
        model.create(owner: owner, score: 1, fuzzy_field: :name, trigram: trigram)
      end
    end

    # Owners of the rows matching +text+ within +scope+.
    def owners_matching(text, scope = model)
      scope.matches_for(text).map(&:owner)
    end

    it "belongs to an owner" do
      expect(model.new).to respond_to(:owner)
    end

    describe ".create" do
      it "can create instances" do
        model.create(owner: Stuff.create, score: 1, trigram: "abc", fuzzy_field: :name)
      end
    end

    describe ".matches_for" do
      before do
        @paris = Stuff.create(name: "Paris")
        store_trigrams(@paris, %w(**p *pa par ari ris))
      end

      it "finds matches" do
        expect(owners_matching("Paris")).to eq [@paris]
      end

      it "finds close matches" do
        expect(owners_matching("Piriss")).to eq [@paris]
      end

      it "does not confuse fields" do
        expect(owners_matching("Paris", model.for_field(:name))).to eq [@paris]
        expect(owners_matching("Paris", model.for_field(:data))).to be_empty
      end

      it "does not confuse owner types" do
        expect(owners_matching("Paris", model.for_model(Stuff))).to eq [@paris]
        expect(owners_matching("Paris", model.for_model(Object))).to be_empty
      end

      context "(with more than one entry)" do
        before do
          @palma = Stuff.create(name: "Palma")
          store_trigrams(@palma, %w(**p *pa pal alm lma))
        end

        it "returns ordered results" do
          expect(owners_matching("Palmyre")).to eq [@palma, @paris]
        end
      end
    end
  end
end
@@ -0,0 +1,201 @@
1
+ require "spec_helper"
2
+
3
# Exercises Fuzzily::Searchable end to end: a Trigram model that includes
# Fuzzily::Model, and a Stuff model whose name field is made fuzzily
# searchable.
describe Fuzzily::Searchable do
  # Prepare ourselves a Trigram repository
  before do
    unless defined?(Trigram)
      Trigram = Class.new(ActiveRecord::Base)
      Trigram.class_eval { include Fuzzily::Model }
    end
  end

  before(:each) { prepare_trigrams_table }
  before(:each) { prepare_owners_table }

  subject do
    Stuff ||= Class.new(ActiveRecord::Base)
    def Stuff.name ; "Stuff" ; end
    Stuff
  end

  describe ".fuzzily_searchable" do
    it "is available to all of ActiveRecord" do
      expect(subject).to respond_to(:fuzzily_searchable)
    end

    it "adds a find_by_fuzzy_<field> method" do
      subject.fuzzily_searchable :name
      expect(subject).to respond_to(:find_by_fuzzy_name)
    end

    it "is idempotent" do
      subject.fuzzily_searchable :name
      subject.fuzzily_searchable :name
      expect(subject).to respond_to(:find_by_fuzzy_name)
    end

    it "creates the trigrams_for_<field> association" do
      subject.fuzzily_searchable :name
      expect(subject.new).to respond_to(:trigrams_for_name)
    end
  end

  describe "(callbacks)" do
    before { subject.fuzzily_searchable :name }

    it "generates trigram records on creation" do
      subject.create!(name: "Paris")
      expect(subject.last.trigrams_for_name).to_not be_empty
    end

    it "generates the correct trigrams" do
      record = subject.create!(name: "FOO")
      expect(Trigram.first.trigram).to eq "**f"
      expect(Trigram.first.owner_id).to eq record.id
      expect(Trigram.first.owner_type).to eq "Stuff"
    end

    it "updates all trigram records on save" do
      subject.create!(name: "Paris")
      subject.first.update_attribute :name, "Rome"
      expect(Trigram.all.order(:id).map(&:trigram)).to match %w(**r *ro rom ome me*)
    end

    it "deletes all trigrams on destroy" do
      subject.create!(name: "Paris").destroy
      expect(Trigram.all).to be_empty
    end
  end

  describe "#update_fuzzy_<field>!" do
    before do
      subject.fuzzily_searchable :name
    end

    it "re-creates trigrams" do
      subject.create!(name: "Paris")
      old_ids = Trigram.all.map(&:id)
      subject.last.update_fuzzy_name!
      expect(old_ids & Trigram.all.map(&:id)).to be_empty
    end

    it "ignores nil values" do
      subject.create!(name: nil)
      subject.last.update_fuzzy_name!
      expect(Trigram.all).to be_empty
    end

    it "ignores empty string values" do
      subject.create!(name: "")
      subject.last.update_fuzzy_name!
      expect(Trigram.all).to be_empty
    end
  end

  describe ".bulk_update_fuzzy_<field>" do
    before { subject.fuzzily_searchable :name }

    it "creates all trigrams" do
      subject.create!(name: "Paris")
      Trigram.delete_all
      subject.bulk_update_fuzzy_name
      expect(Trigram.all).to_not be_empty
    end

    it "ignores nil values" do
      subject.create!(name: nil)
      Trigram.delete_all
      subject.bulk_update_fuzzy_name
      expect(Trigram.all).to be_empty
    end

    it "ignores empty string values" do
      subject.create!(name: "")
      Trigram.delete_all
      subject.bulk_update_fuzzy_name
      expect(Trigram.all).to be_empty
    end
  end

  context "(integration test)" do
    describe "#find_by_fuzzy_<field>" do
      it "returns records" do
        subject.fuzzily_searchable :name
        @paris   = subject.create!(name: "Paris")
        @palma   = subject.create!(name: "Palma de Majorca")
        @palmyre = subject.create!(name: "La Palmyre")

        expect(subject.find_by_fuzzy_name("Piris")).to_not be_empty
        expect(subject.find_by_fuzzy_name("Piris")).to match [@paris, @palma]
        expect(subject.find_by_fuzzy_name("Paradise")).to match [@paris, @palma, @palmyre]
      end

      it "favours exact matches" do
        subject.fuzzily_searchable :name
        @new_york   = subject.create!(name: "New York")
        @yorkshire  = subject.create!(name: "Yorkshire")
        @york       = subject.create!(name: "York")
        @yorkisthan = subject.create!(name: "Yorkisthan")

        expect(subject.find_by_fuzzy_name("York")).to match [@york, @new_york, @yorkshire, @yorkisthan]
        expect(subject.find_by_fuzzy_name("Yorkshire")).to match [@yorkshire, @york, @yorkisthan, @new_york]
      end

      it "does not favour short words" do
        subject.fuzzily_searchable :name
        @lo     = subject.create!(name: "Lo")     # **l *lo lo*
        @london = subject.create!(name: "London") # **l *lo lon ond ndo don on*
        # **l *lo lon
        expect(subject.find_by_fuzzy_name("Lon")).to eq [@london, @lo]
      end

      it "honours limit option" do
        subject.fuzzily_searchable :name
        3.times { subject.create!(name: "Paris") }
        expect(subject.find_by_fuzzy_name("Paris", limit: 2).length).to eq 2
      end

      it "limits results to 10 if limit option is not given" do
        subject.fuzzily_searchable :name
        30.times { subject.create!(name: "Paris") }
        expect(subject.find_by_fuzzy_name("Paris").length).to eq 10
      end

      it "does not limit results if limit option is present and is nil" do
        subject.fuzzily_searchable :name
        30.times { subject.create!(name: "Paris") }
        expect(subject.find_by_fuzzy_name("Paris", limit: nil).length).to eq 30
      end

      it "honours offset option" do
        subject.fuzzily_searchable :name
        3.times { subject.create!(name: "Paris") }
        expect(subject.find_by_fuzzy_name("Paris", offset: 2).length).to eq 1
      end

      it "does not raise on missing objects" do
        subject.fuzzily_searchable :name
        belgium = subject.create(name: "Belgium")
        # `delete` is used here so the record goes away without `destroy`.
        belgium.delete
        subject.find_by_fuzzy_name("Belgium")
      end

      it "finds others alongside missing" do
        subject.fuzzily_searchable :name
        belgium1, belgium2 = 2.times.map { subject.create(name: "Belgium") }
        belgium1.delete
        expect(subject.find_by_fuzzy_name("Belgium")).to eq [belgium2]
      end
    end
  end
end