fuzzily_reloaded 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/fuzzily.rb ADDED
@@ -0,0 +1,7 @@
1
+ require "fuzzily/version"
2
+ require "fuzzily/searchable"
3
+ require "fuzzily/migration"
4
+ require "fuzzily/model"
5
+ require "active_record"
6
+
7
# Mix the fuzzy-search macro into every ActiveRecord model at load time.
ActiveRecord::Base.include(Fuzzily::Searchable)
@@ -0,0 +1,43 @@
1
+ require "active_record"
2
+
3
module Fuzzily
  # Extension for a migration class: `extend Fuzzily::Migration` gives the
  # class configurable +up+ / +down+ steps that create and drop the trigram
  # table.
  module Migration
    # Defines the configuration accessors and the migration steps directly on
    # the singleton class of the extending object.
    #
    # @param by [Object] the object (normally a migration class) being extended
    def self.extended(by)
      class << by
        # Setters for the table name and for the column type of +owner_id+.
        attr_writer :trigrams_table_name, :trigrams_owner_id_column_type

        # @return [Symbol, String] configured table name, +:trigrams+ by default
        def trigrams_table_name
          @trigrams_table_name ||= :trigrams
        end

        # @return [Symbol] column type used for +owner_id+, +:integer+ by default
        def trigrams_owner_id_column_type
          @trigrams_owner_id_column_type ||= :integer
        end

        # Creates the trigram table and its two indexes.
        #
        # NOTE(review): the index names are fixed, so two trigram tables with
        # different names may clash on databases where index names are not
        # scoped per table - confirm before creating a second table.
        def up
          table         = trigrams_table_name
          owner_id_type = trigrams_owner_id_column_type

          create_table table do |t|
            t.string  :trigram, limit: 3
            t.integer :score, limit: 2
            t.send owner_id_type, :owner_id
            t.string  :owner_type
            t.string  :fuzzy_field
          end

          # owner_id goes first as we'll GROUP BY that
          add_index table, [:owner_id, :owner_type, :fuzzy_field, :trigram, :score], name: :index_for_match
          add_index table, [:owner_id, :owner_type], name: :index_by_owner
        end

        # Drops the trigram table.
        def down
          drop_table trigrams_table_name
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Fuzzily
  # Behaviour for the model that stores trigrams.
  #
  # The declarations below reference the attributes trigram, score,
  # fuzzy_field, owner_type and owner_id (the last two through the polymorphic
  # +owner+ association).
  # The original author recommends indexes on [owner_type, trigram] and
  # [owner_type, owner_id].
  module Model
    # Include hook: checks the host class, adds the class-level query API and,
    # on the first inclusion only, declares the association, validations and
    # scopes.
    #
    # @param by [Class] the including class
    # @raise [RuntimeError] when +by+ does not descend from ActiveRecord::Base
    def self.included(by)
      unless by.ancestors.include?(ActiveRecord::Base)
        raise "Not included in an ActiveRecord subclass"
      end

      by.extend(ClassMethods)

      # The class variable marks classes that were already set up.
      return if by.class_variable_defined?(:@@fuzzily_trigram_model)

      by.class_eval do
        belongs_to :owner, polymorphic: true
        validates :owner, :score, :fuzzy_field, presence: true
        validates :trigram, length: { is: 3 }, uniqueness: {
          case_sensitive: true,
          scope: [:owner_type, :owner_id, :fuzzy_field],
        }

        _add_fuzzy_scopes
      end

      by.class_variable_set(:@@fuzzily_trigram_model, true)
    end

    # Class-level query helpers added to the trigram model.
    module ClassMethods
      # Builds the query that ranks owners by the number of trigrams they
      # share with +text+.
      #
      # @param text [String, nil] the search pattern
      # @return the relation built by {#_matches_for_trigrams}
      def matches_for(text)
        wanted = Fuzzily::String.new(text).trigrams
        _matches_for_trigrams(wanted)
      end

      private

      # Groups rows per owner, counts the matching trigrams and orders by
      # match count (descending), then score (ascending).
      def _matches_for_trigrams(trigrams)
        ranked = select("owner_id, owner_type, count(*) AS matches, MAX(score) AS score")
        ranked = ranked.group("owner_id, owner_type")
        ranked = ranked.order("matches DESC, score ASC")
        ranked.with_trigram(trigrams)
      end

      # Declares the filtering scopes used by the search.
      def _add_fuzzy_scopes
        # Accepts either a class (its name is used) or a ready-made type name.
        scope :for_model, ->(model) {
          owner_type = model.is_a?(Class) ? model.name : model
          where(owner_type: owner_type)
        }
        scope :for_field,    ->(field_name) { where(fuzzy_field: field_name) }
        scope :with_trigram, ->(trigrams) { where(trigram: trigrams) }
      end
    end
  end
end
@@ -0,0 +1,177 @@
1
+ require "fuzzily/trigram"
2
+ require "ostruct"
3
+
4
module Fuzzily
  # Mixin that gives a model class the +fuzzily_searchable+ macro and the
  # per-record helper that rebuilds trigram rows for a field.
  module Searchable
    # Include hook: extends the including class with the class-level API that
    # matches the running ActiveRecord major version.
    #
    # NOTE(review): only major versions 5 and 6 are matched. On any other
    # version nothing is extended, so +fuzzily_searchable+ stays undefined.
    #
    # @param by [Class] the including class
    def self.included(by)
      case ActiveRecord::VERSION::MAJOR
      when 5 then by.extend Rails5ClassMethods
      when 6 then by.extend Rails6ClassMethods
      end
    end

    private

    # Replaces this record's trigram rows for one field: deletes the existing
    # rows, then saves one row per scored trigram of the current field value.
    #
    # @param _o [OpenStruct] per-field settings built by
    #   +make_field_fuzzily_searchable+
    def _update_fuzzy!(_o)
      send(_o.trigram_association).delete_all

      # Fully qualified so the lookup never falls back to ::String.
      Fuzzily::String.new(send(_o.field)).scored_trigrams.each do |trigram, score|
        send(_o.trigram_association).build.tap do |record|
          record.score       = score
          record.trigram     = trigram
          record.fuzzy_field = _o.field.to_s
          record.save!
        end
      end
    end

    # Class-level API shared by all supported ActiveRecord versions.
    module ClassMethods
      # Declares one or more fields as fuzzily searchable.
      #
      #   fuzzily_searchable <field> [, <field>...] [, <options>]
      #
      # @param fields [Array<Symbol, Hash>] field names, optionally followed by
      #   an options hash (+:class_name+, +:async+)
      def fuzzily_searchable(*fields)
        options = fields.last.kind_of?(Hash) ? fields.pop : {}

        fields.each do |field|
          make_field_fuzzily_searchable(field, options)
        end
      end

      private

      # Runs the trigram query for +pattern+ and returns the owning records in
      # ranking order.
      #
      # @param _o [OpenStruct] per-field settings
      # @param pattern [String] the search text
      # @param options [Hash] +:limit+ (10 when the key is absent; an explicit
      #   nil is passed through unchanged) and +:offset+ (0 when missing or nil)
      # @return [Array] matching records; owners that no longer exist are dropped
      def _find_by_fuzzy(_o, pattern, options={})
        # Read the options without writing back: the caller's hash must not be
        # modified (it may be shared or frozen).
        limit  = options.fetch(:limit, 10)
        offset = options[:offset] || 0

        trigrams = _o.trigram_class_name.constantize.
          limit(limit).
          offset(offset).
          for_model(self.name).
          for_field(_o.field.to_s).
          matches_for(pattern)
        records = _load_for_ids(trigrams.map(&:owner_id))
        # order records as per trigram query (no portable way to do this in SQL)
        trigrams.map { |t| records[t.owner_id] }.compact
      end

      # Loads the records with the given ids.
      #
      # @param ids [Array] primary keys
      # @return [Hash] records keyed by id; ids without a record are absent
      def _load_for_ids(ids)
        where(id: ids).each_with_object({}) do |record, by_id|
          by_id[record.id] = record
        end
      end

      # Rebuilds the trigram rows of one field for every record, in batches of
      # 100, using raw INSERT statements.
      #
      # @param _o [OpenStruct] per-field settings
      def _bulk_update_fuzzy(_o)
        trigram_class = _o.trigram_class_name.constantize
        c = trigram_class.connection

        # Multi-row VALUES lists need SQLite >= 3.7.11. The check is made on
        # the connection that executes the inserts.
        supports_bulk_inserts =
          c.class.name !~ /sqlite/i ||
          (
            defined?(SQLite3::SQLITE_VERSION) &&
            Gem::Version.new(SQLite3::SQLITE_VERSION) >= Gem::Version.new("3.7.11")
          )

        # take care of quoting; the statement prefix is the same for every batch
        insert_sql = %Q{
          INSERT INTO %s (%s, %s, %s, %s, %s)
          VALUES
        } % [
          c.quote_table_name(trigram_class.table_name),
          c.quote_column_name("owner_type"),
          c.quote_column_name("owner_id"),
          c.quote_column_name("fuzzy_field"),
          c.quote_column_name("score"),
          c.quote_column_name("trigram")
        ]

        _with_included_trigrams(_o).find_in_batches(batch_size: 100) do |batch|
          inserts = batch.flat_map do |record|
            data = Fuzzily::String.new(record.send(_o.field))
            data.scored_trigrams.map do |trigram, score|
              sanitize_sql_array(["(?,?,?,?,?)", self.name, record.id, _o.field.to_s, score, trigram])
            end
          end

          trigram_class.transaction do
            batch.each { |record| record.send(_o.trigram_association).delete_all }
            # `next` ends the block normally so the deletes above are committed;
            # leaving a transaction block with `break` is a non-local exit.
            next if inserts.empty?

            if supports_bulk_inserts
              c.insert(insert_sql + inserts.join(", "))
            else
              inserts.each { |insert| c.insert(insert_sql + insert) }
            end
          end
        end
      end

      # Wires up a single field: the trigram association, the class-level
      # finder and bulk updater, the instance-level updater and the after_save
      # hook. Does nothing when the field was already set up.
      #
      # @param field [Symbol] attribute name
      # @param options [Hash] +:class_name+ (default "Trigram") and +:async+
      #   (default false)
      # @return [Class, nil] self after the first setup, nil on repeated calls
      def make_field_fuzzily_searchable(field, options={})
        return if class_variable_defined?(:"@@fuzzily_searchable_#{field}")

        _o = OpenStruct.new(
          field:                  field,
          trigram_class_name:     options.fetch(:class_name, "Trigram"),
          trigram_association:    "trigrams_for_#{field}".to_sym,
          update_trigrams_method: "update_fuzzy_#{field}!".to_sym,
          async:                  options.fetch(:async, false)
        )

        _add_trigram_association(_o)

        singleton_class.send(:define_method, "find_by_fuzzy_#{field}".to_sym) do |*args|
          _find_by_fuzzy(_o, *args)
        end

        singleton_class.send(:define_method, "bulk_update_fuzzy_#{field}".to_sym) do
          _bulk_update_fuzzy(_o)
        end

        define_method _o.update_trigrams_method do
          # Goes through `delay` only when async was requested and the record
          # responds to it.
          if _o.async && self.respond_to?(:delay)
            self.delay._update_fuzzy!(_o)
          else
            _update_fuzzy!(_o)
          end
        end

        # Refresh trigrams only when the field changed in the save just made.
        after_save do |record|
          next unless record.send("saved_change_to_#{field}?".to_sym)

          record.send(_o.update_trigrams_method)
        end

        class_variable_set(:"@@fuzzily_searchable_#{field}", true)
        self
      end
    end

    # ActiveRecord 5 flavour of the class-level API.
    module Rails5ClassMethods
      include ClassMethods

      private

      # Declares the has_many association holding the trigrams of one field.
      def _add_trigram_association(_o)
        has_many _o.trigram_association,
          lambda { where(fuzzy_field: _o.field.to_s) },
          class_name: _o.trigram_class_name,
          as: :owner,
          dependent: :delete_all,
          autosave: true
      end

      # Relation over all records with the field's trigram association included.
      def _with_included_trigrams(_o)
        self.includes(_o.trigram_association)
      end
    end

    # ActiveRecord 6 uses the same implementation as ActiveRecord 5.
    module Rails6ClassMethods
      include Rails5ClassMethods
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require "active_support/core_ext/string/multibyte"
2
+ require "delegate"
3
+ module Fuzzily
4
+ class String < SimpleDelegator
5
+
6
+ def trigrams
7
+ return [] if __getobj__.blank?
8
+ normalized = self.normalize
9
+ number_of_trigrams = normalized.length - 3
10
+ trigrams = (0..number_of_trigrams).map { |index| normalized[index, 3] }.uniq
11
+ end
12
+
13
+ def scored_trigrams
14
+ trigrams.map { |t| [t, self.length] }
15
+ end
16
+
17
+ protected
18
+
19
+ # Remove accents, downcase, replace spaces and word start with "*",
20
+ # return list of normalized words
21
+ def normalize
22
+ ActiveSupport::Multibyte::Chars.new(self.to_s)
23
+ .mb_chars.unicode_normalize(:nfkd).to_s.downcase
24
+ .gsub(/[^\x00-\x7F]/, "")
25
+ .gsub(/[^a-z\d]/, " ")
26
+ .gsub(/\s+/, "*")
27
+ .gsub(/^/, "**")
28
+ .gsub(/$/, "*")
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,3 @@
1
module Fuzzily
  # Version string of this release of the gem.
  VERSION = "1.0.0"
end
@@ -0,0 +1 @@
1
+ require "fuzzily"
@@ -0,0 +1,41 @@
1
+ require "spec_helper"
2
+
3
describe Fuzzily::Migration do
  # An anonymous 5.1-style migration class extended with the module under test.
  subject do
    Class.new(ActiveRecord::Migration[5.1]).extend(described_class)
  end

  it "is a proper migration" do
    expect(subject.ancestors).to include(ActiveRecord::Migration[5.1])
  end

  it "applies cleanly" do
    ActiveRecord::Migration.suppress_messages { subject.up }
  end

  it "rolls back cleanly" do
    ActiveRecord::Migration.suppress_messages do
      subject.up
      subject.down
    end
  end

  it "has a customizable table name" do
    ActiveRecord::Migration.suppress_messages do
      subject.trigrams_table_name = :foobars
      subject.up

      # The insert only succeeds if the table was created under the custom name.
      expect do
        ActiveRecord::Base.connection.execute("INSERT INTO foobars (score) VALUES (1)")
      end.not_to raise_error
    end
  end

  it "results in a functional model" do
    ActiveRecord::Migration.suppress_messages do
      subject.up

      trigram_model = Class.new(ActiveRecord::Base)
      trigram_model.table_name = "trigrams"
      trigram_model.create(trigram: "abc")

      expect(trigram_model.count).to eq 1
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require "spec_helper"
2
+
3
describe Fuzzily::Model do
  # Anonymous model on the trigrams table, given a name through a singleton
  # method.
  subject do
    Class.new(ActiveRecord::Base).tap do |model|
      model.table_name = :trigrams
      model.define_singleton_method(:name) { "MyModel" }
    end
  end

  before(:each) { prepare_trigrams_table }

  it "can be included into an ActiveRecord model" do
    subject.include(described_class)
  end

  it "can be included twice" do
    2.times { subject.include(described_class) }
  end

  context "(derived model instance)" do
    before { prepare_owners_table }

    # `include` returns the class itself, so this is the trigram model.
    let(:model) { subject.include(described_class) }

    it "belongs to an owner" do
      expect(model.new).to respond_to(:owner)
    end

    describe ".create" do
      it "can create instances" do
        model.create(owner: Stuff.create, score: 1, trigram: "abc", fuzzy_field: :name)
      end
    end

    describe ".matches_for" do
      before do
        @paris = Stuff.create(name: "Paris")
        %w[**p *pa par ari ris].each do |trigram|
          model.create(owner: @paris, score: 1, fuzzy_field: :name, trigram: trigram)
        end
      end

      it "finds matches" do
        owners = model.matches_for("Paris").map(&:owner)
        expect(owners).to eq [@paris]
      end

      it "finds close matches" do
        owners = model.matches_for("Piriss").map(&:owner)
        expect(owners).to eq [@paris]
      end

      it "does not confuse fields" do
        expect(model.for_field(:name).matches_for("Paris").map(&:owner)).to eq [@paris]
        expect(model.for_field(:data).matches_for("Paris").map(&:owner)).to be_empty
      end

      it "does not confuse owner types" do
        expect(model.for_model(Stuff).matches_for("Paris").map(&:owner)).to eq [@paris]
        expect(model.for_model(Object).matches_for("Paris").map(&:owner)).to be_empty
      end

      context "(with more than one entry)" do
        before do
          @palma = Stuff.create(name: "Palma")
          %w[**p *pa pal alm lma].each do |trigram|
            model.create(owner: @palma, score: 1, fuzzy_field: :name, trigram: trigram)
          end
        end

        it "returns ordered results" do
          owners = model.matches_for("Palmyre").map(&:owner)
          expect(owners).to eq [@palma, @paris]
        end
      end
    end
  end
end
@@ -0,0 +1,201 @@
1
+ require "spec_helper"
2
+
3
describe Fuzzily::Searchable do
  # Prepare ourselves a Trigram repository (defined once, reused afterwards)
  before do
    unless defined?(Trigram)
      Trigram = Class.new(ActiveRecord::Base)
      Trigram.include(Fuzzily::Model)
    end
  end

  before(:each) { prepare_trigrams_table }
  before(:each) { prepare_owners_table }

  # The owner model; the constant is created on first use and reused afterwards.
  subject do
    Stuff ||= Class.new(ActiveRecord::Base)
    Stuff.define_singleton_method(:name) { "Stuff" }
    Stuff
  end

  describe ".fuzzily_searchable" do
    it "is available to all of ActiveRecord" do
      expect(subject).to respond_to(:fuzzily_searchable)
    end

    it "adds a find_by_fuzzy_<field> method" do
      subject.fuzzily_searchable :name
      expect(subject).to respond_to(:find_by_fuzzy_name)
    end

    it "is idempotent" do
      2.times { subject.fuzzily_searchable :name }
      expect(subject).to respond_to(:find_by_fuzzy_name)
    end

    it "creates the trigrams_for_<field> association" do
      subject.fuzzily_searchable :name
      expect(subject.new).to respond_to(:trigrams_for_name)
    end
  end

  describe "(callbacks)" do
    before { subject.fuzzily_searchable :name }

    it "generates trigram records on creation" do
      subject.create!(name: "Paris")
      expect(subject.last.trigrams_for_name).not_to be_empty
    end

    it "generates the correct trigrams" do
      record = subject.create!(name: "FOO")
      first_trigram = Trigram.first

      expect(first_trigram.trigram).to eq "**f"
      expect(first_trigram.owner_id).to eq record.id
      expect(first_trigram.owner_type).to eq "Stuff"
    end

    it "updates all trigram records on save" do
      subject.create!(name: "Paris")
      subject.first.update_attribute :name, "Rome"

      stored = Trigram.all.order(:id).map(&:trigram)
      expect(stored).to match %w[**r *ro rom ome me*]
    end

    it "deletes all trigrams on destroy" do
      subject.create!(name: "Paris").destroy
      expect(Trigram.all).to be_empty
    end
  end

  describe "#update_fuzzy_<field>!" do
    before { subject.fuzzily_searchable :name }

    it "re-creates trigrams" do
      subject.create!(name: "Paris")
      old_ids = Trigram.all.map(&:id)

      subject.last.update_fuzzy_name!

      new_ids = Trigram.all.map(&:id)
      expect(old_ids & new_ids).to be_empty
    end

    it "ignores nil values" do
      subject.create!(name: nil)
      subject.last.update_fuzzy_name!
      expect(Trigram.all).to be_empty
    end

    it "ignores empty string values" do
      subject.create!(name: "")
      subject.last.update_fuzzy_name!
      expect(Trigram.all).to be_empty
    end

    # Only defined on ActiveRecord 3 and older.
    if ActiveRecord::VERSION::MAJOR <= 3
      let(:fields) { [:score, :fuzzy_field, :trigram] }
      before { Trigram.attr_protected fields }

      it "tolerates mass assignment security" do
        subject.create!(name: "Paris")
        subject.last.update_fuzzy_name!
      end
    end
  end

  describe ".bulk_update_fuzzy_<field>" do
    before { subject.fuzzily_searchable :name }

    it "creates all trigrams" do
      subject.create!(name: "Paris")
      Trigram.delete_all
      subject.bulk_update_fuzzy_name
      expect(Trigram.all).not_to be_empty
    end

    it "ignores nil values" do
      subject.create!(name: nil)
      Trigram.delete_all
      subject.bulk_update_fuzzy_name
      expect(Trigram.all).to be_empty
    end

    it "ignores empty string values" do
      subject.create!(name: "")
      Trigram.delete_all
      subject.bulk_update_fuzzy_name
      expect(Trigram.all).to be_empty
    end
  end

  context "(integrationg test)" do
    describe "#find_by_fuzzy_<field>" do
      it "returns records" do
        subject.fuzzily_searchable :name
        paris   = subject.create!(name: "Paris")
        palma   = subject.create!(name: "Palma de Majorca")
        palmyre = subject.create!(name: "La Palmyre")

        expect(subject.find_by_fuzzy_name("Piris")).not_to be_empty
        expect(subject.find_by_fuzzy_name("Piris")).to match [paris, palma]
        expect(subject.find_by_fuzzy_name("Paradise")).to match [paris, palma, palmyre]
      end

      it "favours exact matches" do
        subject.fuzzily_searchable :name
        new_york   = subject.create!(name: "New York")
        yorkshire  = subject.create!(name: "Yorkshire")
        york       = subject.create!(name: "York")
        yorkisthan = subject.create!(name: "Yorkisthan")

        expect(subject.find_by_fuzzy_name("York")).to match [york, new_york, yorkshire, yorkisthan]
        expect(subject.find_by_fuzzy_name("Yorkshire")).to match [yorkshire, york, yorkisthan, new_york]
      end

      it "does not favour short words" do
        subject.fuzzily_searchable :name
        lo     = subject.create!(name: "Lo")     # **l *lo lo*
        london = subject.create!(name: "London") # **l *lo lon ond ndo don on*
        # query "Lon" yields: **l *lo lon
        expect(subject.find_by_fuzzy_name("Lon")).to eq [london, lo]
      end

      it "honours limit option" do
        subject.fuzzily_searchable :name
        3.times { subject.create!(name: "Paris") }

        found = subject.find_by_fuzzy_name("Paris", limit: 2)
        expect(found.length).to eq 2
      end

      it "limits results to 10 if limit option is not given" do
        subject.fuzzily_searchable :name
        30.times { subject.create!(name: "Paris") }

        found = subject.find_by_fuzzy_name("Paris")
        expect(found.length).to eq 10
      end

      it "does not limit results it limit option is present and is nil" do
        subject.fuzzily_searchable :name
        30.times { subject.create!(name: "Paris") }

        found = subject.find_by_fuzzy_name("Paris", limit: nil)
        expect(found.length).to eq 30
      end

      it "honours offset option" do
        subject.fuzzily_searchable :name
        3.times { subject.create!(name: "Paris") }

        found = subject.find_by_fuzzy_name("Paris", offset: 2)
        expect(found.length).to eq 1
      end

      it "does not raise on missing objects" do
        subject.fuzzily_searchable :name
        # `delete` skips callbacks, so the trigram rows outlive the record.
        subject.create(name: "Belgium").delete
        subject.find_by_fuzzy_name("Belgium")
      end

      it "finds others alongside missing" do
        subject.fuzzily_searchable :name
        belgium1, belgium2 = Array.new(2) { subject.create(name: "Belgium") }
        belgium1.delete

        expect(subject.find_by_fuzzy_name("Belgium")).to eq [belgium2]
      end
    end
  end
end