fuzzily 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +16 -3
- data/lib/fuzzily/migration.rb +1 -1
- data/lib/fuzzily/searchable.rb +4 -4
- data/lib/fuzzily/trigram.rb +8 -1
- data/lib/fuzzily/version.rb +1 -1
- data/spec/fuzzily/searchable_spec.rb +10 -0
- metadata +120 -119
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -48,9 +48,7 @@ Instrument your model (your searchable fields do not have to be stored, they can
|
|
48
48
|
|
49
49
|
Index your model (will happen automatically for new/updated records):
|
50
50
|
|
51
|
-
MyStuff.
|
52
|
-
record.update_fuzzy_name!
|
53
|
-
end
|
51
|
+
MyStuff.bulk_update_fuzzy_name
|
54
52
|
|
55
53
|
Search!
|
56
54
|
|
@@ -87,6 +85,21 @@ If you want or need to name your index model differently (e.g. because you alrea
|
|
87
85
|
end
|
88
86
|
|
89
87
|
|
88
|
+
## Speeding things up
|
89
|
+
|
90
|
+
For large data sets (millions of rows to index), the "compatible" storage
|
91
|
+
used by default will typically no longer be enough to keep the index small
|
92
|
+
enough.
|
93
|
+
|
94
|
+
Users have reported **major improvements** (two orders of magnitude) when turning
|
95
|
+
the `owner_type` and `fuzzy_field` columns of the `trigrams` table from
|
96
|
+
`VARCHAR` (the default) into `ENUM`. This is particularly efficient with
|
97
|
+
MySQL and pgSQL.
|
98
|
+
|
99
|
+
This is not the default in the gem as ActiveRecord does not support `ENUM`
|
100
|
+
columns in any version.
|
101
|
+
|
102
|
+
|
90
103
|
## License
|
91
104
|
|
92
105
|
MIT licence. Quite permissive if you ask me.
|
data/lib/fuzzily/migration.rb
CHANGED
data/lib/fuzzily/searchable.rb
CHANGED
@@ -47,8 +47,8 @@ module Fuzzily
|
|
47
47
|
self.scoped(:include => trigram_association).find_in_batches(:batch_size => 100) do |batch|
|
48
48
|
inserts = []
|
49
49
|
batch.each do |record|
|
50
|
-
record.send(field).extend(String).
|
51
|
-
inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s,
|
50
|
+
record.send(field).extend(String).scored_trigrams.each do |trigram, score|
|
51
|
+
inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, score, trigram])
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -66,8 +66,8 @@ module Fuzzily
|
|
66
66
|
|
67
67
|
define_method update_trigrams_method do
|
68
68
|
self.send(trigram_association).delete_all
|
69
|
-
self.send(field).extend(String).
|
70
|
-
self.send(trigram_association).create!(:score =>
|
69
|
+
self.send(field).extend(String).scored_trigrams.each do |trigram, score|
|
70
|
+
self.send(trigram_association).create!(:score => score, :trigram => trigram, :owner_type => self.class.name)
|
71
71
|
end
|
72
72
|
end
|
73
73
|
|
data/lib/fuzzily/trigram.rb
CHANGED
@@ -4,7 +4,14 @@ module Fuzzily
|
|
4
4
|
module String
|
5
5
|
def trigrams
|
6
6
|
normalized = self.normalize
|
7
|
-
|
7
|
+
number_of_trigrams = normalized.length - 3
|
8
|
+
trigrams = (0..number_of_trigrams).map { |index| normalized[index,3] }.uniq
|
9
|
+
end
|
10
|
+
|
11
|
+
def scored_trigrams
|
12
|
+
trigrams_ = self.trigrams
|
13
|
+
score = 32_768 / trigrams_.length
|
14
|
+
trigrams_.map { |t| [t, score] }
|
8
15
|
end
|
9
16
|
|
10
17
|
protected
|
data/lib/fuzzily/version.rb
CHANGED
@@ -83,6 +83,16 @@ describe Fuzzily::Searchable do
|
|
83
83
|
subject.find_by_fuzzy_name('Piris').should =~ [@paris, @palma]
|
84
84
|
subject.find_by_fuzzy_name('Paradise').should =~ [@paris, @palma, @palmyre]
|
85
85
|
end
|
86
|
+
|
87
|
+
it 'favours exact matches' do
|
88
|
+
subject.fuzzily_searchable :name
|
89
|
+
@new_york = subject.create(:name => 'New York')
|
90
|
+
@yorkshire = subject.create(:name => 'Yorkshire')
|
91
|
+
@york = subject.create(:name => 'York')
|
92
|
+
@yorkisthan = subject.create(:name => 'Yorkisthan')
|
93
|
+
|
94
|
+
subject.find_by_fuzzy_name('York').should == [@york, @yorkshire, @yorkisthan, @new_york]
|
95
|
+
end
|
86
96
|
end
|
87
97
|
end
|
88
98
|
|
metadata
CHANGED
@@ -1,135 +1,133 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: fuzzily
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Julien Letessier
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
+
|
18
|
+
date: 2013-03-06 00:00:00 +00:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
23
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 33
|
28
|
+
segments:
|
29
|
+
- 2
|
30
|
+
- 3
|
31
|
+
- 17
|
21
32
|
version: 2.3.17
|
22
|
-
type: :runtime
|
23
33
|
prerelease: false
|
24
|
-
|
34
|
+
name: activerecord
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
25
39
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
prerelease: false
|
31
48
|
name: rspec
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - ! '>='
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: '0'
|
38
49
|
type: :development
|
39
|
-
|
40
|
-
|
50
|
+
version_requirements: *id002
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
53
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
prerelease: false
|
47
62
|
name: rake
|
48
|
-
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
|
-
requirements:
|
51
|
-
- - ! '>='
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: '0'
|
54
63
|
type: :development
|
55
|
-
|
56
|
-
|
64
|
+
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
57
67
|
none: false
|
58
|
-
requirements:
|
59
|
-
- -
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
|
62
|
-
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
prerelease: false
|
63
76
|
name: appraisal
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
|
-
requirements:
|
67
|
-
- - ! '>='
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '0'
|
70
77
|
type: :development
|
71
|
-
|
72
|
-
|
78
|
+
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
73
81
|
none: false
|
74
|
-
requirements:
|
75
|
-
- -
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
|
78
|
-
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
prerelease: false
|
79
90
|
name: pry
|
80
|
-
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
|
-
requirements:
|
83
|
-
- - ! '>='
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
91
|
type: :development
|
87
|
-
|
88
|
-
|
92
|
+
version_requirements: *id005
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
89
95
|
none: false
|
90
|
-
requirements:
|
91
|
-
- -
|
92
|
-
- !ruby/object:Gem::Version
|
93
|
-
|
94
|
-
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
hash: 3
|
100
|
+
segments:
|
101
|
+
- 0
|
102
|
+
version: "0"
|
103
|
+
prerelease: false
|
95
104
|
name: pry-nav
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
|
-
requirements:
|
99
|
-
- - ! '>='
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
version: '0'
|
102
105
|
type: :development
|
103
|
-
|
104
|
-
|
106
|
+
version_requirements: *id006
|
107
|
+
- !ruby/object:Gem::Dependency
|
108
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
105
109
|
none: false
|
106
|
-
requirements:
|
107
|
-
- -
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
|
110
|
-
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
hash: 3
|
114
|
+
segments:
|
115
|
+
- 0
|
116
|
+
version: "0"
|
117
|
+
prerelease: false
|
111
118
|
name: sqlite3
|
112
|
-
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
|
-
requirements:
|
115
|
-
- - ! '>='
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '0'
|
118
119
|
type: :development
|
119
|
-
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
|
-
requirements:
|
123
|
-
- - ! '>='
|
124
|
-
- !ruby/object:Gem::Version
|
125
|
-
version: '0'
|
120
|
+
version_requirements: *id007
|
126
121
|
description: Fast fuzzy string matching for rails
|
127
|
-
email:
|
122
|
+
email:
|
128
123
|
- julien.letessier@gmail.com
|
129
124
|
executables: []
|
125
|
+
|
130
126
|
extensions: []
|
127
|
+
|
131
128
|
extra_rdoc_files: []
|
132
|
-
|
129
|
+
|
130
|
+
files:
|
133
131
|
- .gitignore
|
134
132
|
- .rspec
|
135
133
|
- .travis.yml
|
@@ -160,38 +158,41 @@ files:
|
|
160
158
|
- spec/fuzzily/trigram_spec.rb
|
161
159
|
- spec/meta_spec.rb
|
162
160
|
- spec/spec_helper.rb
|
163
|
-
|
161
|
+
has_rdoc: true
|
162
|
+
homepage: ""
|
164
163
|
licenses: []
|
164
|
+
|
165
165
|
post_install_message:
|
166
166
|
rdoc_options: []
|
167
|
-
|
167
|
+
|
168
|
+
require_paths:
|
168
169
|
- lib
|
169
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
170
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
170
171
|
none: false
|
171
|
-
requirements:
|
172
|
-
- -
|
173
|
-
- !ruby/object:Gem::Version
|
174
|
-
|
175
|
-
segments:
|
172
|
+
requirements:
|
173
|
+
- - ">="
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
hash: 3
|
176
|
+
segments:
|
176
177
|
- 0
|
177
|
-
|
178
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
178
|
+
version: "0"
|
179
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
179
180
|
none: false
|
180
|
-
requirements:
|
181
|
-
- -
|
182
|
-
- !ruby/object:Gem::Version
|
183
|
-
|
184
|
-
segments:
|
181
|
+
requirements:
|
182
|
+
- - ">="
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
hash: 3
|
185
|
+
segments:
|
185
186
|
- 0
|
186
|
-
|
187
|
+
version: "0"
|
187
188
|
requirements: []
|
189
|
+
|
188
190
|
rubyforge_project:
|
189
|
-
rubygems_version: 1.
|
191
|
+
rubygems_version: 1.3.9.5
|
190
192
|
signing_key:
|
191
193
|
specification_version: 3
|
192
|
-
summary: A fast, trigram-based, database-backed fuzzy string search/match engine for
|
193
|
-
|
194
|
-
test_files:
|
194
|
+
summary: A fast, trigram-based, database-backed fuzzy string search/match engine for Rails.
|
195
|
+
test_files:
|
195
196
|
- spec/fuzzily/migration_spec.rb
|
196
197
|
- spec/fuzzily/model_spec.rb
|
197
198
|
- spec/fuzzily/searchable_spec.rb
|