fuzzily_reloaded 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +26 -0
- data/.rspec +3 -0
- data/.travis.yml +39 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +61 -0
- data/LICENSE.txt +23 -0
- data/README.md +221 -0
- data/Rakefile +6 -0
- data/fuzzily.gemspec +28 -0
- data/gemfiles/rails51.gemfile +5 -0
- data/gemfiles/rails60.gemfile +5 -0
- data/lib/fuzzily.rb +7 -0
- data/lib/fuzzily/migration.rb +43 -0
- data/lib/fuzzily/model.rb +49 -0
- data/lib/fuzzily/searchable.rb +177 -0
- data/lib/fuzzily/trigram.rb +31 -0
- data/lib/fuzzily/version.rb +3 -0
- data/lib/fuzzily_reloaded.rb +1 -0
- data/spec/fuzzily/migration_spec.rb +41 -0
- data/spec/fuzzily/model_spec.rb +77 -0
- data/spec/fuzzily/searchable_spec.rb +201 -0
- data/spec/fuzzily/trigram_spec.rb +28 -0
- data/spec/meta_spec.rb +8 -0
- data/spec/spec_helper.rb +79 -0
- metadata +174 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ccfb4a3ddebd786b36963599dc73afc65feac6968873c5895deccaa3c5dae044
|
4
|
+
data.tar.gz: f11961215c5c6937c006ac69d03f0b69f362eec24c2e5dd571bae2728dc402ea
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 62d203f5c2d84624dbcdb98390eed6b042242b3017e34282289991ec26fd9602626ffc321f41e32260e73b78bdde9dc8bc53b6cbad03c9b6f566ea4fd1e47d51
|
7
|
+
data.tar.gz: a7ea210ef1db362b1d5abfca1191576d493b2f76f528ba3727eef8fd76f5d543939e476d3d470224796de33aa4244153c768ee4fa19fdeb475b484ff240a771f
|
data/.gitignore
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.config
|
4
|
+
.yardoc
|
5
|
+
InstalledFiles
|
6
|
+
_yardoc
|
7
|
+
coverage
|
8
|
+
doc/
|
9
|
+
lib/bundler/man
|
10
|
+
pkg
|
11
|
+
rdoc
|
12
|
+
spec/reports
|
13
|
+
test/tmp
|
14
|
+
test/version_tmp
|
15
|
+
tmp
|
16
|
+
|
17
|
+
# CTags
|
18
|
+
.tags*
|
19
|
+
|
20
|
+
# Data
|
21
|
+
*.gz
|
22
|
+
*.bz2
|
23
|
+
|
24
|
+
# Bundler
|
25
|
+
.bundle
|
26
|
+
vendor/bundle
|
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
---
|
2
|
+
language: ruby
|
3
|
+
services:
|
4
|
+
- mysql
|
5
|
+
- postgresql
|
6
|
+
before_script:
|
7
|
+
- psql -c 'create database fuzzily_test;' -U postgres
|
8
|
+
- mysql -e 'create database fuzzily_test;'
|
9
|
+
env:
|
10
|
+
- FUZZILY_ADAPTER=sqlite3
|
11
|
+
- FUZZILY_ADAPTER=mysql FUZZILY_DB_USER=travis
|
12
|
+
- FUZZILY_ADAPTER=postgresql FUZZILY_DB_USER=postgres
|
13
|
+
script:
|
14
|
+
- bundle exec rspec
|
15
|
+
rvm:
|
16
|
+
- 2.3.8
|
17
|
+
- 2.4.9
|
18
|
+
- 2.5.7
|
19
|
+
- 2.6.5
|
20
|
+
- 2.7.0
|
21
|
+
- ruby-head
|
22
|
+
- jruby-9.2.6.0
|
23
|
+
- jruby-head
|
24
|
+
gemfile:
|
25
|
+
- gemfiles/rails51.gemfile
|
26
|
+
- gemfiles/rails60.gemfile
|
27
|
+
matrix:
|
28
|
+
allow_failures:
|
29
|
+
- rvm: ruby-head
|
30
|
+
- rvm: jruby-head
|
31
|
+
- rvm: jruby-9.2.6.0
|
32
|
+
exclude:
|
33
|
+
- rvm: 2.3.8
|
34
|
+
gemfile: gemfiles/rails60.gemfile
|
35
|
+
- rvm: 2.4.9
|
36
|
+
gemfile: gemfiles/rails60.gemfile
|
37
|
+
fast_finish: true
|
38
|
+
branches:
|
39
|
+
only: master
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fuzzily_reloaded (1.0.0)
|
5
|
+
activerecord (>= 5.1)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activemodel (6.0.2.2)
|
11
|
+
activesupport (= 6.0.2.2)
|
12
|
+
activerecord (6.0.2.2)
|
13
|
+
activemodel (= 6.0.2.2)
|
14
|
+
activesupport (= 6.0.2.2)
|
15
|
+
activesupport (6.0.2.2)
|
16
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
17
|
+
i18n (>= 0.7, < 2)
|
18
|
+
minitest (~> 5.1)
|
19
|
+
tzinfo (~> 1.1)
|
20
|
+
zeitwerk (~> 2.2)
|
21
|
+
concurrent-ruby (1.1.6)
|
22
|
+
diff-lcs (1.3)
|
23
|
+
i18n (1.8.2)
|
24
|
+
concurrent-ruby (~> 1.0)
|
25
|
+
minitest (5.14.0)
|
26
|
+
mysql2 (0.5.3)
|
27
|
+
pg (1.2.3)
|
28
|
+
rake (13.0.1)
|
29
|
+
rspec (3.9.0)
|
30
|
+
rspec-core (~> 3.9.0)
|
31
|
+
rspec-expectations (~> 3.9.0)
|
32
|
+
rspec-mocks (~> 3.9.0)
|
33
|
+
rspec-core (3.9.1)
|
34
|
+
rspec-support (~> 3.9.1)
|
35
|
+
rspec-expectations (3.9.1)
|
36
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
37
|
+
rspec-support (~> 3.9.0)
|
38
|
+
rspec-mocks (3.9.1)
|
39
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
40
|
+
rspec-support (~> 3.9.0)
|
41
|
+
rspec-support (3.9.2)
|
42
|
+
sqlite3 (1.4.2)
|
43
|
+
thread_safe (0.3.6)
|
44
|
+
tzinfo (1.2.6)
|
45
|
+
thread_safe (~> 0.1)
|
46
|
+
zeitwerk (2.3.0)
|
47
|
+
|
48
|
+
PLATFORMS
|
49
|
+
ruby
|
50
|
+
|
51
|
+
DEPENDENCIES
|
52
|
+
bundler
|
53
|
+
fuzzily_reloaded!
|
54
|
+
mysql2
|
55
|
+
pg
|
56
|
+
rake
|
57
|
+
rspec
|
58
|
+
sqlite3
|
59
|
+
|
60
|
+
BUNDLED WITH
|
61
|
+
2.1.2
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Copyright (c) 2020, Sven Pachnit aka. 2called-chaos (forked)
|
2
|
+
Copyright (c) 2012, HouseTrip Ltd
|
3
|
+
|
4
|
+
MIT License
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
a copy of this software and associated documentation files (the
|
8
|
+
"Software"), to deal in the Software without restriction, including
|
9
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
the following conditions:
|
13
|
+
|
14
|
+
The above copyright notice and this permission notice shall be
|
15
|
+
included in all copies or substantial portions of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,221 @@
|
|
1
|
+
# Fuzzily - fuzzy string matching for ActiveRecord
|
2
|
+
|
3
|
+
[Gem Version](https://badge.fury.io/rb/fuzzily_reloaded)
|
4
|
+
[Build Status](https://travis-ci.org/2called-chaos/fuzzily)
|
5
|
+
|
6
|
+
> Show me photos of **Marakech** !
|
7
|
+
>
|
8
|
+
> Here are some photos of **Marrakesh**, Morocco.
|
9
|
+
> Did you mean **Martanesh**, Albania, **Marakkanam**, India, or **Marasheshty**, Romania?
|
10
|
+
|
11
|
+
Fuzzily finds misspelled, prefix, or partial needles in a haystack of
|
12
|
+
strings. It's a fast, [trigram](http://en.wikipedia.org/wiki/N-gram)-based, database-backed [fuzzy](http://en.wikipedia.org/wiki/Approximate_string_matching) string search/match engine for Rails.
|
13
|
+
Loosely inspired by an [old blog post](http://unirec.blogspot.co.uk/2007/12/live-fuzzy-search-using-n-grams-in.html).
|
14
|
+
|
15
|
+
Tested with ActiveRecord (5.1, 6.0) on various Rubies (2.3, 2.4, 2.5, 2.6, 2.7) and the most common adapters (SQLite3, MySQL, and PostgreSQL).
|
16
|
+
|
17
|
+
If your dataset is big, if you need yet more speed, or do not use ActiveRecord,
|
18
|
+
check out [blurrily](http://github.com/mezis/blurrily), another gem (backed with a C extension)
|
19
|
+
with the same intent.
|
20
|
+
|
21
|
+
## Fork differences
|
22
|
+
|
23
|
+
- Added support for Rails 5.1 and 6.0
|
24
|
+
- Removed support for Rails <5.1
|
25
|
+
|
26
|
+
### Breaking changes
|
27
|
+
|
28
|
+
- Dirty attributes behaviour has changed in after_save context.
|
29
|
+
Use `saved_change_to_ATTR?` instead of `ATTR_changed?`!
|
30
|
+
- Semi-breaking: The string is now being checked for `blank?` instead of `nil?` to prevent `***` ngrams
|
31
|
+
|
32
|
+
### Fixes
|
33
|
+
|
34
|
+
- Numbers are now supported but using a Converter is recommended
|
35
|
+
- Fixed deprecation warning regarding uniqueness validator
|
36
|
+
|
37
|
+
|
38
|
+
## Installation
|
39
|
+
|
40
|
+
Add this line to your application's Gemfile:
|
41
|
+
|
42
|
+
gem 'fuzzily_reloaded'
|
43
|
+
|
44
|
+
And then execute:
|
45
|
+
|
46
|
+
$ bundle
|
47
|
+
|
48
|
+
Or install it yourself as:
|
49
|
+
|
50
|
+
$ gem install fuzzily_reloaded
|
51
|
+
|
52
|
+
## Usage
|
53
|
+
|
54
|
+
You'll need to set up 2 things:
|
55
|
+
|
56
|
+
- a trigram model (your search index) and its migration
|
57
|
+
- the model you want to search for
|
58
|
+
|
59
|
+
Create an ActiveRecord model in your app (this will be used to store a "fuzzy index" of all the models and fields you will be indexing):
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
class Trigram < ActiveRecord::Base
|
63
|
+
include Fuzzily::Model
|
64
|
+
end
|
65
|
+
```
|
66
|
+
|
67
|
+
Create a migration for it:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
class AddTrigramsModel < ActiveRecord::Migration[6.0]
|
71
|
+
extend Fuzzily::Migration
|
72
|
+
end
|
73
|
+
```
|
74
|
+
|
75
|
+
Instrument your model:
|
76
|
+
|
77
|
+
```ruby
|
78
|
+
class MyStuff < ActiveRecord::Base
|
79
|
+
# assuming my_stuffs has a 'name' attribute
|
80
|
+
fuzzily_searchable :name
|
81
|
+
end
|
82
|
+
```
|
83
|
+
|
84
|
+
*Note: The `name` part in the following method calls refers to the `:name` field. Replace it to match your searchable attribute.*
|
85
|
+
|
86
|
+
Index your model (will happen automatically for new/updated records):
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
MyStuff.bulk_update_fuzzy_name
|
90
|
+
```
|
91
|
+
|
92
|
+
Search!
|
93
|
+
|
94
|
+
```ruby
|
95
|
+
MyStuff.find_by_fuzzy_name('Some Name', :limit => 10)
|
96
|
+
# => records
|
97
|
+
```
|
98
|
+
|
99
|
+
You can force an update on a specific record with
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
MyStuff.find(123).update_fuzzy_name!
|
103
|
+
```
|
104
|
+
|
105
|
+
## Handling numbers
|
106
|
+
|
107
|
+
Numbers `\d` are supported but it is recommended to evaluate a custom conversion.
|
108
|
+
We had way better results for product names that included both Arabic and Roman numerals
|
109
|
+
with the following converter (note that both the search input and fuzzily input are converted):
|
110
|
+
|
111
|
+
https://gist.github.com/2called-chaos/64f64fc7fb35959fbf68f6018494a698
|
112
|
+
|
113
|
+
## Indexing more than one field
|
114
|
+
|
115
|
+
Just list all the fields you want to index, or call `fuzzily_searchable` more than once:
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
class MyStuff < ActiveRecord::Base
|
119
|
+
fuzzily_searchable :name_fr, :name_en
|
120
|
+
fuzzily_searchable :name_de
|
121
|
+
end
|
122
|
+
```
|
123
|
+
|
124
|
+
## Custom name for the index model
|
125
|
+
|
126
|
+
If you want or need to name your index model differently (e.g. because you already have a class called `Trigram`):
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
class CustomTrigram < ActiveRecord::Base
|
130
|
+
include Fuzzily::Model
|
131
|
+
end
|
132
|
+
|
133
|
+
class AddTrigramsModel < ActiveRecord::Migration[6.0]
|
134
|
+
extend Fuzzily::Migration
|
135
|
+
self.trigrams_table_name = :custom_trigrams
|
136
|
+
end
|
137
|
+
|
138
|
+
class MyStuff < ActiveRecord::Base
|
139
|
+
fuzzily_searchable :name, class_name: 'CustomTrigram'
|
140
|
+
end
|
141
|
+
```
|
142
|
+
|
143
|
+
## Speeding things up
|
144
|
+
|
145
|
+
For large data sets (millions of rows to index), the "compatible" storage
|
146
|
+
used by default will typically no longer be enough to keep the index small
|
147
|
+
enough.
|
148
|
+
|
149
|
+
Users have reported **major improvements** (2 orders of magnitude) when turning
|
150
|
+
the `owner_type` and `fuzzy_field` columns of the `trigrams` table from
|
151
|
+
`VARCHAR` (the default) into `ENUM`. This is particularly efficient with
|
152
|
+
MySQL and pgSQL.
|
153
|
+
|
154
|
+
This is not the default in the gem as ActiveRecord does not support `ENUM`
|
155
|
+
columns in any version.
|
156
|
+
|
157
|
+
## UUID's
|
158
|
+
|
159
|
+
When using Rails with UUIDs, you will need to change the `owner_id` column type to `UUID`.
|
160
|
+
|
161
|
+
```ruby
|
162
|
+
class AddTrigramsModel < ActiveRecord::Migration[6.0]
|
163
|
+
extend Fuzzily::Migration
|
164
|
+
self.trigrams_owner_id_column_type = :uuid
|
165
|
+
end
|
166
|
+
```
|
167
|
+
|
168
|
+
## Model primary key (id) is VARCHAR
|
169
|
+
|
170
|
+
If you set your Model primary key (id) as `VARCHAR` instead of `INT`, you will need to change the `owner_id` column type from `INT` to `VARCHAR` in the trigrams table.
|
171
|
+
|
172
|
+
## Searching virtual attributes
|
173
|
+
|
174
|
+
Your searchable fields do not have to be stored, they can be dynamic methods
|
175
|
+
too. Just remember to add a virtual change method as well.
|
176
|
+
For instance, if your model has `first_name` and `last_name` attributes, and you
|
177
|
+
want to index a compound `name` dynamic attribute:
|
178
|
+
|
179
|
+
```ruby
|
180
|
+
class Employee < ActiveRecord::Base
|
181
|
+
fuzzily_searchable :name
|
182
|
+
def name
|
183
|
+
"#{first_name} #{last_name}"
|
184
|
+
end
|
185
|
+
|
186
|
+
def saved_change_to_name?
|
187
|
+
saved_change_to_first_name? || saved_change_to_last_name?
|
188
|
+
end
|
189
|
+
end
|
190
|
+
```
|
191
|
+
|
192
|
+
## Update Trigram index using `sidekiq-delay`
|
193
|
+
|
194
|
+
Building the index for larger text takes time, so it can be moved into a delayed task using the `sidekiq` + `sidekiq-delay` or `delayed_job` gems. Both provide a `delay` method that moves the execution to the background; enable it by adding the `async` option:
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
class Employee < ActiveRecord::Base
|
198
|
+
fuzzily_searchable :name, async: true
|
199
|
+
|
200
|
+
end
|
201
|
+
```
|
202
|
+
|
203
|
+
## License
|
204
|
+
|
205
|
+
MIT licence. Quite permissive if you ask me.
|
206
|
+
|
207
|
+
Copyright (c) 2013, HouseTrip Ltd.
|
208
|
+
Copyright (c) 2020, Sven Pachnit aka. 2called-chaos (forked)
|
209
|
+
|
210
|
+
## Contributing
|
211
|
+
|
212
|
+
1. Fork it
|
213
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
214
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
215
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
216
|
+
5. Create a new Pull Request
|
217
|
+
|
218
|
+
|
219
|
+
Thanks to @mezis for creating this literal gem.
|
220
|
+
Thanks to @bclennox, @fdegiuli, @nickbender, @Shanison, @rickbutton for pointing out
|
221
|
+
and/or helping on various issues.
|
data/Rakefile
ADDED
data/fuzzily.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require "fuzzily/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "fuzzily_reloaded"
|
7
|
+
spec.version = Fuzzily::VERSION
|
8
|
+
spec.authors = ["Julien Letessier", "Sven Pachnit"]
|
9
|
+
spec.email = ["julien.letessier@gmail.com", "sven@bmonkeys.net"]
|
10
|
+
spec.description = %q{Fast fuzzy string matching for rails}
|
11
|
+
spec.summary = %q{A fast, trigram-based, database-backed fuzzy string search/match engine for Rails.}
|
12
|
+
spec.homepage = "http://github.com/2called-chaos/fuzzily"
|
13
|
+
spec.license = "MIT"
|
14
|
+
|
15
|
+
spec.add_runtime_dependency "activerecord", ">= 5.1"
|
16
|
+
|
17
|
+
spec.add_development_dependency "bundler"
|
18
|
+
spec.add_development_dependency "rake"
|
19
|
+
spec.add_development_dependency "rspec"
|
20
|
+
spec.add_development_dependency "sqlite3"
|
21
|
+
spec.add_development_dependency "pg"
|
22
|
+
spec.add_development_dependency "mysql2"
|
23
|
+
|
24
|
+
spec.files = `git ls-files`.split($/)
|
25
|
+
spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
26
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
27
|
+
spec.require_paths = ["lib"]
|
28
|
+
end
|