fuzzily_reloaded 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +26 -0
- data/.rspec +3 -0
- data/.travis.yml +39 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +61 -0
- data/LICENSE.txt +23 -0
- data/README.md +221 -0
- data/Rakefile +6 -0
- data/fuzzily.gemspec +28 -0
- data/gemfiles/rails51.gemfile +5 -0
- data/gemfiles/rails60.gemfile +5 -0
- data/lib/fuzzily.rb +7 -0
- data/lib/fuzzily/migration.rb +43 -0
- data/lib/fuzzily/model.rb +49 -0
- data/lib/fuzzily/searchable.rb +177 -0
- data/lib/fuzzily/trigram.rb +31 -0
- data/lib/fuzzily/version.rb +3 -0
- data/lib/fuzzily_reloaded.rb +1 -0
- data/spec/fuzzily/migration_spec.rb +41 -0
- data/spec/fuzzily/model_spec.rb +77 -0
- data/spec/fuzzily/searchable_spec.rb +201 -0
- data/spec/fuzzily/trigram_spec.rb +28 -0
- data/spec/meta_spec.rb +8 -0
- data/spec/spec_helper.rb +79 -0
- metadata +174 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ccfb4a3ddebd786b36963599dc73afc65feac6968873c5895deccaa3c5dae044
|
4
|
+
data.tar.gz: f11961215c5c6937c006ac69d03f0b69f362eec24c2e5dd571bae2728dc402ea
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 62d203f5c2d84624dbcdb98390eed6b042242b3017e34282289991ec26fd9602626ffc321f41e32260e73b78bdde9dc8bc53b6cbad03c9b6f566ea4fd1e47d51
|
7
|
+
data.tar.gz: a7ea210ef1db362b1d5abfca1191576d493b2f76f528ba3727eef8fd76f5d543939e476d3d470224796de33aa4244153c768ee4fa19fdeb475b484ff240a771f
|
data/.gitignore
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.config
|
4
|
+
.yardoc
|
5
|
+
InstalledFiles
|
6
|
+
_yardoc
|
7
|
+
coverage
|
8
|
+
doc/
|
9
|
+
lib/bundler/man
|
10
|
+
pkg
|
11
|
+
rdoc
|
12
|
+
spec/reports
|
13
|
+
test/tmp
|
14
|
+
test/version_tmp
|
15
|
+
tmp
|
16
|
+
|
17
|
+
# CTags
|
18
|
+
.tags*
|
19
|
+
|
20
|
+
# Data
|
21
|
+
*.gz
|
22
|
+
*.bz2
|
23
|
+
|
24
|
+
# Bundler
|
25
|
+
.bundle
|
26
|
+
vendor/bundle
|
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
---
|
2
|
+
language: ruby
|
3
|
+
services:
|
4
|
+
- mysql
|
5
|
+
- postgresql
|
6
|
+
before_script:
|
7
|
+
- psql -c 'create database fuzzily_test;' -U postgres
|
8
|
+
- mysql -e 'create database fuzzily_test;'
|
9
|
+
env:
|
10
|
+
- FUZZILY_ADAPTER=sqlite3
|
11
|
+
- FUZZILY_ADAPTER=mysql FUZZILY_DB_USER=travis
|
12
|
+
- FUZZILY_ADAPTER=postgresql FUZZILY_DB_USER=postgres
|
13
|
+
script:
|
14
|
+
- bundle exec rspec
|
15
|
+
rvm:
|
16
|
+
- 2.3.8
|
17
|
+
- 2.4.9
|
18
|
+
- 2.5.7
|
19
|
+
- 2.6.5
|
20
|
+
- 2.7.0
|
21
|
+
- ruby-head
|
22
|
+
- jruby-9.2.6.0
|
23
|
+
- jruby-head
|
24
|
+
gemfile:
|
25
|
+
- gemfiles/rails51.gemfile
|
26
|
+
- gemfiles/rails60.gemfile
|
27
|
+
matrix:
|
28
|
+
allow_failures:
|
29
|
+
- rvm: ruby-head
|
30
|
+
- rvm: jruby-head
|
31
|
+
- rvm: jruby-9.2.6.0
|
32
|
+
exclude:
|
33
|
+
- rvm: 2.3.8
|
34
|
+
gemfile: gemfiles/rails60.gemfile
|
35
|
+
- rvm: 2.4.9
|
36
|
+
gemfile: gemfiles/rails60.gemfile
|
37
|
+
fast_finish: true
|
38
|
+
branches:
|
39
|
+
only: master
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fuzzily_reloaded (1.0.0)
|
5
|
+
activerecord (>= 5.1)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activemodel (6.0.2.2)
|
11
|
+
activesupport (= 6.0.2.2)
|
12
|
+
activerecord (6.0.2.2)
|
13
|
+
activemodel (= 6.0.2.2)
|
14
|
+
activesupport (= 6.0.2.2)
|
15
|
+
activesupport (6.0.2.2)
|
16
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
17
|
+
i18n (>= 0.7, < 2)
|
18
|
+
minitest (~> 5.1)
|
19
|
+
tzinfo (~> 1.1)
|
20
|
+
zeitwerk (~> 2.2)
|
21
|
+
concurrent-ruby (1.1.6)
|
22
|
+
diff-lcs (1.3)
|
23
|
+
i18n (1.8.2)
|
24
|
+
concurrent-ruby (~> 1.0)
|
25
|
+
minitest (5.14.0)
|
26
|
+
mysql2 (0.5.3)
|
27
|
+
pg (1.2.3)
|
28
|
+
rake (13.0.1)
|
29
|
+
rspec (3.9.0)
|
30
|
+
rspec-core (~> 3.9.0)
|
31
|
+
rspec-expectations (~> 3.9.0)
|
32
|
+
rspec-mocks (~> 3.9.0)
|
33
|
+
rspec-core (3.9.1)
|
34
|
+
rspec-support (~> 3.9.1)
|
35
|
+
rspec-expectations (3.9.1)
|
36
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
37
|
+
rspec-support (~> 3.9.0)
|
38
|
+
rspec-mocks (3.9.1)
|
39
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
40
|
+
rspec-support (~> 3.9.0)
|
41
|
+
rspec-support (3.9.2)
|
42
|
+
sqlite3 (1.4.2)
|
43
|
+
thread_safe (0.3.6)
|
44
|
+
tzinfo (1.2.6)
|
45
|
+
thread_safe (~> 0.1)
|
46
|
+
zeitwerk (2.3.0)
|
47
|
+
|
48
|
+
PLATFORMS
|
49
|
+
ruby
|
50
|
+
|
51
|
+
DEPENDENCIES
|
52
|
+
bundler
|
53
|
+
fuzzily_reloaded!
|
54
|
+
mysql2
|
55
|
+
pg
|
56
|
+
rake
|
57
|
+
rspec
|
58
|
+
sqlite3
|
59
|
+
|
60
|
+
BUNDLED WITH
|
61
|
+
2.1.2
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Copyright (c) 2020, Sven Pachnit aka. 2called-chaos (forked)
|
2
|
+
Copyright (c) 2012, HouseTrip Ltd
|
3
|
+
|
4
|
+
MIT License
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
a copy of this software and associated documentation files (the
|
8
|
+
"Software"), to deal in the Software without restriction, including
|
9
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
the following conditions:
|
13
|
+
|
14
|
+
The above copyright notice and this permission notice shall be
|
15
|
+
included in all copies or substantial portions of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,221 @@
|
|
1
|
+
# Fuzzily - fuzzy string matching for ActiveRecord
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/fuzzily_reloaded.png)](https://badge.fury.io/rb/fuzzily_reloaded)
|
4
|
+
[![Build Status](https://travis-ci.org/2called-chaos/fuzzily.png?branch=master)](https://travis-ci.org/2called-chaos/fuzzily)
|
5
|
+
|
6
|
+
> Show me photos of **Marakech** !
|
7
|
+
>
|
8
|
+
> Here are some photos of **Marrakesh**, Morocco.
|
9
|
+
> Did you mean **Martanesh**, Albania, **Marakkanam**, India, or **Marasheshty**, Romania?
|
10
|
+
|
11
|
+
Fuzzily finds misspelled, prefix, or partial needles in a haystack of
|
12
|
+
strings. It's a fast, [trigram](http://en.wikipedia.org/wiki/N-gram)-based, database-backed [fuzzy](http://en.wikipedia.org/wiki/Approximate_string_matching) string search/match engine for Rails.
|
13
|
+
Loosely inspired from an [old blog post](http://unirec.blogspot.co.uk/2007/12/live-fuzzy-search-using-n-grams-in.html).
|
14
|
+
|
15
|
+
Tested with ActiveRecord (5.1, 6.0) on various Rubies (2.3, 2.4, 2.5, 2.6, 2.7) and the most common adapters (SQLite3, MySQL, and PostgreSQL).
|
16
|
+
|
17
|
+
If your dataset is big, if you need yet more speed, or do not use ActiveRecord,
|
18
|
+
check out [blurrily](http://github.com/mezis/blurrily), another gem (backed with a C extension)
|
19
|
+
with the same intent.
|
20
|
+
|
21
|
+
## Fork differences
|
22
|
+
|
23
|
+
- Added support for Rails 5.1 and 6.0
|
24
|
+
- Removed support for Rails <5.1
|
25
|
+
|
26
|
+
### Breaking changes
|
27
|
+
|
28
|
+
- Dirty attributes behaviour has changed in after_save context.
|
29
|
+
Use `saved_change_to_ATTR?` instead of `ATTR_changed?`!
|
30
|
+
- Semi-breaking: The string is now being checked for `blank?` instead of `nil?` to prevent `***` ngrams
|
31
|
+
|
32
|
+
### Fixes
|
33
|
+
|
34
|
+
- Numbers are now supported but using a Converter is recommended
|
35
|
+
- Fixed deprecation warning regarding uniqueness validator
|
36
|
+
|
37
|
+
|
38
|
+
## Installation
|
39
|
+
|
40
|
+
Add this line to your application's Gemfile:
|
41
|
+
|
42
|
+
gem 'fuzzily_reloaded'
|
43
|
+
|
44
|
+
And then execute:
|
45
|
+
|
46
|
+
$ bundle
|
47
|
+
|
48
|
+
Or install it yourself as:
|
49
|
+
|
50
|
+
$ gem install fuzzily_reloaded
|
51
|
+
|
52
|
+
## Usage
|
53
|
+
|
54
|
+
You'll need to set up 2 things:
|
55
|
+
|
56
|
+
- a trigram model (your search index) and its migration
|
57
|
+
- the model you want to search for
|
58
|
+
|
59
|
+
Create an ActiveRecord model in your app (this will be used to store a "fuzzy index" of all the models and fields you will be indexing):
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
class Trigram < ActiveRecord::Base
|
63
|
+
include Fuzzily::Model
|
64
|
+
end
|
65
|
+
```
|
66
|
+
|
67
|
+
Create a migration for it:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
class AddTrigramsModel < ActiveRecord::Migration[6.0]
|
71
|
+
extend Fuzzily::Migration
|
72
|
+
end
|
73
|
+
```
|
74
|
+
|
75
|
+
Instrument your model:
|
76
|
+
|
77
|
+
```ruby
|
78
|
+
class MyStuff < ActiveRecord::Base
|
79
|
+
# assuming my_stuffs has a 'name' attribute
|
80
|
+
fuzzily_searchable :name
|
81
|
+
end
|
82
|
+
```
|
83
|
+
|
84
|
+
*Note: The `name` part in the following method calls refers to the `:name` field. Replace it to match your searchable attribute.*
|
85
|
+
|
86
|
+
Index your model (will happen automatically for new/updated records):
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
MyStuff.bulk_update_fuzzy_name
|
90
|
+
```
|
91
|
+
|
92
|
+
Search!
|
93
|
+
|
94
|
+
```ruby
|
95
|
+
MyStuff.find_by_fuzzy_name('Some Name', :limit => 10)
|
96
|
+
# => records
|
97
|
+
```
|
98
|
+
|
99
|
+
You can force an update on a specific record with
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
MyStuff.find(123).update_fuzzy_name!
|
103
|
+
```
|
104
|
+
|
105
|
+
## Handling numbers
|
106
|
+
|
107
|
+
Numbers `\d` are supported but it is recommended to evaluate a custom conversion.
|
108
|
+
We had way better results for product names that included both Arabic and Roman numerals
|
109
|
+
with the following converter (note that both the search input and fuzzily input are converted):
|
110
|
+
|
111
|
+
https://gist.github.com/2called-chaos/64f64fc7fb35959fbf68f6018494a698
|
112
|
+
|
113
|
+
## Indexing more than one field
|
114
|
+
|
115
|
+
Just list all the fields you want to index, or call `fuzzily_searchable` more than once:
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
class MyStuff < ActiveRecord::Base
|
119
|
+
fuzzily_searchable :name_fr, :name_en
|
120
|
+
fuzzily_searchable :name_de
|
121
|
+
end
|
122
|
+
```
|
123
|
+
|
124
|
+
## Custom name for the index model
|
125
|
+
|
126
|
+
If you want or need to name your index model differently (e.g. because you already have a class called `Trigram`):
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
class CustomTrigram < ActiveRecord::Base
|
130
|
+
include Fuzzily::Model
|
131
|
+
end
|
132
|
+
|
133
|
+
class AddTrigramsModel < ActiveRecord::Migration[6.0]
|
134
|
+
extend Fuzzily::Migration
|
135
|
+
self.trigrams_table_name = :custom_trigrams
|
136
|
+
end
|
137
|
+
|
138
|
+
class MyStuff < ActiveRecord::Base
|
139
|
+
fuzzily_searchable :name, class_name: 'CustomTrigram'
|
140
|
+
end
|
141
|
+
```
|
142
|
+
|
143
|
+
## Speeding things up
|
144
|
+
|
145
|
+
For large data sets (millions of rows to index), the "compatible" storage
|
146
|
+
used by default will typically no longer be enough to keep the index small
|
147
|
+
enough.
|
148
|
+
|
149
|
+
Users have reported **major improvements** (2 orders of magnitude) when turning
|
150
|
+
the `owner_type` and `fuzzy_field` columns of the `trigrams` table from
|
151
|
+
`VARCHAR` (the default) into `ENUM`. This is particularly efficient with
|
152
|
+
MySQL and pgSQL.
|
153
|
+
|
154
|
+
This is not the default in the gem as ActiveRecord does not support `ENUM`
|
155
|
+
columns in any version.
|
156
|
+
|
157
|
+
## UUID's
|
158
|
+
|
159
|
+
When using Rails with UUID primary keys, you will need to change the `owner_id` column type to `UUID`.
|
160
|
+
|
161
|
+
```ruby
|
162
|
+
class AddTrigramsModel < ActiveRecord::Migration[6.0]
|
163
|
+
extend Fuzzily::Migration
|
164
|
+
self.trigrams_owner_id_column_type = :uuid
|
165
|
+
end
|
166
|
+
```
|
167
|
+
|
168
|
+
## Model primary key (id) is VARCHAR
|
169
|
+
|
170
|
+
If your model's primary key (id) is a `VARCHAR` instead of an `INT`, you will need to change the `owner_id` column type from `INT` to `VARCHAR` in the trigrams table.
|
171
|
+
|
172
|
+
## Searching virtual attributes
|
173
|
+
|
174
|
+
Your searchable fields do not have to be stored, they can be dynamic methods
|
175
|
+
too. Just remember to add a virtual change method as well.
|
176
|
+
For instance, if your model has `first_name` and `last_name` attributes, and you
|
177
|
+
want to index a compound `name` dynamic attribute:
|
178
|
+
|
179
|
+
```ruby
|
180
|
+
class Employee < ActiveRecord::Base
|
181
|
+
fuzzily_searchable :name
|
182
|
+
def name
|
183
|
+
"#{first_name} #{last_name}"
|
184
|
+
end
|
185
|
+
|
186
|
+
def saved_change_to_name?
|
187
|
+
saved_change_to_first_name? || saved_change_to_last_name?
|
188
|
+
end
|
189
|
+
end
|
190
|
+
```
|
191
|
+
|
192
|
+
## Update Trigram index using `sidekiq-delay`
|
193
|
+
|
194
|
+
For larger text, building the index takes time. It can therefore be moved into a delayed task using `sidekiq` + `sidekiq-delay` or the `delayed_job` gem; both provide a `delay` method that moves execution to the background. Enable this by adding the `async` option:
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
class Employee < ActiveRecord::Base
|
198
|
+
fuzzily_searchable :name, async: true
|
199
|
+
|
200
|
+
end
|
201
|
+
```
|
202
|
+
|
203
|
+
## License
|
204
|
+
|
205
|
+
MIT licence. Quite permissive if you ask me.
|
206
|
+
|
207
|
+
Copyright (c) 2013, HouseTrip Ltd.
|
208
|
+
Copyright (c) 2020, Sven Pachnit aka. 2called-chaos (forked)
|
209
|
+
|
210
|
+
## Contributing
|
211
|
+
|
212
|
+
1. Fork it
|
213
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
214
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
215
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
216
|
+
5. Create a new Pull Request
|
217
|
+
|
218
|
+
|
219
|
+
Thanks to @mezis for creating this literal gem.
|
220
|
+
Thanks to @bclennox, @fdegiuli, @nickbender, @Shanison, @rickbutton for pointing out
|
221
|
+
and/or helping on various issues.
|
data/Rakefile
ADDED
data/fuzzily.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require "fuzzily/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "fuzzily_reloaded"
|
7
|
+
spec.version = Fuzzily::VERSION
|
8
|
+
spec.authors = ["Julien Letessier", "Sven Pachnit"]
|
9
|
+
spec.email = ["julien.letessier@gmail.com", "sven@bmonkeys.net"]
|
10
|
+
spec.description = %q{Fast fuzzy string matching for rails}
|
11
|
+
spec.summary = %q{A fast, trigram-based, database-backed fuzzy string search/match engine for Rails.}
|
12
|
+
spec.homepage = "http://github.com/2called-chaos/fuzzily"
|
13
|
+
spec.license = "MIT"
|
14
|
+
|
15
|
+
spec.add_runtime_dependency "activerecord", ">= 5.1"
|
16
|
+
|
17
|
+
spec.add_development_dependency "bundler"
|
18
|
+
spec.add_development_dependency "rake"
|
19
|
+
spec.add_development_dependency "rspec"
|
20
|
+
spec.add_development_dependency "sqlite3"
|
21
|
+
spec.add_development_dependency "pg"
|
22
|
+
spec.add_development_dependency "mysql2"
|
23
|
+
|
24
|
+
spec.files = `git ls-files`.split($/)
|
25
|
+
spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
26
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
27
|
+
spec.require_paths = ["lib"]
|
28
|
+
end
|