fuzzily 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fuzzily (0.1.0)
4
+ fuzzily (0.2.0)
5
5
  activerecord (>= 2.3.17)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -48,9 +48,7 @@ Instrument your model (your searchable fields do not have to be stored, they can
48
48
 
49
49
  Index your model (will happen automatically for new/updated records):
50
50
 
51
- MyStuff.find_each do |record|
52
- record.update_fuzzy_name!
53
- end
51
+ MyStuff.bulk_update_fuzzy_name
54
52
 
55
53
  Search!
56
54
 
@@ -87,6 +85,21 @@ If you want or need to name your index model differently (e.g. because you alrea
87
85
  end
88
86
 
89
87
 
88
+ ## Speeding things up
89
+
90
+ For large data sets (millions of rows to index), the "compatible" storage
91
+ used by default will typically no longer be enough to keep the index small
92
+ enough.
93
+
94
+ Users have reported **major improvements** (2 orders of magnitude) when turning
95
+ the `owner_type` and `fuzzy_field` columns of the `trigrams` table from
96
+ `VARCHAR` (the default) into `ENUM`. This is particularly efficient with
97
+ MySQL and pgSQL.
98
+
99
+ This is not the default in the gem as ActiveRecord does not support `ENUM`
100
+ columns in any version.
101
+
102
+
90
103
  ## License
91
104
 
92
105
  MIT licence. Quite permissive if you ask me.
@@ -15,7 +15,7 @@ module Fuzzily
15
15
  def up
16
16
  create_table trigrams_table_name do |t|
17
17
  t.string :trigram, :limit => 3
18
- t.integer :score
18
+ t.integer :score, :limit => 2
19
19
  t.integer :owner_id
20
20
  t.string :owner_type
21
21
  t.string :fuzzy_field
@@ -47,8 +47,8 @@ module Fuzzily
47
47
  self.scoped(:include => trigram_association).find_in_batches(:batch_size => 100) do |batch|
48
48
  inserts = []
49
49
  batch.each do |record|
50
- record.send(field).extend(String).trigrams.each do |trigram|
51
- inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, 1, trigram])
50
+ record.send(field).extend(String).scored_trigrams.each do |trigram, score|
51
+ inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, score, trigram])
52
52
  end
53
53
  end
54
54
 
@@ -66,8 +66,8 @@ module Fuzzily
66
66
 
67
67
  define_method update_trigrams_method do
68
68
  self.send(trigram_association).delete_all
69
- self.send(field).extend(String).trigrams.each do |trigram|
70
- self.send(trigram_association).create!(:score => 1, :trigram => trigram, :owner_type => self.class.name)
69
+ self.send(field).extend(String).scored_trigrams.each do |trigram, score|
70
+ self.send(trigram_association).create!(:score => score, :trigram => trigram, :owner_type => self.class.name)
71
71
  end
72
72
  end
73
73
 
@@ -4,7 +4,14 @@ module Fuzzily
4
4
  module String
5
5
  def trigrams
6
6
  normalized = self.normalize
7
- (0..(normalized.length - 3)).map { |index| normalized[index,3] }.uniq
7
+ number_of_trigrams = normalized.length - 3
8
+ trigrams = (0..number_of_trigrams).map { |index| normalized[index,3] }.uniq
9
+ end
10
+
11
+ def scored_trigrams
12
+ trigrams_ = self.trigrams
13
+ score = 32_768 / trigrams_.length
14
+ trigrams_.map { |t| [t, score] }
8
15
  end
9
16
 
10
17
  protected
@@ -1,3 +1,3 @@
1
1
  module Fuzzily
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -83,6 +83,16 @@ describe Fuzzily::Searchable do
83
83
  subject.find_by_fuzzy_name('Piris').should =~ [@paris, @palma]
84
84
  subject.find_by_fuzzy_name('Paradise').should =~ [@paris, @palma, @palmyre]
85
85
  end
86
+
87
+ it 'favours exact matches' do
88
+ subject.fuzzily_searchable :name
89
+ @new_york = subject.create(:name => 'New York')
90
+ @yorkshire = subject.create(:name => 'Yorkshire')
91
+ @york = subject.create(:name => 'York')
92
+ @yorkisthan = subject.create(:name => 'Yorkisthan')
93
+
94
+ subject.find_by_fuzzy_name('York').should == [@york, @yorkshire, @yorkisthan, @new_york]
95
+ end
86
96
  end
87
97
  end
88
98
 
metadata CHANGED
@@ -1,135 +1,133 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: fuzzily
3
- version: !ruby/object:Gem::Version
4
- version: 0.1.0
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Julien Letessier
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2013-03-04 00:00:00.000000000 Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: activerecord
16
- requirement: !ruby/object:Gem::Requirement
17
+
18
+ date: 2013-03-06 00:00:00 +00:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ requirement: &id001 !ruby/object:Gem::Requirement
17
23
  none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 33
28
+ segments:
29
+ - 2
30
+ - 3
31
+ - 17
21
32
  version: 2.3.17
22
- type: :runtime
23
33
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
34
+ name: activerecord
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ requirement: &id002 !ruby/object:Gem::Requirement
25
39
  none: false
26
- requirements:
27
- - - ! '>='
28
- - !ruby/object:Gem::Version
29
- version: 2.3.17
30
- - !ruby/object:Gem::Dependency
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ prerelease: false
31
48
  name: rspec
32
- requirement: !ruby/object:Gem::Requirement
33
- none: false
34
- requirements:
35
- - - ! '>='
36
- - !ruby/object:Gem::Version
37
- version: '0'
38
49
  type: :development
39
- prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
50
+ version_requirements: *id002
51
+ - !ruby/object:Gem::Dependency
52
+ requirement: &id003 !ruby/object:Gem::Requirement
41
53
  none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
46
- - !ruby/object:Gem::Dependency
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ prerelease: false
47
62
  name: rake
48
- requirement: !ruby/object:Gem::Requirement
49
- none: false
50
- requirements:
51
- - - ! '>='
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
63
  type: :development
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
64
+ version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ requirement: &id004 !ruby/object:Gem::Requirement
57
67
  none: false
58
- requirements:
59
- - - ! '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- - !ruby/object:Gem::Dependency
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ prerelease: false
63
76
  name: appraisal
64
- requirement: !ruby/object:Gem::Requirement
65
- none: false
66
- requirements:
67
- - - ! '>='
68
- - !ruby/object:Gem::Version
69
- version: '0'
70
77
  type: :development
71
- prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
78
+ version_requirements: *id004
79
+ - !ruby/object:Gem::Dependency
80
+ requirement: &id005 !ruby/object:Gem::Requirement
73
81
  none: false
74
- requirements:
75
- - - ! '>='
76
- - !ruby/object:Gem::Version
77
- version: '0'
78
- - !ruby/object:Gem::Dependency
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ hash: 3
86
+ segments:
87
+ - 0
88
+ version: "0"
89
+ prerelease: false
79
90
  name: pry
80
- requirement: !ruby/object:Gem::Requirement
81
- none: false
82
- requirements:
83
- - - ! '>='
84
- - !ruby/object:Gem::Version
85
- version: '0'
86
91
  type: :development
87
- prerelease: false
88
- version_requirements: !ruby/object:Gem::Requirement
92
+ version_requirements: *id005
93
+ - !ruby/object:Gem::Dependency
94
+ requirement: &id006 !ruby/object:Gem::Requirement
89
95
  none: false
90
- requirements:
91
- - - ! '>='
92
- - !ruby/object:Gem::Version
93
- version: '0'
94
- - !ruby/object:Gem::Dependency
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ hash: 3
100
+ segments:
101
+ - 0
102
+ version: "0"
103
+ prerelease: false
95
104
  name: pry-nav
96
- requirement: !ruby/object:Gem::Requirement
97
- none: false
98
- requirements:
99
- - - ! '>='
100
- - !ruby/object:Gem::Version
101
- version: '0'
102
105
  type: :development
103
- prerelease: false
104
- version_requirements: !ruby/object:Gem::Requirement
106
+ version_requirements: *id006
107
+ - !ruby/object:Gem::Dependency
108
+ requirement: &id007 !ruby/object:Gem::Requirement
105
109
  none: false
106
- requirements:
107
- - - ! '>='
108
- - !ruby/object:Gem::Version
109
- version: '0'
110
- - !ruby/object:Gem::Dependency
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ hash: 3
114
+ segments:
115
+ - 0
116
+ version: "0"
117
+ prerelease: false
111
118
  name: sqlite3
112
- requirement: !ruby/object:Gem::Requirement
113
- none: false
114
- requirements:
115
- - - ! '>='
116
- - !ruby/object:Gem::Version
117
- version: '0'
118
119
  type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
- requirements:
123
- - - ! '>='
124
- - !ruby/object:Gem::Version
125
- version: '0'
120
+ version_requirements: *id007
126
121
  description: Fast fuzzy string matching for rails
127
- email:
122
+ email:
128
123
  - julien.letessier@gmail.com
129
124
  executables: []
125
+
130
126
  extensions: []
127
+
131
128
  extra_rdoc_files: []
132
- files:
129
+
130
+ files:
133
131
  - .gitignore
134
132
  - .rspec
135
133
  - .travis.yml
@@ -160,38 +158,41 @@ files:
160
158
  - spec/fuzzily/trigram_spec.rb
161
159
  - spec/meta_spec.rb
162
160
  - spec/spec_helper.rb
163
- homepage: ''
161
+ has_rdoc: true
162
+ homepage: ""
164
163
  licenses: []
164
+
165
165
  post_install_message:
166
166
  rdoc_options: []
167
- require_paths:
167
+
168
+ require_paths:
168
169
  - lib
169
- required_ruby_version: !ruby/object:Gem::Requirement
170
+ required_ruby_version: !ruby/object:Gem::Requirement
170
171
  none: false
171
- requirements:
172
- - - ! '>='
173
- - !ruby/object:Gem::Version
174
- version: '0'
175
- segments:
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ hash: 3
176
+ segments:
176
177
  - 0
177
- hash: -3139686676920702806
178
- required_rubygems_version: !ruby/object:Gem::Requirement
178
+ version: "0"
179
+ required_rubygems_version: !ruby/object:Gem::Requirement
179
180
  none: false
180
- requirements:
181
- - - ! '>='
182
- - !ruby/object:Gem::Version
183
- version: '0'
184
- segments:
181
+ requirements:
182
+ - - ">="
183
+ - !ruby/object:Gem::Version
184
+ hash: 3
185
+ segments:
185
186
  - 0
186
- hash: -3139686676920702806
187
+ version: "0"
187
188
  requirements: []
189
+
188
190
  rubyforge_project:
189
- rubygems_version: 1.8.23
191
+ rubygems_version: 1.3.9.5
190
192
  signing_key:
191
193
  specification_version: 3
192
- summary: A fast, trigram-based, database-backed fuzzy string search/match engine for
193
- Rails.
194
- test_files:
194
+ summary: A fast, trigram-based, database-backed fuzzy string search/match engine for Rails.
195
+ test_files:
195
196
  - spec/fuzzily/migration_spec.rb
196
197
  - spec/fuzzily/model_spec.rb
197
198
  - spec/fuzzily/searchable_spec.rb