fuzzily 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -14,3 +14,10 @@ spec/reports
14
14
  test/tmp
15
15
  test/version_tmp
16
16
  tmp
17
+
18
+ # CTags
19
+ .tags*
20
+
21
+ # Data
22
+ *.gz
23
+ *.bz2
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 1.9.2-p320
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fuzzily (0.2.0)
4
+ fuzzily (0.2.1)
5
5
  activerecord (>= 2.3.17)
6
6
 
7
7
  GEM
@@ -24,10 +24,11 @@ GEM
24
24
  arel (3.0.2)
25
25
  builder (3.0.4)
26
26
  coderay (1.0.9)
27
- diff-lcs (1.1.3)
28
- i18n (0.6.1)
27
+ diff-lcs (1.2.1)
28
+ i18n (0.6.4)
29
29
  method_source (0.8.1)
30
30
  multi_json (1.6.1)
31
+ mysql2 (0.3.11)
31
32
  pry (0.9.12)
32
33
  coderay (~> 1.0.5)
33
34
  method_source (~> 0.8)
@@ -35,17 +36,17 @@ GEM
35
36
  pry-nav (0.2.3)
36
37
  pry (~> 0.9.10)
37
38
  rake (10.0.3)
38
- rspec (2.12.0)
39
- rspec-core (~> 2.12.0)
40
- rspec-expectations (~> 2.12.0)
41
- rspec-mocks (~> 2.12.0)
42
- rspec-core (2.12.2)
43
- rspec-expectations (2.12.1)
44
- diff-lcs (~> 1.1.3)
45
- rspec-mocks (2.12.2)
46
- slop (3.4.3)
39
+ rspec (2.13.0)
40
+ rspec-core (~> 2.13.0)
41
+ rspec-expectations (~> 2.13.0)
42
+ rspec-mocks (~> 2.13.0)
43
+ rspec-core (2.13.1)
44
+ rspec-expectations (2.13.0)
45
+ diff-lcs (>= 1.1.3, < 2.0)
46
+ rspec-mocks (2.13.0)
47
+ slop (3.4.4)
47
48
  sqlite3 (1.3.7)
48
- tzinfo (0.3.35)
49
+ tzinfo (0.3.37)
49
50
 
50
51
  PLATFORMS
51
52
  ruby
@@ -53,6 +54,7 @@ PLATFORMS
53
54
  DEPENDENCIES
54
55
  appraisal
55
56
  fuzzily!
57
+ mysql2
56
58
  pry
57
59
  pry-nav
58
60
  rake
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Fuzzily
2
2
 
3
+ [![Build Status](https://travis-ci.org/mezis/fuzzily.png?branch=master)](https://travis-ci.org/mezis/fuzzily)
4
+ [![Dependency Status](https://gemnasium.com/mezis/fuzzily.png)](https://gemnasium.com/mezis/fuzzily)
5
+
3
6
  A fast, [trigram](http://en.wikipedia.org/wiki/N-gram)-based, database-backed [fuzzy](http://en.wikipedia.org/wiki/Approximate_string_matching) string search/match engine for Rails.
4
7
 
5
8
  Loosely inspired from an [old blog post](http://unirec.blogspot.co.uk/2007/12/live-fuzzy-search-using-n-grams-in.html).
data/fuzzily.gemspec CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |gem|
20
20
  gem.add_development_dependency 'pry'
21
21
  gem.add_development_dependency 'pry-nav'
22
22
  gem.add_development_dependency 'sqlite3'
23
+ gem.add_development_dependency 'mysql2'
23
24
 
24
25
  gem.files = `git ls-files`.split($/)
25
26
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -21,11 +21,12 @@ module Fuzzily
21
21
  t.string :fuzzy_field
22
22
  end
23
23
 
24
+ # owner_id goes first as we'll GROUP BY that
24
25
  add_index trigrams_table_name,
25
- [:owner_type, :fuzzy_field, :trigram, :owner_id, :score],
26
+ [:owner_id, :owner_type, :fuzzy_field, :trigram, :score],
26
27
  :name => :index_for_match
27
28
  add_index trigrams_table_name,
28
- [:owner_type, :owner_id],
29
+ [:owner_id, :owner_type],
29
30
  :name => :index_by_owner
30
31
  end
31
32
 
data/lib/fuzzily/model.rb CHANGED
@@ -13,7 +13,7 @@ module Fuzzily
13
13
 
14
14
  belongs_to :owner, :polymorphic => true
15
15
  validates_presence_of :owner
16
- validates_uniqueness_of :trigram, :scope => [:owner_type, :owner_id]
16
+ validates_uniqueness_of :trigram, :scope => [:owner_type, :owner_id, :fuzzy_field]
17
17
  validates_length_of :trigram, :is => 3
18
18
  validates_presence_of :score
19
19
  validates_presence_of :fuzzy_field
@@ -35,18 +35,13 @@ module Fuzzily
35
35
  end
36
36
 
37
37
  module ClassMethods
38
- # options:
39
- # - model (mandatory)
40
- # - field (mandatory)
41
- # - limit (default 10)
42
- def matches_for(text, options = {})
43
- options[:limit] ||= 10
38
+ def matches_for(text)
39
+ trigrams = Fuzzily::String.new(text).trigrams
44
40
  self.
45
- scoped(:select => 'owner_id, owner_type, SUM(score) AS total_score').
41
+ scoped(:select => 'owner_id, owner_type, count(*) AS matches, score').
46
42
  scoped(:group => :owner_id).
47
- scoped(:order => 'total_score DESC').
48
- scoped(:limit => options[:limit]).
49
- with_trigram(text.extend(String).trigrams).
43
+ scoped(:order => 'matches DESC, score ASC').
44
+ with_trigram(trigrams).
50
45
  map(&:owner)
51
46
  end
52
47
  end
@@ -34,6 +34,8 @@ module Fuzzily
34
34
  else raise 'Wrong # of arguments'
35
35
  end
36
36
 
37
+ options[:limit] ||= 10
38
+
37
39
  trigram_class_name.constantize.
38
40
  scoped(options).
39
41
  for_model(self.name).
@@ -47,7 +49,8 @@ module Fuzzily
47
49
  self.scoped(:include => trigram_association).find_in_batches(:batch_size => 100) do |batch|
48
50
  inserts = []
49
51
  batch.each do |record|
50
- record.send(field).extend(String).scored_trigrams.each do |trigram, score|
52
+ data = Fuzzily::String.new(record.send(field))
53
+ data.scored_trigrams.each do |trigram, score|
51
54
  inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, score, trigram])
52
55
  end
53
56
  end
@@ -66,7 +69,7 @@ module Fuzzily
66
69
 
67
70
  define_method update_trigrams_method do
68
71
  self.send(trigram_association).delete_all
69
- self.send(field).extend(String).scored_trigrams.each do |trigram, score|
72
+ String.new(self.send(field)).scored_trigrams.each do |trigram, score|
70
73
  self.send(trigram_association).create!(:score => score, :trigram => trigram, :owner_type => self.class.name)
71
74
  end
72
75
  end
@@ -1,7 +1,8 @@
1
1
  require 'active_support/core_ext/string/multibyte'
2
-
2
+ require 'delegate'
3
3
  module Fuzzily
4
- module String
4
+ class String < SimpleDelegator
5
+
5
6
  def trigrams
6
7
  normalized = self.normalize
7
8
  number_of_trigrams = normalized.length - 3
@@ -9,9 +10,7 @@ module Fuzzily
9
10
  end
10
11
 
11
12
  def scored_trigrams
12
- trigrams_ = self.trigrams
13
- score = 32_768 / trigrams_.length
14
- trigrams_.map { |t| [t, score] }
13
+ trigrams.map { |t| [t, self.length] }
15
14
  end
16
15
 
17
16
  protected
@@ -22,9 +21,10 @@ module Fuzzily
22
21
  # Iconv.iconv('ascii//translit//ignore', 'utf-8', self).first.
23
22
  ActiveSupport::Multibyte::Chars.new(self).
24
23
  mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').downcase.to_s.
25
- gsub(/\W/,' ').
24
+ gsub(/[^a-z]/,' ').
26
25
  gsub(/\s+/,'*').
27
- gsub(/^/,'**')
26
+ gsub(/^/,'**').
27
+ gsub(/$/,'*')
28
28
  end
29
29
  end
30
30
  end
@@ -1,3 +1,3 @@
1
1
  module Fuzzily
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -68,10 +68,6 @@ describe Fuzzily::Model do
68
68
  end
69
69
  end
70
70
 
71
- it 'honors the limit option' do
72
- model.matches_for('Palmyre', :limit => 1).should == [@palma]
73
- end
74
-
75
71
  it 'returns ordered results' do
76
72
  model.matches_for('Palmyre').should == [@palma, @paris]
77
73
  end
@@ -57,7 +57,7 @@ describe Fuzzily::Searchable do
57
57
  it 'updates all trigram records on save' do
58
58
  subject.create(:name => 'Paris')
59
59
  subject.first.update_attribute :name, 'Rome'
60
- Trigram.all.map(&:trigram).should =~ %w(**r *ro rom ome)
60
+ Trigram.all.map(&:trigram).should =~ %w(**r *ro rom ome me*)
61
61
  end
62
62
  end
63
63
 
@@ -91,7 +91,16 @@ describe Fuzzily::Searchable do
91
91
  @york = subject.create(:name => 'York')
92
92
  @yorkisthan = subject.create(:name => 'Yorkisthan')
93
93
 
94
- subject.find_by_fuzzy_name('York').should == [@york, @yorkshire, @yorkisthan, @new_york]
94
+ subject.find_by_fuzzy_name('York').should == [@york, @new_york, @yorkshire, @yorkisthan]
95
+ subject.find_by_fuzzy_name('Yorkshire').should == [@yorkshire, @york, @yorkisthan, @new_york]
96
+ end
97
+
98
+ it 'does not favour short words' do
99
+ subject.fuzzily_searchable :name
100
+ @lo = subject.create(:name => 'Lo') # **l *lo lo*
101
+ @london = subject.create(:name => 'London') # **l *lo lon ond ndo don on*
102
+ # **l *lo lon
103
+ subject.find_by_fuzzy_name('Lon').should == [@london, @lo]
95
104
  end
96
105
  end
97
106
  end
@@ -3,22 +3,23 @@ require 'spec_helper'
3
3
 
4
4
  describe Fuzzily::String do
5
5
  def result(string)
6
- string.extend(described_class).trigrams
6
+ described_class.new(string).trigrams
7
7
  end
8
8
 
9
9
  it 'splits strings into trigrams' do
10
- result('Paris').should == %w(**p *pa par ari ris)
10
+ result('Paris').should == %w(**p *pa par ari ris is*)
11
11
  end
12
12
 
13
13
  it 'removes accents' do
14
- result('Montélimar').should == %w(**m *mo mon ont nte tel eli lim ima mar)
14
+ result('Montélimar').should == %w(**m *mo mon ont nte tel eli lim ima mar ar*)
15
15
  end
16
16
 
17
17
  it 'handles multi word strings' do
18
- result('Le Mans').should == %w(**l *le le* e*m *ma man ans)
18
+ result('Le Mans').should == %w(**l *le le* e*m *ma man ans ns*)
19
19
  end
20
20
 
21
- it 'removes symbols' do
21
+ it 'removes symbols and duplicates' do
22
+ # The final ess, sse, se* would be dupes.
22
23
  result('Besse-en-Chandesse').should == %w(**b *be bes ess sse se* e*e *en en* n*c *ch cha han and nde des)
23
24
  end
24
25
  end
metadata CHANGED
@@ -1,135 +1,154 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: fuzzily
3
- version: !ruby/object:Gem::Version
4
- hash: 23
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 0
10
- version: 0.2.0
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Julien Letessier
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2013-03-06 00:00:00 +00:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
22
- requirement: &id001 !ruby/object:Gem::Requirement
23
- none: false
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- hash: 33
28
- segments:
29
- - 2
30
- - 3
31
- - 17
32
- version: 2.3.17
33
- prerelease: false
12
+ date: 2013-03-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
34
15
  name: activerecord
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.3.17
35
22
  type: :runtime
36
- version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 3
44
- segments:
45
- - 0
46
- version: "0"
47
23
  prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.3.17
30
+ - !ruby/object:Gem::Dependency
48
31
  name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
49
38
  type: :development
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- requirement: &id003 !ruby/object:Gem::Requirement
53
- none: false
54
- requirements:
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- hash: 3
58
- segments:
59
- - 0
60
- version: "0"
61
39
  prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
62
47
  name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
63
54
  type: :development
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- requirement: &id004 !ruby/object:Gem::Requirement
67
- none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- hash: 3
72
- segments:
73
- - 0
74
- version: "0"
75
55
  prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
76
63
  name: appraisal
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
77
70
  type: :development
78
- version_requirements: *id004
79
- - !ruby/object:Gem::Dependency
80
- requirement: &id005 !ruby/object:Gem::Requirement
81
- none: false
82
- requirements:
83
- - - ">="
84
- - !ruby/object:Gem::Version
85
- hash: 3
86
- segments:
87
- - 0
88
- version: "0"
89
71
  prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
90
79
  name: pry
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
91
86
  type: :development
92
- version_requirements: *id005
93
- - !ruby/object:Gem::Dependency
94
- requirement: &id006 !ruby/object:Gem::Requirement
95
- none: false
96
- requirements:
97
- - - ">="
98
- - !ruby/object:Gem::Version
99
- hash: 3
100
- segments:
101
- - 0
102
- version: "0"
103
87
  prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
104
95
  name: pry-nav
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
105
102
  type: :development
106
- version_requirements: *id006
107
- - !ruby/object:Gem::Dependency
108
- requirement: &id007 !ruby/object:Gem::Requirement
109
- none: false
110
- requirements:
111
- - - ">="
112
- - !ruby/object:Gem::Version
113
- hash: 3
114
- segments:
115
- - 0
116
- version: "0"
117
103
  prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
118
111
  name: sqlite3
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
119
118
  type: :development
120
- version_requirements: *id007
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: mysql2
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
121
142
  description: Fast fuzzy string matching for rails
122
- email:
143
+ email:
123
144
  - julien.letessier@gmail.com
124
145
  executables: []
125
-
126
146
  extensions: []
127
-
128
147
  extra_rdoc_files: []
129
-
130
- files:
148
+ files:
131
149
  - .gitignore
132
150
  - .rspec
151
+ - .ruby-version
133
152
  - .travis.yml
134
153
  - Appraisals
135
154
  - Gemfile
@@ -158,41 +177,38 @@ files:
158
177
  - spec/fuzzily/trigram_spec.rb
159
178
  - spec/meta_spec.rb
160
179
  - spec/spec_helper.rb
161
- has_rdoc: true
162
- homepage: ""
180
+ homepage: ''
163
181
  licenses: []
164
-
165
182
  post_install_message:
166
183
  rdoc_options: []
167
-
168
- require_paths:
184
+ require_paths:
169
185
  - lib
170
- required_ruby_version: !ruby/object:Gem::Requirement
186
+ required_ruby_version: !ruby/object:Gem::Requirement
171
187
  none: false
172
- requirements:
173
- - - ">="
174
- - !ruby/object:Gem::Version
175
- hash: 3
176
- segments:
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ segments:
177
193
  - 0
178
- version: "0"
179
- required_rubygems_version: !ruby/object:Gem::Requirement
194
+ hash: 2813832546497713737
195
+ required_rubygems_version: !ruby/object:Gem::Requirement
180
196
  none: false
181
- requirements:
182
- - - ">="
183
- - !ruby/object:Gem::Version
184
- hash: 3
185
- segments:
197
+ requirements:
198
+ - - ! '>='
199
+ - !ruby/object:Gem::Version
200
+ version: '0'
201
+ segments:
186
202
  - 0
187
- version: "0"
203
+ hash: 2813832546497713737
188
204
  requirements: []
189
-
190
205
  rubyforge_project:
191
- rubygems_version: 1.3.9.5
206
+ rubygems_version: 1.8.23
192
207
  signing_key:
193
208
  specification_version: 3
194
- summary: A fast, trigram-based, database-backed fuzzy string search/match engine for Rails.
195
- test_files:
209
+ summary: A fast, trigram-based, database-backed fuzzy string search/match engine for
210
+ Rails.
211
+ test_files:
196
212
  - spec/fuzzily/migration_spec.rb
197
213
  - spec/fuzzily/model_spec.rb
198
214
  - spec/fuzzily/searchable_spec.rb