fuzzily 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -14,3 +14,10 @@ spec/reports
14
14
  test/tmp
15
15
  test/version_tmp
16
16
  tmp
17
+
18
+ # CTags
19
+ .tags*
20
+
21
+ # Data
22
+ *.gz
23
+ *.bz2
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 1.9.2-p320
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fuzzily (0.2.0)
4
+ fuzzily (0.2.1)
5
5
  activerecord (>= 2.3.17)
6
6
 
7
7
  GEM
@@ -24,10 +24,11 @@ GEM
24
24
  arel (3.0.2)
25
25
  builder (3.0.4)
26
26
  coderay (1.0.9)
27
- diff-lcs (1.1.3)
28
- i18n (0.6.1)
27
+ diff-lcs (1.2.1)
28
+ i18n (0.6.4)
29
29
  method_source (0.8.1)
30
30
  multi_json (1.6.1)
31
+ mysql2 (0.3.11)
31
32
  pry (0.9.12)
32
33
  coderay (~> 1.0.5)
33
34
  method_source (~> 0.8)
@@ -35,17 +36,17 @@ GEM
35
36
  pry-nav (0.2.3)
36
37
  pry (~> 0.9.10)
37
38
  rake (10.0.3)
38
- rspec (2.12.0)
39
- rspec-core (~> 2.12.0)
40
- rspec-expectations (~> 2.12.0)
41
- rspec-mocks (~> 2.12.0)
42
- rspec-core (2.12.2)
43
- rspec-expectations (2.12.1)
44
- diff-lcs (~> 1.1.3)
45
- rspec-mocks (2.12.2)
46
- slop (3.4.3)
39
+ rspec (2.13.0)
40
+ rspec-core (~> 2.13.0)
41
+ rspec-expectations (~> 2.13.0)
42
+ rspec-mocks (~> 2.13.0)
43
+ rspec-core (2.13.1)
44
+ rspec-expectations (2.13.0)
45
+ diff-lcs (>= 1.1.3, < 2.0)
46
+ rspec-mocks (2.13.0)
47
+ slop (3.4.4)
47
48
  sqlite3 (1.3.7)
48
- tzinfo (0.3.35)
49
+ tzinfo (0.3.37)
49
50
 
50
51
  PLATFORMS
51
52
  ruby
@@ -53,6 +54,7 @@ PLATFORMS
53
54
  DEPENDENCIES
54
55
  appraisal
55
56
  fuzzily!
57
+ mysql2
56
58
  pry
57
59
  pry-nav
58
60
  rake
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Fuzzily
2
2
 
3
+ [![Build Status](https://travis-ci.org/mezis/fuzzily.png?branch=master)](https://travis-ci.org/mezis/fuzzily)
4
+ [![Dependency Status](https://gemnasium.com/mezis/fuzzily.png)](https://gemnasium.com/mezis/fuzzily)
5
+
3
6
  A fast, [trigram](http://en.wikipedia.org/wiki/N-gram)-based, database-backed [fuzzy](http://en.wikipedia.org/wiki/Approximate_string_matching) string search/match engine for Rails.
4
7
 
5
8
  Loosely inspired from an [old blog post](http://unirec.blogspot.co.uk/2007/12/live-fuzzy-search-using-n-grams-in.html).
data/fuzzily.gemspec CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |gem|
20
20
  gem.add_development_dependency 'pry'
21
21
  gem.add_development_dependency 'pry-nav'
22
22
  gem.add_development_dependency 'sqlite3'
23
+ gem.add_development_dependency 'mysql2'
23
24
 
24
25
  gem.files = `git ls-files`.split($/)
25
26
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -21,11 +21,12 @@ module Fuzzily
21
21
  t.string :fuzzy_field
22
22
  end
23
23
 
24
+ # owner_id goes first as we'll GROUP BY that
24
25
  add_index trigrams_table_name,
25
- [:owner_type, :fuzzy_field, :trigram, :owner_id, :score],
26
+ [:owner_id, :owner_type, :fuzzy_field, :trigram, :score],
26
27
  :name => :index_for_match
27
28
  add_index trigrams_table_name,
28
- [:owner_type, :owner_id],
29
+ [:owner_id, :owner_type],
29
30
  :name => :index_by_owner
30
31
  end
31
32
 
data/lib/fuzzily/model.rb CHANGED
@@ -13,7 +13,7 @@ module Fuzzily
13
13
 
14
14
  belongs_to :owner, :polymorphic => true
15
15
  validates_presence_of :owner
16
- validates_uniqueness_of :trigram, :scope => [:owner_type, :owner_id]
16
+ validates_uniqueness_of :trigram, :scope => [:owner_type, :owner_id, :fuzzy_field]
17
17
  validates_length_of :trigram, :is => 3
18
18
  validates_presence_of :score
19
19
  validates_presence_of :fuzzy_field
@@ -35,18 +35,13 @@ module Fuzzily
35
35
  end
36
36
 
37
37
  module ClassMethods
38
- # options:
39
- # - model (mandatory)
40
- # - field (mandatory)
41
- # - limit (default 10)
42
- def matches_for(text, options = {})
43
- options[:limit] ||= 10
38
+ def matches_for(text)
39
+ trigrams = Fuzzily::String.new(text).trigrams
44
40
  self.
45
- scoped(:select => 'owner_id, owner_type, SUM(score) AS total_score').
41
+ scoped(:select => 'owner_id, owner_type, count(*) AS matches, score').
46
42
  scoped(:group => :owner_id).
47
- scoped(:order => 'total_score DESC').
48
- scoped(:limit => options[:limit]).
49
- with_trigram(text.extend(String).trigrams).
43
+ scoped(:order => 'matches DESC, score ASC').
44
+ with_trigram(trigrams).
50
45
  map(&:owner)
51
46
  end
52
47
  end
@@ -34,6 +34,8 @@ module Fuzzily
34
34
  else raise 'Wrong # of arguments'
35
35
  end
36
36
 
37
+ options[:limit] ||= 10
38
+
37
39
  trigram_class_name.constantize.
38
40
  scoped(options).
39
41
  for_model(self.name).
@@ -47,7 +49,8 @@ module Fuzzily
47
49
  self.scoped(:include => trigram_association).find_in_batches(:batch_size => 100) do |batch|
48
50
  inserts = []
49
51
  batch.each do |record|
50
- record.send(field).extend(String).scored_trigrams.each do |trigram, score|
52
+ data = Fuzzily::String.new(record.send(field))
53
+ data.scored_trigrams.each do |trigram, score|
51
54
  inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, score, trigram])
52
55
  end
53
56
  end
@@ -66,7 +69,7 @@ module Fuzzily
66
69
 
67
70
  define_method update_trigrams_method do
68
71
  self.send(trigram_association).delete_all
69
- self.send(field).extend(String).scored_trigrams.each do |trigram, score|
72
+ String.new(self.send(field)).scored_trigrams.each do |trigram, score|
70
73
  self.send(trigram_association).create!(:score => score, :trigram => trigram, :owner_type => self.class.name)
71
74
  end
72
75
  end
@@ -1,7 +1,8 @@
1
1
  require 'active_support/core_ext/string/multibyte'
2
-
2
+ require 'delegate'
3
3
  module Fuzzily
4
- module String
4
+ class String < SimpleDelegator
5
+
5
6
  def trigrams
6
7
  normalized = self.normalize
7
8
  number_of_trigrams = normalized.length - 3
@@ -9,9 +10,7 @@ module Fuzzily
9
10
  end
10
11
 
11
12
  def scored_trigrams
12
- trigrams_ = self.trigrams
13
- score = 32_768 / trigrams_.length
14
- trigrams_.map { |t| [t, score] }
13
+ trigrams.map { |t| [t, self.length] }
15
14
  end
16
15
 
17
16
  protected
@@ -22,9 +21,10 @@ module Fuzzily
22
21
  # Iconv.iconv('ascii//translit//ignore', 'utf-8', self).first.
23
22
  ActiveSupport::Multibyte::Chars.new(self).
24
23
  mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').downcase.to_s.
25
- gsub(/\W/,' ').
24
+ gsub(/[^a-z]/,' ').
26
25
  gsub(/\s+/,'*').
27
- gsub(/^/,'**')
26
+ gsub(/^/,'**').
27
+ gsub(/$/,'*')
28
28
  end
29
29
  end
30
30
  end
@@ -1,3 +1,3 @@
1
1
  module Fuzzily
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -68,10 +68,6 @@ describe Fuzzily::Model do
68
68
  end
69
69
  end
70
70
 
71
- it 'honors the limit option' do
72
- model.matches_for('Palmyre', :limit => 1).should == [@palma]
73
- end
74
-
75
71
  it 'returns ordered results' do
76
72
  model.matches_for('Palmyre').should == [@palma, @paris]
77
73
  end
@@ -57,7 +57,7 @@ describe Fuzzily::Searchable do
57
57
  it 'updates all trigram records on save' do
58
58
  subject.create(:name => 'Paris')
59
59
  subject.first.update_attribute :name, 'Rome'
60
- Trigram.all.map(&:trigram).should =~ %w(**r *ro rom ome)
60
+ Trigram.all.map(&:trigram).should =~ %w(**r *ro rom ome me*)
61
61
  end
62
62
  end
63
63
 
@@ -91,7 +91,16 @@ describe Fuzzily::Searchable do
91
91
  @york = subject.create(:name => 'York')
92
92
  @yorkisthan = subject.create(:name => 'Yorkisthan')
93
93
 
94
- subject.find_by_fuzzy_name('York').should == [@york, @yorkshire, @yorkisthan, @new_york]
94
+ subject.find_by_fuzzy_name('York').should == [@york, @new_york, @yorkshire, @yorkisthan]
95
+ subject.find_by_fuzzy_name('Yorkshire').should == [@yorkshire, @york, @yorkisthan, @new_york]
96
+ end
97
+
98
+ it 'does not favour short words' do
99
+ subject.fuzzily_searchable :name
100
+ @lo = subject.create(:name => 'Lo') # **l *lo lo*
101
+ @london = subject.create(:name => 'London') # **l *lo lon ond ndo don on*
102
+ # **l *lo lon
103
+ subject.find_by_fuzzy_name('Lon').should == [@london, @lo]
95
104
  end
96
105
  end
97
106
  end
@@ -3,22 +3,23 @@ require 'spec_helper'
3
3
 
4
4
  describe Fuzzily::String do
5
5
  def result(string)
6
- string.extend(described_class).trigrams
6
+ described_class.new(string).trigrams
7
7
  end
8
8
 
9
9
  it 'splits strings into trigrams' do
10
- result('Paris').should == %w(**p *pa par ari ris)
10
+ result('Paris').should == %w(**p *pa par ari ris is*)
11
11
  end
12
12
 
13
13
  it 'removes accents' do
14
- result('Montélimar').should == %w(**m *mo mon ont nte tel eli lim ima mar)
14
+ result('Montélimar').should == %w(**m *mo mon ont nte tel eli lim ima mar ar*)
15
15
  end
16
16
 
17
17
  it 'handles multi word strings' do
18
- result('Le Mans').should == %w(**l *le le* e*m *ma man ans)
18
+ result('Le Mans').should == %w(**l *le le* e*m *ma man ans ns*)
19
19
  end
20
20
 
21
- it 'removes symbols' do
21
+ it 'removes symbols and duplicates' do
22
+ # The final ess, sse, se* would be dupes.
22
23
  result('Besse-en-Chandesse').should == %w(**b *be bes ess sse se* e*e *en en* n*c *ch cha han and nde des)
23
24
  end
24
25
  end
metadata CHANGED
@@ -1,135 +1,154 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: fuzzily
3
- version: !ruby/object:Gem::Version
4
- hash: 23
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 0
10
- version: 0.2.0
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Julien Letessier
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2013-03-06 00:00:00 +00:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
22
- requirement: &id001 !ruby/object:Gem::Requirement
23
- none: false
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- hash: 33
28
- segments:
29
- - 2
30
- - 3
31
- - 17
32
- version: 2.3.17
33
- prerelease: false
12
+ date: 2013-03-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
34
15
  name: activerecord
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.3.17
35
22
  type: :runtime
36
- version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 3
44
- segments:
45
- - 0
46
- version: "0"
47
23
  prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.3.17
30
+ - !ruby/object:Gem::Dependency
48
31
  name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
49
38
  type: :development
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- requirement: &id003 !ruby/object:Gem::Requirement
53
- none: false
54
- requirements:
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- hash: 3
58
- segments:
59
- - 0
60
- version: "0"
61
39
  prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
62
47
  name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
63
54
  type: :development
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- requirement: &id004 !ruby/object:Gem::Requirement
67
- none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- hash: 3
72
- segments:
73
- - 0
74
- version: "0"
75
55
  prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
76
63
  name: appraisal
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
77
70
  type: :development
78
- version_requirements: *id004
79
- - !ruby/object:Gem::Dependency
80
- requirement: &id005 !ruby/object:Gem::Requirement
81
- none: false
82
- requirements:
83
- - - ">="
84
- - !ruby/object:Gem::Version
85
- hash: 3
86
- segments:
87
- - 0
88
- version: "0"
89
71
  prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
90
79
  name: pry
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
91
86
  type: :development
92
- version_requirements: *id005
93
- - !ruby/object:Gem::Dependency
94
- requirement: &id006 !ruby/object:Gem::Requirement
95
- none: false
96
- requirements:
97
- - - ">="
98
- - !ruby/object:Gem::Version
99
- hash: 3
100
- segments:
101
- - 0
102
- version: "0"
103
87
  prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
104
95
  name: pry-nav
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
105
102
  type: :development
106
- version_requirements: *id006
107
- - !ruby/object:Gem::Dependency
108
- requirement: &id007 !ruby/object:Gem::Requirement
109
- none: false
110
- requirements:
111
- - - ">="
112
- - !ruby/object:Gem::Version
113
- hash: 3
114
- segments:
115
- - 0
116
- version: "0"
117
103
  prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
118
111
  name: sqlite3
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
119
118
  type: :development
120
- version_requirements: *id007
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: mysql2
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
121
142
  description: Fast fuzzy string matching for rails
122
- email:
143
+ email:
123
144
  - julien.letessier@gmail.com
124
145
  executables: []
125
-
126
146
  extensions: []
127
-
128
147
  extra_rdoc_files: []
129
-
130
- files:
148
+ files:
131
149
  - .gitignore
132
150
  - .rspec
151
+ - .ruby-version
133
152
  - .travis.yml
134
153
  - Appraisals
135
154
  - Gemfile
@@ -158,41 +177,38 @@ files:
158
177
  - spec/fuzzily/trigram_spec.rb
159
178
  - spec/meta_spec.rb
160
179
  - spec/spec_helper.rb
161
- has_rdoc: true
162
- homepage: ""
180
+ homepage: ''
163
181
  licenses: []
164
-
165
182
  post_install_message:
166
183
  rdoc_options: []
167
-
168
- require_paths:
184
+ require_paths:
169
185
  - lib
170
- required_ruby_version: !ruby/object:Gem::Requirement
186
+ required_ruby_version: !ruby/object:Gem::Requirement
171
187
  none: false
172
- requirements:
173
- - - ">="
174
- - !ruby/object:Gem::Version
175
- hash: 3
176
- segments:
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ segments:
177
193
  - 0
178
- version: "0"
179
- required_rubygems_version: !ruby/object:Gem::Requirement
194
+ hash: 2813832546497713737
195
+ required_rubygems_version: !ruby/object:Gem::Requirement
180
196
  none: false
181
- requirements:
182
- - - ">="
183
- - !ruby/object:Gem::Version
184
- hash: 3
185
- segments:
197
+ requirements:
198
+ - - ! '>='
199
+ - !ruby/object:Gem::Version
200
+ version: '0'
201
+ segments:
186
202
  - 0
187
- version: "0"
203
+ hash: 2813832546497713737
188
204
  requirements: []
189
-
190
205
  rubyforge_project:
191
- rubygems_version: 1.3.9.5
206
+ rubygems_version: 1.8.23
192
207
  signing_key:
193
208
  specification_version: 3
194
- summary: A fast, trigram-based, database-backed fuzzy string search/match engine for Rails.
195
- test_files:
209
+ summary: A fast, trigram-based, database-backed fuzzy string search/match engine for
210
+ Rails.
211
+ test_files:
196
212
  - spec/fuzzily/migration_spec.rb
197
213
  - spec/fuzzily/model_spec.rb
198
214
  - spec/fuzzily/searchable_spec.rb