fuzzily 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +0 -1
- data/Gemfile.lock +46 -0
- data/lib/fuzzily/migration.rb +27 -24
- data/lib/fuzzily/model.rb +3 -2
- data/lib/fuzzily/searchable.rb +33 -4
- data/lib/fuzzily/trigram.rb +11 -13
- data/lib/fuzzily/version.rb +1 -1
- data/spec/fuzzily/searchable_spec.rb +27 -10
- data/spec/fuzzily/trigram_spec.rb +19 -4
- metadata +4 -3
data/.gitignore
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fuzzily (0.0.2)
|
5
|
+
activerecord (~> 2.3)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activerecord (2.3.14)
|
11
|
+
activesupport (= 2.3.14)
|
12
|
+
activesupport (2.3.14)
|
13
|
+
appraisal (0.4.1)
|
14
|
+
bundler
|
15
|
+
rake
|
16
|
+
coderay (1.0.8)
|
17
|
+
diff-lcs (1.1.3)
|
18
|
+
method_source (0.8.1)
|
19
|
+
pry (0.9.10)
|
20
|
+
coderay (~> 1.0.5)
|
21
|
+
method_source (~> 0.8)
|
22
|
+
slop (~> 3.3.1)
|
23
|
+
pry-nav (0.2.2)
|
24
|
+
pry (~> 0.9.10)
|
25
|
+
rake (0.9.2.2)
|
26
|
+
rspec (2.11.0)
|
27
|
+
rspec-core (~> 2.11.0)
|
28
|
+
rspec-expectations (~> 2.11.0)
|
29
|
+
rspec-mocks (~> 2.11.0)
|
30
|
+
rspec-core (2.11.1)
|
31
|
+
rspec-expectations (2.11.3)
|
32
|
+
diff-lcs (~> 1.1.3)
|
33
|
+
rspec-mocks (2.11.3)
|
34
|
+
slop (3.3.3)
|
35
|
+
sqlite3 (1.3.6)
|
36
|
+
|
37
|
+
PLATFORMS
|
38
|
+
ruby
|
39
|
+
|
40
|
+
DEPENDENCIES
|
41
|
+
appraisal
|
42
|
+
fuzzily!
|
43
|
+
pry
|
44
|
+
pry-nav
|
45
|
+
rspec
|
46
|
+
sqlite3
|
data/lib/fuzzily/migration.rb
CHANGED
@@ -2,34 +2,37 @@ require 'active_record'
|
|
2
2
|
|
3
3
|
module Fuzzily
|
4
4
|
module Migration
|
5
|
+
def self.extended(by)
|
6
|
+
by.singleton_class.class_eval do
|
7
|
+
def trigrams_table_name=(custom_name)
|
8
|
+
@trigrams_table_name = custom_name
|
9
|
+
end
|
5
10
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
def trigrams_table_name
|
11
|
-
@trigrams_table_name ||= :trigrams
|
12
|
-
end
|
11
|
+
def trigrams_table_name
|
12
|
+
@trigrams_table_name ||= :trigrams
|
13
|
+
end
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
def up
|
16
|
+
create_table trigrams_table_name do |t|
|
17
|
+
t.string :trigram, :limit => 3
|
18
|
+
t.integer :score
|
19
|
+
t.integer :owner_id
|
20
|
+
t.string :owner_type
|
21
|
+
t.string :fuzzy_field
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
24
|
+
add_index trigrams_table_name,
|
25
|
+
[:owner_type, :fuzzy_field, :trigram, :owner_id, :score],
|
26
|
+
:name => :index_for_match
|
27
|
+
add_index trigrams_table_name,
|
28
|
+
[:owner_type, :owner_id],
|
29
|
+
:name => :index_by_owner
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
32
|
+
def down
|
33
|
+
drop_table trigrams_table_name
|
34
|
+
end
|
35
|
+
end
|
33
36
|
end
|
34
37
|
end
|
35
38
|
end
|
data/lib/fuzzily/model.rb
CHANGED
@@ -39,9 +39,10 @@ module Fuzzily
|
|
39
39
|
def matches_for(text, options = {})
|
40
40
|
options[:limit] ||= 10
|
41
41
|
self.
|
42
|
-
scoped(:select => 'owner_id, owner_type, SUM(score) AS
|
42
|
+
scoped(:select => 'owner_id, owner_type, SUM(score) AS total_score').
|
43
43
|
scoped(:group => :owner_id).
|
44
|
-
scoped(:order => '
|
44
|
+
scoped(:order => 'total_score DESC').
|
45
|
+
scoped(:limit => options[:limit]).
|
45
46
|
with_trigram(text.extend(String).trigrams).
|
46
47
|
map(&:owner)
|
47
48
|
end
|
data/lib/fuzzily/searchable.rb
CHANGED
@@ -24,7 +24,8 @@ module Fuzzily
|
|
24
24
|
:class_name => trigram_class_name,
|
25
25
|
:as => :owner,
|
26
26
|
:conditions => { :fuzzy_field => field.to_s },
|
27
|
-
:dependent => :destroy
|
27
|
+
:dependent => :destroy,
|
28
|
+
:autosave => true
|
28
29
|
|
29
30
|
singleton_class.send(:define_method,"find_by_fuzzy_#{field}".to_sym) do |*args|
|
30
31
|
case args.size
|
@@ -32,13 +33,41 @@ module Fuzzily
|
|
32
33
|
when 2 then pattern, options = args
|
33
34
|
else raise 'Wrong # of arguments'
|
34
35
|
end
|
35
|
-
|
36
|
+
|
37
|
+
trigram_class_name.constantize.
|
38
|
+
scoped(options).
|
39
|
+
for_model(self.name).
|
40
|
+
for_field(field.to_s).
|
41
|
+
matches_for(pattern)
|
42
|
+
end
|
43
|
+
|
44
|
+
singleton_class.send(:define_method,"bulk_update_fuzzy_#{field}".to_sym) do
|
45
|
+
trigram_class = trigram_class_name.constantize
|
46
|
+
|
47
|
+
self.scoped(:include => trigram_association).find_in_batches(:batch_size => 100) do |batch|
|
48
|
+
inserts = []
|
49
|
+
batch.each do |record|
|
50
|
+
record.send(field).extend(String).trigrams.each do |trigram|
|
51
|
+
inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, 1, trigram])
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
trigram_class.transaction do
|
56
|
+
batch.each { |record| record.send(trigram_association).delete_all }
|
57
|
+
trigram_class.connection.insert(%Q{
|
58
|
+
INSERT INTO `#{trigram_class.table_name}`
|
59
|
+
(`owner_type`, `owner_id`, `fuzzy_field`, `score`, `trigram`)
|
60
|
+
VALUES
|
61
|
+
#{inserts.join(", ")}
|
62
|
+
})
|
63
|
+
end
|
64
|
+
end
|
36
65
|
end
|
37
66
|
|
38
67
|
define_method update_trigrams_method do
|
39
|
-
self.send(trigram_association).
|
68
|
+
self.send(trigram_association).delete_all
|
40
69
|
self.send(field).extend(String).trigrams.each do |trigram|
|
41
|
-
self.send(trigram_association).create!(:score => 1, :trigram => trigram)
|
70
|
+
self.send(trigram_association).create!(:score => 1, :trigram => trigram, :owner_type => self.class.name)
|
42
71
|
end
|
43
72
|
end
|
44
73
|
|
data/lib/fuzzily/trigram.rb
CHANGED
@@ -1,25 +1,23 @@
|
|
1
|
-
require '
|
1
|
+
require 'active_support/core_ext/string/multibyte'
|
2
2
|
|
3
3
|
module Fuzzily
|
4
4
|
module String
|
5
5
|
def trigrams
|
6
|
-
|
7
|
-
|
8
|
-
end.flatten.uniq
|
6
|
+
normalized = self.normalize
|
7
|
+
(0..(normalized.length - 3)).map { |index| normalized[index,3] }.uniq
|
9
8
|
end
|
10
9
|
|
11
|
-
|
10
|
+
protected
|
12
11
|
|
13
12
|
# Remove accents, downcase, replace spaces and word start with '*',
|
14
13
|
# return list of normalized words
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
}
|
14
|
+
def normalize
|
15
|
+
# Iconv.iconv('ascii//translit//ignore', 'utf-8', self).first.
|
16
|
+
ActiveSupport::Multibyte::Chars.new(self).
|
17
|
+
mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').downcase.to_s.
|
18
|
+
gsub(/\W/,' ').
|
19
|
+
gsub(/\s+/,'*').
|
20
|
+
gsub(/^/,'**')
|
23
21
|
end
|
24
22
|
end
|
25
23
|
end
|
data/lib/fuzzily/version.rb
CHANGED
data/spec/fuzzily/searchable_spec.rb
CHANGED
@@ -10,10 +10,11 @@ describe Fuzzily::Searchable do
|
|
10
10
|
before(:each) { prepare_owners_table }
|
11
11
|
|
12
12
|
subject do
|
13
|
-
|
14
|
-
|
15
|
-
self
|
13
|
+
silence_warnings do
|
14
|
+
Stuff = Class.new(ActiveRecord::Base)
|
16
15
|
end
|
16
|
+
def Stuff.name ; 'Stuff' ; end
|
17
|
+
Stuff
|
17
18
|
end
|
18
19
|
|
19
20
|
describe '.fuzzily_searchable' do
|
@@ -39,14 +40,14 @@ describe Fuzzily::Searchable do
|
|
39
40
|
end
|
40
41
|
|
41
42
|
describe '(callbacks)' do
|
43
|
+
before { subject.fuzzily_searchable :name }
|
44
|
+
|
42
45
|
it 'generates trigram records on creation' do
|
43
|
-
subject.fuzzily_searchable :name
|
44
46
|
subject.create(:name => 'Paris')
|
45
47
|
subject.last.trigrams_for_name.should_not be_empty
|
46
48
|
end
|
47
49
|
|
48
50
|
it 'generates the correct trigrams' do
|
49
|
-
subject.fuzzily_searchable :name
|
50
51
|
record = subject.create(:name => 'FOO')
|
51
52
|
Trigram.first.trigram.should == '**f'
|
52
53
|
Trigram.first.owner_id.should == record.id
|
@@ -54,19 +55,35 @@ describe Fuzzily::Searchable do
|
|
54
55
|
end
|
55
56
|
|
56
57
|
it 'updates all trigram records on save' do
|
57
|
-
subject.fuzzily_searchable :name
|
58
58
|
subject.create(:name => 'Paris')
|
59
59
|
subject.first.update_attribute :name, 'Rome'
|
60
60
|
Trigram.all.map(&:trigram).should =~ %w(**r *ro rom ome)
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
-
describe '#
|
65
|
-
it '
|
64
|
+
describe '#update_fuzzy_<field>!' do
|
65
|
+
it 're-creates trigrams' do
|
66
|
+
subject.fuzzily_searchable :name
|
67
|
+
subject.create(:name => 'Paris')
|
68
|
+
old_ids = Trigram.all.map(&:id)
|
69
|
+
subject.last.update_fuzzy_name!
|
70
|
+
(old_ids & Trigram.all.map(&:id)).should be_empty
|
71
|
+
end
|
66
72
|
end
|
67
73
|
|
68
|
-
|
69
|
-
|
74
|
+
context '(integrationg test)' do
|
75
|
+
describe '#find_by_fuzzy_<field>' do
|
76
|
+
it 'returns records' do
|
77
|
+
subject.fuzzily_searchable :name
|
78
|
+
@paris = subject.create(:name => 'Paris')
|
79
|
+
@palma = subject.create(:name => 'Palma de Majorca')
|
80
|
+
@palmyre = subject.create(:name => 'La Palmyre')
|
81
|
+
|
82
|
+
subject.find_by_fuzzy_name('Piris').should_not be_empty
|
83
|
+
subject.find_by_fuzzy_name('Piris').should =~ [@paris, @palma]
|
84
|
+
subject.find_by_fuzzy_name('Paradise').should =~ [@paris, @palma, @palmyre]
|
85
|
+
end
|
86
|
+
end
|
70
87
|
end
|
71
88
|
|
72
89
|
end
|
data/spec/fuzzily/trigram_spec.rb
CHANGED
@@ -1,8 +1,23 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Fuzzily::String do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
def result(string)
|
5
|
+
string.extend(described_class).trigrams
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'splits strings into trigrams' do
|
9
|
+
result('Paris').should == %w(**p *pa par ari ris)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'removes accents' do
|
13
|
+
result('Montélimar').should == %w(**m *mo mon ont nte tel eli lim ima mar)
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'handles multi word strings' do
|
17
|
+
result('Le Mans').should == %w(**l *le le* e*m *ma man ans)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'removes symbols' do
|
21
|
+
result('Besse-en-Chandesse').should == %w(**b *be bes ess sse se* e*e *en en* n*c *ch cha han and nde des)
|
22
|
+
end
|
8
23
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fuzzily
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Julien Letessier
|
@@ -116,6 +116,7 @@ files:
|
|
116
116
|
- .gitignore
|
117
117
|
- .rspec
|
118
118
|
- Gemfile
|
119
|
+
- Gemfile.lock
|
119
120
|
- LICENSE.txt
|
120
121
|
- README.md
|
121
122
|
- Rakefile
|