fuzzily 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +0 -1
- data/Gemfile.lock +46 -0
- data/lib/fuzzily/migration.rb +27 -24
- data/lib/fuzzily/model.rb +3 -2
- data/lib/fuzzily/searchable.rb +33 -4
- data/lib/fuzzily/trigram.rb +11 -13
- data/lib/fuzzily/version.rb +1 -1
- data/spec/fuzzily/searchable_spec.rb +27 -10
- data/spec/fuzzily/trigram_spec.rb +19 -4
- metadata +4 -3
data/.gitignore
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fuzzily (0.0.2)
|
5
|
+
activerecord (~> 2.3)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activerecord (2.3.14)
|
11
|
+
activesupport (= 2.3.14)
|
12
|
+
activesupport (2.3.14)
|
13
|
+
appraisal (0.4.1)
|
14
|
+
bundler
|
15
|
+
rake
|
16
|
+
coderay (1.0.8)
|
17
|
+
diff-lcs (1.1.3)
|
18
|
+
method_source (0.8.1)
|
19
|
+
pry (0.9.10)
|
20
|
+
coderay (~> 1.0.5)
|
21
|
+
method_source (~> 0.8)
|
22
|
+
slop (~> 3.3.1)
|
23
|
+
pry-nav (0.2.2)
|
24
|
+
pry (~> 0.9.10)
|
25
|
+
rake (0.9.2.2)
|
26
|
+
rspec (2.11.0)
|
27
|
+
rspec-core (~> 2.11.0)
|
28
|
+
rspec-expectations (~> 2.11.0)
|
29
|
+
rspec-mocks (~> 2.11.0)
|
30
|
+
rspec-core (2.11.1)
|
31
|
+
rspec-expectations (2.11.3)
|
32
|
+
diff-lcs (~> 1.1.3)
|
33
|
+
rspec-mocks (2.11.3)
|
34
|
+
slop (3.3.3)
|
35
|
+
sqlite3 (1.3.6)
|
36
|
+
|
37
|
+
PLATFORMS
|
38
|
+
ruby
|
39
|
+
|
40
|
+
DEPENDENCIES
|
41
|
+
appraisal
|
42
|
+
fuzzily!
|
43
|
+
pry
|
44
|
+
pry-nav
|
45
|
+
rspec
|
46
|
+
sqlite3
|
data/lib/fuzzily/migration.rb
CHANGED
@@ -2,34 +2,37 @@ require 'active_record'
|
|
2
2
|
|
3
3
|
module Fuzzily
|
4
4
|
module Migration
|
5
|
+
def self.extended(by)
|
6
|
+
by.singleton_class.class_eval do
|
7
|
+
def trigrams_table_name=(custom_name)
|
8
|
+
@trigrams_table_name = custom_name
|
9
|
+
end
|
5
10
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
def trigrams_table_name
|
11
|
-
@trigrams_table_name ||= :trigrams
|
12
|
-
end
|
11
|
+
def trigrams_table_name
|
12
|
+
@trigrams_table_name ||= :trigrams
|
13
|
+
end
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
def up
|
16
|
+
create_table trigrams_table_name do |t|
|
17
|
+
t.string :trigram, :limit => 3
|
18
|
+
t.integer :score
|
19
|
+
t.integer :owner_id
|
20
|
+
t.string :owner_type
|
21
|
+
t.string :fuzzy_field
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
24
|
+
add_index trigrams_table_name,
|
25
|
+
[:owner_type, :fuzzy_field, :trigram, :owner_id, :score],
|
26
|
+
:name => :index_for_match
|
27
|
+
add_index trigrams_table_name,
|
28
|
+
[:owner_type, :owner_id],
|
29
|
+
:name => :index_by_owner
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
32
|
+
def down
|
33
|
+
drop_table trigrams_table_name
|
34
|
+
end
|
35
|
+
end
|
33
36
|
end
|
34
37
|
end
|
35
38
|
end
|
data/lib/fuzzily/model.rb
CHANGED
@@ -39,9 +39,10 @@ module Fuzzily
|
|
39
39
|
def matches_for(text, options = {})
|
40
40
|
options[:limit] ||= 10
|
41
41
|
self.
|
42
|
-
scoped(:select => 'owner_id, owner_type, SUM(score) AS
|
42
|
+
scoped(:select => 'owner_id, owner_type, SUM(score) AS total_score').
|
43
43
|
scoped(:group => :owner_id).
|
44
|
-
scoped(:order => '
|
44
|
+
scoped(:order => 'total_score DESC').
|
45
|
+
scoped(:limit => options[:limit]).
|
45
46
|
with_trigram(text.extend(String).trigrams).
|
46
47
|
map(&:owner)
|
47
48
|
end
|
data/lib/fuzzily/searchable.rb
CHANGED
@@ -24,7 +24,8 @@ module Fuzzily
|
|
24
24
|
:class_name => trigram_class_name,
|
25
25
|
:as => :owner,
|
26
26
|
:conditions => { :fuzzy_field => field.to_s },
|
27
|
-
:dependent => :destroy
|
27
|
+
:dependent => :destroy,
|
28
|
+
:autosave => true
|
28
29
|
|
29
30
|
singleton_class.send(:define_method,"find_by_fuzzy_#{field}".to_sym) do |*args|
|
30
31
|
case args.size
|
@@ -32,13 +33,41 @@ module Fuzzily
|
|
32
33
|
when 2 then pattern, options = args
|
33
34
|
else raise 'Wrong # of arguments'
|
34
35
|
end
|
35
|
-
|
36
|
+
|
37
|
+
trigram_class_name.constantize.
|
38
|
+
scoped(options).
|
39
|
+
for_model(self.name).
|
40
|
+
for_field(field.to_s).
|
41
|
+
matches_for(pattern)
|
42
|
+
end
|
43
|
+
|
44
|
+
singleton_class.send(:define_method,"bulk_update_fuzzy_#{field}".to_sym) do
|
45
|
+
trigram_class = trigram_class_name.constantize
|
46
|
+
|
47
|
+
self.scoped(:include => trigram_association).find_in_batches(:batch_size => 100) do |batch|
|
48
|
+
inserts = []
|
49
|
+
batch.each do |record|
|
50
|
+
record.send(field).extend(String).trigrams.each do |trigram|
|
51
|
+
inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, 1, trigram])
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
trigram_class.transaction do
|
56
|
+
batch.each { |record| record.send(trigram_association).delete_all }
|
57
|
+
trigram_class.connection.insert(%Q{
|
58
|
+
INSERT INTO `#{trigram_class.table_name}`
|
59
|
+
(`owner_type`, `owner_id`, `fuzzy_field`, `score`, `trigram`)
|
60
|
+
VALUES
|
61
|
+
#{inserts.join(", ")}
|
62
|
+
})
|
63
|
+
end
|
64
|
+
end
|
36
65
|
end
|
37
66
|
|
38
67
|
define_method update_trigrams_method do
|
39
|
-
self.send(trigram_association).
|
68
|
+
self.send(trigram_association).delete_all
|
40
69
|
self.send(field).extend(String).trigrams.each do |trigram|
|
41
|
-
self.send(trigram_association).create!(:score => 1, :trigram => trigram)
|
70
|
+
self.send(trigram_association).create!(:score => 1, :trigram => trigram, :owner_type => self.class.name)
|
42
71
|
end
|
43
72
|
end
|
44
73
|
|
data/lib/fuzzily/trigram.rb
CHANGED
@@ -1,25 +1,23 @@
|
|
1
|
-
require '
|
1
|
+
require 'active_support/core_ext/string/multibyte'
|
2
2
|
|
3
3
|
module Fuzzily
|
4
4
|
module String
|
5
5
|
def trigrams
|
6
|
-
|
7
|
-
|
8
|
-
end.flatten.uniq
|
6
|
+
normalized = self.normalize
|
7
|
+
(0..(normalized.length - 3)).map { |index| normalized[index,3] }.uniq
|
9
8
|
end
|
10
9
|
|
11
|
-
|
10
|
+
protected
|
12
11
|
|
13
12
|
# Remove accents, downcase, replace spaces and word start with '*',
|
14
13
|
# return list of normalized words
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
}
|
14
|
+
def normalize
|
15
|
+
# Iconv.iconv('ascii//translit//ignore', 'utf-8', self).first.
|
16
|
+
ActiveSupport::Multibyte::Chars.new(self).
|
17
|
+
mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').downcase.to_s.
|
18
|
+
gsub(/\W/,' ').
|
19
|
+
gsub(/\s+/,'*').
|
20
|
+
gsub(/^/,'**')
|
23
21
|
end
|
24
22
|
end
|
25
23
|
end
|
data/lib/fuzzily/version.rb
CHANGED
data/spec/fuzzily/searchable_spec.rb
CHANGED
@@ -10,10 +10,11 @@ describe Fuzzily::Searchable do
|
|
10
10
|
before(:each) { prepare_owners_table }
|
11
11
|
|
12
12
|
subject do
|
13
|
-
|
14
|
-
|
15
|
-
self
|
13
|
+
silence_warnings do
|
14
|
+
Stuff = Class.new(ActiveRecord::Base)
|
16
15
|
end
|
16
|
+
def Stuff.name ; 'Stuff' ; end
|
17
|
+
Stuff
|
17
18
|
end
|
18
19
|
|
19
20
|
describe '.fuzzily_searchable' do
|
@@ -39,14 +40,14 @@ describe Fuzzily::Searchable do
|
|
39
40
|
end
|
40
41
|
|
41
42
|
describe '(callbacks)' do
|
43
|
+
before { subject.fuzzily_searchable :name }
|
44
|
+
|
42
45
|
it 'generates trigram records on creation' do
|
43
|
-
subject.fuzzily_searchable :name
|
44
46
|
subject.create(:name => 'Paris')
|
45
47
|
subject.last.trigrams_for_name.should_not be_empty
|
46
48
|
end
|
47
49
|
|
48
50
|
it 'generates the correct trigrams' do
|
49
|
-
subject.fuzzily_searchable :name
|
50
51
|
record = subject.create(:name => 'FOO')
|
51
52
|
Trigram.first.trigram.should == '**f'
|
52
53
|
Trigram.first.owner_id.should == record.id
|
@@ -54,19 +55,35 @@ describe Fuzzily::Searchable do
|
|
54
55
|
end
|
55
56
|
|
56
57
|
it 'updates all trigram records on save' do
|
57
|
-
subject.fuzzily_searchable :name
|
58
58
|
subject.create(:name => 'Paris')
|
59
59
|
subject.first.update_attribute :name, 'Rome'
|
60
60
|
Trigram.all.map(&:trigram).should =~ %w(**r *ro rom ome)
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
-
describe '#
|
65
|
-
it '
|
64
|
+
describe '#update_fuzzy_<field>!' do
|
65
|
+
it 're-creates trigrams' do
|
66
|
+
subject.fuzzily_searchable :name
|
67
|
+
subject.create(:name => 'Paris')
|
68
|
+
old_ids = Trigram.all.map(&:id)
|
69
|
+
subject.last.update_fuzzy_name!
|
70
|
+
(old_ids & Trigram.all.map(&:id)).should be_empty
|
71
|
+
end
|
66
72
|
end
|
67
73
|
|
68
|
-
|
69
|
-
|
74
|
+
context '(integrationg test)' do
|
75
|
+
describe '#find_by_fuzzy_<field>' do
|
76
|
+
it 'returns records' do
|
77
|
+
subject.fuzzily_searchable :name
|
78
|
+
@paris = subject.create(:name => 'Paris')
|
79
|
+
@palma = subject.create(:name => 'Palma de Majorca')
|
80
|
+
@palmyre = subject.create(:name => 'La Palmyre')
|
81
|
+
|
82
|
+
subject.find_by_fuzzy_name('Piris').should_not be_empty
|
83
|
+
subject.find_by_fuzzy_name('Piris').should =~ [@paris, @palma]
|
84
|
+
subject.find_by_fuzzy_name('Paradise').should =~ [@paris, @palma, @palmyre]
|
85
|
+
end
|
86
|
+
end
|
70
87
|
end
|
71
88
|
|
72
89
|
end
|
data/spec/fuzzily/trigram_spec.rb
CHANGED
@@ -1,8 +1,23 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Fuzzily::String do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
def result(string)
|
5
|
+
string.extend(described_class).trigrams
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'splits strings into trigrams' do
|
9
|
+
result('Paris').should == %w(**p *pa par ari ris)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'removes accents' do
|
13
|
+
result('Montélimar').should == %w(**m *mo mon ont nte tel eli lim ima mar)
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'handles multi word strings' do
|
17
|
+
result('Le Mans').should == %w(**l *le le* e*m *ma man ans)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'removes symbols' do
|
21
|
+
result('Besse-en-Chandesse').should == %w(**b *be bes ess sse se* e*e *en en* n*c *ch cha han and nde des)
|
22
|
+
end
|
8
23
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fuzzily
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Julien Letessier
|
@@ -116,6 +116,7 @@ files:
|
|
116
116
|
- .gitignore
|
117
117
|
- .rspec
|
118
118
|
- Gemfile
|
119
|
+
- Gemfile.lock
|
119
120
|
- LICENSE.txt
|
120
121
|
- README.md
|
121
122
|
- Rakefile
|