fuzzily 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -3,7 +3,6 @@
3
3
  .bundle
4
4
  .config
5
5
  .yardoc
6
- Gemfile.lock
7
6
  InstalledFiles
8
7
  _yardoc
9
8
  coverage
data/Gemfile.lock ADDED
@@ -0,0 +1,46 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fuzzily (0.0.2)
5
+ activerecord (~> 2.3)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activerecord (2.3.14)
11
+ activesupport (= 2.3.14)
12
+ activesupport (2.3.14)
13
+ appraisal (0.4.1)
14
+ bundler
15
+ rake
16
+ coderay (1.0.8)
17
+ diff-lcs (1.1.3)
18
+ method_source (0.8.1)
19
+ pry (0.9.10)
20
+ coderay (~> 1.0.5)
21
+ method_source (~> 0.8)
22
+ slop (~> 3.3.1)
23
+ pry-nav (0.2.2)
24
+ pry (~> 0.9.10)
25
+ rake (0.9.2.2)
26
+ rspec (2.11.0)
27
+ rspec-core (~> 2.11.0)
28
+ rspec-expectations (~> 2.11.0)
29
+ rspec-mocks (~> 2.11.0)
30
+ rspec-core (2.11.1)
31
+ rspec-expectations (2.11.3)
32
+ diff-lcs (~> 1.1.3)
33
+ rspec-mocks (2.11.3)
34
+ slop (3.3.3)
35
+ sqlite3 (1.3.6)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ appraisal
42
+ fuzzily!
43
+ pry
44
+ pry-nav
45
+ rspec
46
+ sqlite3
@@ -2,34 +2,37 @@ require 'active_record'
2
2
 
3
3
  module Fuzzily
4
4
  module Migration
5
+ def self.extended(by)
6
+ by.singleton_class.class_eval do
7
+ def trigrams_table_name=(custom_name)
8
+ @trigrams_table_name = custom_name
9
+ end
5
10
 
6
- def trigrams_table_name=(custom_name)
7
- @trigrams_table_name = custom_name
8
- end
9
-
10
- def trigrams_table_name
11
- @trigrams_table_name ||= :trigrams
12
- end
11
+ def trigrams_table_name
12
+ @trigrams_table_name ||= :trigrams
13
+ end
13
14
 
14
- def up
15
- create_table trigrams_table_name do |t|
16
- t.string :trigram, :limit => 3
17
- t.integer :score
18
- t.integer :owner_id
19
- t.string :owner_type
20
- t.string :fuzzy_field
21
- end
15
+ def up
16
+ create_table trigrams_table_name do |t|
17
+ t.string :trigram, :limit => 3
18
+ t.integer :score
19
+ t.integer :owner_id
20
+ t.string :owner_type
21
+ t.string :fuzzy_field
22
+ end
22
23
 
23
- add_index trigrams_table_name,
24
- [:owner_type, :fuzzy_field, :trigram, :owner_id, :score],
25
- :name => :index_for_match
26
- add_index trigrams_table_name,
27
- [:owner_type, :owner_id],
28
- :name => :index_by_owner
29
- end
24
+ add_index trigrams_table_name,
25
+ [:owner_type, :fuzzy_field, :trigram, :owner_id, :score],
26
+ :name => :index_for_match
27
+ add_index trigrams_table_name,
28
+ [:owner_type, :owner_id],
29
+ :name => :index_by_owner
30
+ end
30
31
 
31
- def down
32
- drop_table trigrams_table_name
32
+ def down
33
+ drop_table trigrams_table_name
34
+ end
35
+ end
33
36
  end
34
37
  end
35
38
  end
data/lib/fuzzily/model.rb CHANGED
@@ -39,9 +39,10 @@ module Fuzzily
39
39
  def matches_for(text, options = {})
40
40
  options[:limit] ||= 10
41
41
  self.
42
- scoped(:select => 'owner_id, owner_type, SUM(score) AS score').
42
+ scoped(:select => 'owner_id, owner_type, SUM(score) AS total_score').
43
43
  scoped(:group => :owner_id).
44
- scoped(:order => 'score DESC', :limit => options[:limit]).
44
+ scoped(:order => 'total_score DESC').
45
+ scoped(:limit => options[:limit]).
45
46
  with_trigram(text.extend(String).trigrams).
46
47
  map(&:owner)
47
48
  end
@@ -24,7 +24,8 @@ module Fuzzily
24
24
  :class_name => trigram_class_name,
25
25
  :as => :owner,
26
26
  :conditions => { :fuzzy_field => field.to_s },
27
- :dependent => :destroy
27
+ :dependent => :destroy,
28
+ :autosave => true
28
29
 
29
30
  singleton_class.send(:define_method,"find_by_fuzzy_#{field}".to_sym) do |*args|
30
31
  case args.size
@@ -32,13 +33,41 @@ module Fuzzily
32
33
  when 2 then pattern, options = args
33
34
  else raise 'Wrong # of arguments'
34
35
  end
35
- Trigram.scoped(options).for_model(self.name).for_field(field).matches(pattern)
36
+
37
+ trigram_class_name.constantize.
38
+ scoped(options).
39
+ for_model(self.name).
40
+ for_field(field.to_s).
41
+ matches_for(pattern)
42
+ end
43
+
44
+ singleton_class.send(:define_method,"bulk_update_fuzzy_#{field}".to_sym) do
45
+ trigram_class = trigram_class_name.constantize
46
+
47
+ self.scoped(:include => trigram_association).find_in_batches(:batch_size => 100) do |batch|
48
+ inserts = []
49
+ batch.each do |record|
50
+ record.send(field).extend(String).trigrams.each do |trigram|
51
+ inserts << sanitize_sql_array(['(?,?,?,?,?)', self.name, record.id, field.to_s, 1, trigram])
52
+ end
53
+ end
54
+
55
+ trigram_class.transaction do
56
+ batch.each { |record| record.send(trigram_association).delete_all }
57
+ trigram_class.connection.insert(%Q{
58
+ INSERT INTO `#{trigram_class.table_name}`
59
+ (`owner_type`, `owner_id`, `fuzzy_field`, `score`, `trigram`)
60
+ VALUES
61
+ #{inserts.join(", ")}
62
+ })
63
+ end
64
+ end
36
65
  end
37
66
 
38
67
  define_method update_trigrams_method do
39
- self.send(trigram_association).destroy_all
68
+ self.send(trigram_association).delete_all
40
69
  self.send(field).extend(String).trigrams.each do |trigram|
41
- self.send(trigram_association).create!(:score => 1, :trigram => trigram)
70
+ self.send(trigram_association).create!(:score => 1, :trigram => trigram, :owner_type => self.class.name)
42
71
  end
43
72
  end
44
73
 
@@ -1,25 +1,23 @@
1
- require 'iconv'
1
+ require 'active_support/core_ext/string/multibyte'
2
2
 
3
3
  module Fuzzily
4
4
  module String
5
5
  def trigrams
6
- normalized_words.map do |word|
7
- (0..(word.length - 3)).map { |index| word[index,3] }
8
- end.flatten.uniq
6
+ normalized = self.normalize
7
+ (0..(normalized.length - 3)).map { |index| normalized[index,3] }.uniq
9
8
  end
10
9
 
11
- private
10
+ protected
12
11
 
13
12
  # Remove accents, downcase, replace spaces and word start with '*',
14
13
  # return the normalized string
15
- def normalized_words
16
- self.split(/\s+/).map { |word|
17
- Iconv.iconv('ascii//translit//ignore', 'utf-8', word).first.downcase.gsub(/\W/,'')
18
- }.
19
- delete_if(&:empty?).
20
- map { |word|
21
- "**#{word}"
22
- }
14
+ def normalize
15
+ # Iconv.iconv('ascii//translit//ignore', 'utf-8', self).first.
16
+ ActiveSupport::Multibyte::Chars.new(self).
17
+ mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').downcase.to_s.
18
+ gsub(/\W/,' ').
19
+ gsub(/\s+/,'*').
20
+ gsub(/^/,'**')
23
21
  end
24
22
  end
25
23
  end
@@ -1,3 +1,3 @@
1
1
  module Fuzzily
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -10,10 +10,11 @@ describe Fuzzily::Searchable do
10
10
  before(:each) { prepare_owners_table }
11
11
 
12
12
  subject do
13
- Stuff.clone.class_eval do
14
- def self.name ; 'Stuff' ; end
15
- self
13
+ silence_warnings do
14
+ Stuff = Class.new(ActiveRecord::Base)
16
15
  end
16
+ def Stuff.name ; 'Stuff' ; end
17
+ Stuff
17
18
  end
18
19
 
19
20
  describe '.fuzzily_searchable' do
@@ -39,14 +40,14 @@ describe Fuzzily::Searchable do
39
40
  end
40
41
 
41
42
  describe '(callbacks)' do
43
+ before { subject.fuzzily_searchable :name }
44
+
42
45
  it 'generates trigram records on creation' do
43
- subject.fuzzily_searchable :name
44
46
  subject.create(:name => 'Paris')
45
47
  subject.last.trigrams_for_name.should_not be_empty
46
48
  end
47
49
 
48
50
  it 'generates the correct trigrams' do
49
- subject.fuzzily_searchable :name
50
51
  record = subject.create(:name => 'FOO')
51
52
  Trigram.first.trigram.should == '**f'
52
53
  Trigram.first.owner_id.should == record.id
@@ -54,19 +55,35 @@ describe Fuzzily::Searchable do
54
55
  end
55
56
 
56
57
  it 'updates all trigram records on save' do
57
- subject.fuzzily_searchable :name
58
58
  subject.create(:name => 'Paris')
59
59
  subject.first.update_attribute :name, 'Rome'
60
60
  Trigram.all.map(&:trigram).should =~ %w(**r *ro rom ome)
61
61
  end
62
62
  end
63
63
 
64
- describe '#find_by_fuzzy_<field>' do
65
- it 'works'
64
+ describe '#update_fuzzy_<field>!' do
65
+ it 're-creates trigrams' do
66
+ subject.fuzzily_searchable :name
67
+ subject.create(:name => 'Paris')
68
+ old_ids = Trigram.all.map(&:id)
69
+ subject.last.update_fuzzy_name!
70
+ (old_ids & Trigram.all.map(&:id)).should be_empty
71
+ end
66
72
  end
67
73
 
68
- describe '#update_fuzzy_<field>!' do
69
- it 'works'
74
+ context '(integrationg test)' do
75
+ describe '#find_by_fuzzy_<field>' do
76
+ it 'returns records' do
77
+ subject.fuzzily_searchable :name
78
+ @paris = subject.create(:name => 'Paris')
79
+ @palma = subject.create(:name => 'Palma de Majorca')
80
+ @palmyre = subject.create(:name => 'La Palmyre')
81
+
82
+ subject.find_by_fuzzy_name('Piris').should_not be_empty
83
+ subject.find_by_fuzzy_name('Piris').should =~ [@paris, @palma]
84
+ subject.find_by_fuzzy_name('Paradise').should =~ [@paris, @palma, @palmyre]
85
+ end
86
+ end
70
87
  end
71
88
 
72
89
  end
@@ -1,8 +1,23 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Fuzzily::String do
4
- it 'splits strings into trigrams'
5
- it 'removes accents'
6
- it 'removes symbols'
7
- it 'handles multi word strings'
4
+ def result(string)
5
+ string.extend(described_class).trigrams
6
+ end
7
+
8
+ it 'splits strings into trigrams' do
9
+ result('Paris').should == %w(**p *pa par ari ris)
10
+ end
11
+
12
+ it 'removes accents' do
13
+ result('Montélimar').should == %w(**m *mo mon ont nte tel eli lim ima mar)
14
+ end
15
+
16
+ it 'handles multi word strings' do
17
+ result('Le Mans').should == %w(**l *le le* e*m *ma man ans)
18
+ end
19
+
20
+ it 'removes symbols' do
21
+ result('Besse-en-Chandesse').should == %w(**b *be bes ess sse se* e*e *en en* n*c *ch cha han and nde des)
22
+ end
8
23
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzily
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Julien Letessier
@@ -116,6 +116,7 @@ files:
116
116
  - .gitignore
117
117
  - .rspec
118
118
  - Gemfile
119
+ - Gemfile.lock
119
120
  - LICENSE.txt
120
121
  - README.md
121
122
  - Rakefile