dusen 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,8 +8,9 @@ module Dusen
8
8
 
9
9
  belongs_to :source, :polymorphic => true, :inverse_of => :search_text_record
10
10
 
11
- def update_words!(words)
12
- update_attributes!(:words => words, :stale => false)
11
+ def update_words!(text)
12
+ text = Util.normalize_word_boundaries(text)
13
+ update_attributes!(:words => text, :stale => false)
13
14
  end
14
15
 
15
16
  def invalidate!
@@ -41,18 +42,19 @@ module Dusen
41
42
  true
42
43
  end
43
44
 
44
- def self.match(model, words)
45
+ def self.match(model, phrases)
45
46
  synchronize_model(model) if model.search_text?
46
47
  Dusen::Util.append_scope_conditions(
47
48
  model,
48
- :id => matching_source_ids(model, words)
49
+ :id => matching_source_ids(model, phrases)
49
50
  )
50
51
  end
51
52
 
52
- def self.matching_source_ids(model, words)
53
+ def self.matching_source_ids(model, phrases)
54
+ phrases = phrases.collect { |phrase| Util.normalize_word_boundaries(phrase) }
53
55
  conditions = [
54
56
  'MATCH (words) AGAINST (? IN BOOLEAN MODE)',
55
- Dusen::Util.boolean_fulltext_query(words)
57
+ Dusen::Util.boolean_fulltext_query(phrases)
56
58
  ]
57
59
  matching_texts = Dusen::Util.append_scope_conditions(for_model(model), conditions)
58
60
  Dusen::Util.collect_column(matching_texts, :source_id)
data/lib/dusen/parser.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  module Dusen
4
4
  class Parser
5
5
 
6
- WESTERNISH_WORD_CHARACTER = '\\w\\-\\.@_ÄÖÜäöüß' # this is wrong on so many levels
6
+ WESTERNISH_WORD_CHARACTER = '\\w\\-\\.;@_ÄÖÜäöüß' # this is wrong on so many levels
7
7
  TEXT_QUERY = /(?:"([^"]+)"|([#{WESTERNISH_WORD_CHARACTER}]+))/
8
8
  FIELD_QUERY = /(\w+)\:#{TEXT_QUERY}/
9
9
 
data/lib/dusen/util.rb CHANGED
@@ -117,5 +117,10 @@ module Dusen
117
117
  # ::ActiveRecord::Base.connection.select_values(query).collect(&:to_i)
118
118
  #end
119
119
 
120
+ def normalize_word_boundaries(text)
121
+ unwanted_mysql_boundary = /[\.;]/
122
+ text.gsub(unwanted_mysql_boundary, '')
123
+ end
124
+
120
125
  end
121
126
  end
data/lib/dusen/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Dusen
4
- VERSION = '0.4.2'
4
+ VERSION = '0.4.3'
5
5
  end
@@ -2,5 +2,5 @@ test:
2
2
  adapter: mysql2
3
3
  host: localhost
4
4
  username: root
5
- password: junior
5
+ password: foobar
6
6
  database: dusen_test
@@ -2,5 +2,5 @@ test:
2
2
  adapter: mysql2
3
3
  host: localhost
4
4
  username: root
5
- password: junior
5
+ password: foobar
6
6
  database: dusen_test
@@ -2,5 +2,5 @@ test:
2
2
  adapter: mysql2
3
3
  host: localhost
4
4
  username: root
5
- password: junior
5
+ password: foobar
6
6
  database: dusen_test
@@ -56,6 +56,18 @@ shared_examples_for 'model with search syntax' do
56
56
  subject.search('Abraham').to_a.should == [match]
57
57
  end
58
58
 
59
+ it 'should find words where one letter is separated from other letters by a period' do
60
+ match = subject.create!(:name => 'E.ONNNEN')
61
+ subject.search('E.ONNNEN').to_a.should == [match]
62
+ end
63
+
64
+ it 'should find words where one letter is separated from other letters by a semicolon' do
65
+ match = subject.create!(:name => 'E;ONNNEN')
66
+ subject.search('E;ONNNEN')
67
+ # debugger
68
+ subject.search('E;ONNNEN').to_a.should == [match]
69
+ end
70
+
59
71
  end
60
72
 
61
73
  describe '.search_syntax' do
@@ -18,4 +18,18 @@ describe Dusen::Util do
18
18
 
19
19
  end
20
20
 
21
+ describe '#normalize_word_boundaries' do
22
+
23
+ it 'should remove characters that MySQL would mistakenly consider a word boundary' do
24
+ Dusen::Util.normalize_word_boundaries("E.ON Bayern").should == 'EON Bayern'
25
+ Dusen::Util.normalize_word_boundaries("E.ON E.ON").should == 'EON EON'
26
+ Dusen::Util.normalize_word_boundaries("E;ON").should == 'EON'
27
+ end
28
+
29
+ it 'should leave text without such characters unchanged' do
30
+ Dusen::Util.normalize_word_boundaries("Foobar Raboof").should == 'Foobar Raboof'
31
+ end
32
+
33
+ end
34
+
21
35
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dusen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
4
+ hash: 9
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 4
9
- - 2
10
- version: 0.4.2
9
+ - 3
10
+ version: 0.4.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Henning Koch
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-01-08 00:00:00 +01:00
19
- default_executable:
18
+ date: 2013-01-14 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rails
@@ -133,7 +132,6 @@ files:
133
132
  - spec/shared/spec/dusen/parser_spec.rb
134
133
  - spec/shared/spec/dusen/query_spec.rb
135
134
  - spec/shared/spec/dusen/util_spec.rb
136
- has_rdoc: true
137
135
  homepage: https://github.com/makandra/dusen
138
136
  licenses: []
139
137
 
@@ -163,9 +161,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
161
  requirements: []
164
162
 
165
163
  rubyforge_project:
166
- rubygems_version: 1.3.9.5
164
+ rubygems_version: 1.8.24
167
165
  signing_key:
168
166
  specification_version: 3
169
167
  summary: Comprehensive full text search for ActiveRecord and MySQL
170
168
  test_files: []
171
169
 
170
+ has_rdoc: