picky 0.3.0 → 0.9.0

Files changed (71)
  1. data/lib/picky/application.rb +2 -2
  2. data/lib/picky/cacher/partial/default.rb +1 -1
  3. data/lib/picky/configuration/field.rb +8 -10
  4. data/lib/picky/configuration/indexes.rb +6 -6
  5. data/lib/picky/configuration/queries.rb +4 -3
  6. data/lib/picky/cores.rb +2 -2
  7. data/lib/picky/extensions/array.rb +2 -12
  8. data/lib/picky/generator.rb +27 -4
  9. data/lib/picky/index/bundle.rb +5 -41
  10. data/lib/picky/index/bundle_checker.rb +58 -0
  11. data/lib/picky/index/type.rb +4 -1
  12. data/lib/picky/index/wrappers/exact_first.rb +57 -0
  13. data/lib/picky/indexes.rb +12 -19
  14. data/lib/picky/loader.rb +7 -8
  15. data/lib/picky/query/allocation.rb +1 -1
  16. data/lib/picky/query/combinations.rb +9 -6
  17. data/lib/picky/query/combinator.rb +11 -5
  18. data/lib/picky/rack/harakiri.rb +1 -1
  19. data/lib/picky/results/base.rb +4 -12
  20. data/lib/picky/results/live.rb +0 -6
  21. data/lib/picky/routing.rb +17 -17
  22. data/lib/picky/sources/csv.rb +1 -2
  23. data/lib/picky/sources/db.rb +0 -1
  24. data/lib/picky/sources/delicious.rb +41 -0
  25. data/lib/picky/tokenizers/base.rb +52 -43
  26. data/lib/picky/tokenizers/default/index.rb +7 -0
  27. data/lib/picky/tokenizers/default/query.rb +7 -0
  28. data/lib/picky/tokenizers/index.rb +0 -9
  29. data/lib/picky/tokenizers/query.rb +0 -9
  30. data/lib/tasks/application.rake +1 -1
  31. data/lib/tasks/cache.rake +41 -48
  32. data/lib/tasks/framework.rake +1 -1
  33. data/lib/tasks/index.rake +22 -12
  34. data/lib/tasks/server.rake +3 -3
  35. data/lib/tasks/shortcuts.rake +9 -2
  36. data/lib/tasks/statistics.rake +8 -8
  37. data/lib/tasks/try.rake +4 -2
  38. data/project_prototype/Gemfile +1 -1
  39. data/project_prototype/app/application.rb +7 -3
  40. data/spec/lib/cacher/partial/default_spec.rb +1 -1
  41. data/spec/lib/cacher/partial/none_spec.rb +12 -0
  42. data/spec/lib/cacher/partial/subtoken_spec.rb +29 -1
  43. data/spec/lib/configuration/field_spec.rb +162 -3
  44. data/spec/lib/configuration/indexes_spec.rb +150 -0
  45. data/spec/lib/cores_spec.rb +43 -0
  46. data/spec/lib/extensions/module_spec.rb +27 -16
  47. data/spec/lib/generator_spec.rb +3 -3
  48. data/spec/lib/index/bundle_checker_spec.rb +67 -0
  49. data/spec/lib/index/bundle_spec.rb +0 -50
  50. data/spec/lib/index/type_spec.rb +47 -0
  51. data/spec/lib/index/wrappers/exact_first_spec.rb +95 -0
  52. data/spec/lib/indexers/base_spec.rb +18 -2
  53. data/spec/lib/loader_spec.rb +21 -1
  54. data/spec/lib/query/allocation_spec.rb +25 -0
  55. data/spec/lib/query/base_spec.rb +37 -0
  56. data/spec/lib/query/combination_spec.rb +10 -1
  57. data/spec/lib/query/combinations_spec.rb +82 -3
  58. data/spec/lib/query/combinator_spec.rb +45 -0
  59. data/spec/lib/query/token_spec.rb +24 -0
  60. data/spec/lib/rack/harakiri_spec.rb +28 -0
  61. data/spec/lib/results/base_spec.rb +24 -0
  62. data/spec/lib/results/live_spec.rb +15 -0
  63. data/spec/lib/routing_spec.rb +5 -0
  64. data/spec/lib/sources/db_spec.rb +31 -1
  65. data/spec/lib/sources/delicious_spec.rb +75 -0
  66. data/spec/lib/tokenizers/base_spec.rb +160 -49
  67. data/spec/lib/tokenizers/default/index_spec.rb +11 -0
  68. data/spec/lib/tokenizers/default/query_spec.rb +11 -0
  69. metadata +26 -5
  70. data/lib/picky/index/combined.rb +0 -45
  71. data/lib/picky/tokenizers/default.rb +0 -3
data/spec/lib/tokenizers/base_spec.rb CHANGED
@@ -6,56 +6,167 @@ describe Tokenizers::Base do
   before(:each) do
     @tokenizer = Tokenizers::Base.new
   end
-
-  context 'stopwords' do
-    describe '.stopwords' do
-      context 'without stopwords given' do
-        it 'should define a method remove_stopwords' do
-          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-        end
-        it 'should define a method remove_stopwords that does nothing' do
-          @tokenizer.remove_stopwords('from this text').should == nil
-        end
-        it 'should not define a method remove_non_single_stopwords' do
-          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should raise_error(NoMethodError)
-        end
-      end
-      context 'with stopwords given' do
-        before(:each) do
-          class << @tokenizer
-            stopwords(/r|e/)
-          end
-        end
-        it 'should define a method remove_stopwords' do
-          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-        end
-        it 'should define a method stopwords that removes stopwords' do
-          @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
-        end
-        it 'should define a method remove_non_single_stopwords' do
-          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
-        end
-        it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
-          @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
-        end
-        it 'should define a method remove_non_single_stopwords that does not single stopwords' do
-          @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
-        end
-      end
-      context 'error case' do
-        before(:each) do
-          class << @tokenizer
-            stopwords(/any/)
-          end
-        end
-        it 'should not remove non-single stopwords with a star' do
-          @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
-        end
-        it 'should not remove non-single stopwords with a tilde' do
-          @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
-        end
+
+  describe "removes_characters_after_splitting" do
+    context "without removes_characters_after_splitting called" do
+      it "has remove_after_normalizing_illegals" do
+        lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
+      end
+      it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.remove_after_normalizing_illegals unchanging
+      end
+    end
+    context "with removes_characters_after_splitting called" do
+      before(:each) do
+        @tokenizer.removes_characters_after_splitting(/[afo]/)
+      end
+      it "has remove_after_normalizing_illegals" do
+        lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
+      end
+      it "removes illegal characters" do
+        @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+      end
+    end
+  end
+
+  describe "normalizes_words" do
+    context "without normalizes_words called" do
+      it "has normalize_with_patterns" do
+        lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
+      end
+      it 'should define a method normalize_with_patterns does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.normalize_with_patterns(unchanging).should == unchanging
+      end
+    end
+    context "with normalizes_words called" do
+      before(:each) do
+        @tokenizer.normalizes_words([
+          [/st\./, 'sankt'],
+          [/stras?s?e?/, 'str']
+        ])
+      end
+      it "has normalize_with_patterns" do
+        lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
+      end
+      it "normalizes, but just the first one" do
+        @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
+      end
+    end
+  end
+
+  describe "splits_text_on" do
+    context "without splits_text_on called" do
+      it "has split" do
+        lambda { @tokenizer.split('any') }.should_not raise_error
+      end
+      it 'should define a method split that splits by default on \s' do
+        @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+      end
+    end
+    context "with removes_characters called" do
+      before(:each) do
+        @tokenizer.splits_text_on(/[\s\.\/]/)
+      end
+      it "has split" do
+        lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+      end
+      it "removes illegal characters" do
+        @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+      end
+    end
+  end
+
+  describe "removes_characters" do
+    context "without removes_characters called" do
+      it "has remove_illegals" do
+        lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
+      end
+      it 'should define a method remove_illegals that does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.remove_illegals unchanging
+      end
+    end
+    context "with removes_characters called" do
+      before(:each) do
+        @tokenizer.removes_characters(/[afo]/)
+      end
+      it "has remove_illegals" do
+        lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
+      end
+      it "removes illegal characters" do
+        @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+      end
+    end
+  end
+
+  describe 'contracts_expressions' do
+    context 'without contract_expressions called' do
+      it 'should define a method contract' do
+        lambda { @tokenizer.contract('from this text') }.should_not raise_error
+      end
+      it 'should define a method contract that does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.contract unchanging
+      end
+    end
+    context 'with contracts_expressions called' do
+      before(:each) do
+        @tokenizer.contracts_expressions(/Mister|Mr./, 'mr')
+      end
+      it 'should define a method remove_stopwords' do
+        lambda { @tokenizer.contract('from this text') }.should_not raise_error
+      end
+      it 'should define a method contract that contracts expressions' do
+        @tokenizer.contract('Mister Meyer, Mr. Peter').should == 'mr Meyer, mr Peter'
+      end
+    end
+  end
+
+  describe 'stopwords' do
+    context 'without stopwords given' do
+      it 'should define a method remove_stopwords' do
+        lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+      end
+      it 'should define a method remove_stopwords that does nothing' do
+        @tokenizer.remove_stopwords('from this text').should == 'from this text'
+      end
+      it 'should define a method remove_non_single_stopwords' do
+        lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+
+      end
+    end
+    context 'with stopwords given' do
+      before(:each) do
+        @tokenizer.stopwords(/r|e/)
+      end
+      it 'should define a method remove_stopwords' do
+        lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+      end
+      it 'should define a method stopwords that removes stopwords' do
+        @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+      end
+      it 'should define a method remove_non_single_stopwords' do
+        lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+      end
+      it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
+        @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+      end
+      it 'should define a method remove_non_single_stopwords that does not single stopwords' do
+        @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+      end
+    end
+    context 'error case' do
+      before(:each) do
+        @tokenizer.stopwords(/any/)
+      end
+      it 'should not remove non-single stopwords with a star' do
+        @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+      end
+      it 'should not remove non-single stopwords with a tilde' do
+        @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
       end
     end
   end
-
 end
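
For orientation: the spec above exercises picky's reworked tokenizer DSL, where options are now set directly on the tokenizer instance rather than via singleton-class calls. A minimal sketch follows, assuming only the option names shown in the spec (removes_characters, removes_characters_after_splitting, splits_text_on, normalizes_words, contracts_expressions, stopwords); the example patterns and the standalone setup are illustrative, not taken from the gem.

# Minimal sketch of the tokenizer DSL exercised by the spec above.
# Patterns are illustrative; only the method names come from the spec.
tokenizer = Tokenizers::Base.new

tokenizer.removes_characters(/[afo]/)                   # strip characters before splitting
tokenizer.stopwords(/\b(and|the|of)\b/)                 # drop stopwords, except when they are the whole query
tokenizer.contracts_expressions(/Mister|Mr\./, 'mr')    # fold variants into a single token
tokenizer.splits_text_on(/[\s\.\/]/)                    # split on whitespace, dot and slash (default: \s)
tokenizer.normalizes_words([[/st\./, 'sankt']])         # only the first matching pattern is applied
tokenizer.removes_characters_after_splitting(/[afo]/)   # clean each token again after the split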
data/spec/lib/tokenizers/default/index_spec.rb ADDED
@@ -0,0 +1,11 @@
+# encoding: utf-8
+#
+require 'spec_helper'
+
+describe Tokenizers::Default::Index do
+
+  it "is an instance of the index tokenizer" do
+    Tokenizers::Default::Index.should be_kind_of(Tokenizers::Index)
+  end
+
+end
data/spec/lib/tokenizers/default/query_spec.rb ADDED
@@ -0,0 +1,11 @@
+# encoding: utf-8
+#
+require 'spec_helper'
+
+describe Tokenizers::Default::Query do
+
+  it "is an instance of the index tokenizer" do
+    Tokenizers::Default::Query.should be_kind_of(Tokenizers::Query)
+  end
+
+end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
   prerelease: false
   segments:
   - 0
- - 3
+ - 9
   - 0
- version: 0.3.0
+ version: 0.9.0
  platform: ruby
  authors:
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []

-date: 2010-10-24 00:00:00 +02:00
+date: 2010-10-26 00:00:00 +02:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -75,9 +75,10 @@ files:
 - lib/picky/helpers/gc.rb
 - lib/picky/helpers/measuring.rb
 - lib/picky/index/bundle.rb
+- lib/picky/index/bundle_checker.rb
 - lib/picky/index/category.rb
-- lib/picky/index/combined.rb
 - lib/picky/index/type.rb
+- lib/picky/index/wrappers/exact_first.rb
 - lib/picky/indexers/base.rb
 - lib/picky/indexers/default.rb
 - lib/picky/indexers/field.rb
@@ -111,8 +112,10 @@ files:
 - lib/picky/sources/base.rb
 - lib/picky/sources/csv.rb
 - lib/picky/sources/db.rb
+- lib/picky/sources/delicious.rb
 - lib/picky/tokenizers/base.rb
-- lib/picky/tokenizers/default.rb
+- lib/picky/tokenizers/default/index.rb
+- lib/picky/tokenizers/default/query.rb
 - lib/picky/tokenizers/index.rb
 - lib/picky/tokenizers/query.rb
 - lib/picky/umlaut_substituter.rb
@@ -145,6 +148,7 @@ files:
 - spec/ext/performant_spec.rb
 - spec/lib/application_spec.rb
 - spec/lib/cacher/partial/default_spec.rb
+- spec/lib/cacher/partial/none_spec.rb
 - spec/lib/cacher/partial/subtoken_spec.rb
 - spec/lib/cacher/partial_generator_spec.rb
 - spec/lib/cacher/similarity/double_levenshtone_spec.rb
@@ -153,6 +157,7 @@ files:
 - spec/lib/cacher/weights/logarithmic_spec.rb
 - spec/lib/cacher/weights_generator_spec.rb
 - spec/lib/configuration/field_spec.rb
+- spec/lib/configuration/indexes_spec.rb
 - spec/lib/configuration/type_spec.rb
 - spec/lib/cores_spec.rb
 - spec/lib/extensions/array_spec.rb
@@ -164,9 +169,12 @@ files:
 - spec/lib/helpers/cache_spec.rb
 - spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
+- spec/lib/index/bundle_checker_spec.rb
 - spec/lib/index/bundle_partial_generation_speed_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/category_spec.rb
+- spec/lib/index/type_spec.rb
+- spec/lib/index/wrappers/exact_first_spec.rb
 - spec/lib/indexers/base_spec.rb
 - spec/lib/indexers/field_spec.rb
 - spec/lib/loader_spec.rb
@@ -186,11 +194,15 @@ files:
 - spec/lib/query/weights_spec.rb
 - spec/lib/rack/harakiri_spec.rb
 - spec/lib/results/base_spec.rb
+- spec/lib/results/live_spec.rb
 - spec/lib/routing_spec.rb
 - spec/lib/solr/schema_generator_spec.rb
 - spec/lib/sources/csv_spec.rb
 - spec/lib/sources/db_spec.rb
+- spec/lib/sources/delicious_spec.rb
 - spec/lib/tokenizers/base_spec.rb
+- spec/lib/tokenizers/default/index_spec.rb
+- spec/lib/tokenizers/default/query_spec.rb
 - spec/lib/tokenizers/index_spec.rb
 - spec/lib/tokenizers/query_spec.rb
 - spec/lib/umlaut_substituter_spec.rb
@@ -232,6 +244,7 @@ test_files:
 - spec/ext/performant_spec.rb
 - spec/lib/application_spec.rb
 - spec/lib/cacher/partial/default_spec.rb
+- spec/lib/cacher/partial/none_spec.rb
 - spec/lib/cacher/partial/subtoken_spec.rb
 - spec/lib/cacher/partial_generator_spec.rb
 - spec/lib/cacher/similarity/double_levenshtone_spec.rb
@@ -240,6 +253,7 @@ test_files:
 - spec/lib/cacher/weights/logarithmic_spec.rb
 - spec/lib/cacher/weights_generator_spec.rb
 - spec/lib/configuration/field_spec.rb
+- spec/lib/configuration/indexes_spec.rb
 - spec/lib/configuration/type_spec.rb
 - spec/lib/cores_spec.rb
 - spec/lib/extensions/array_spec.rb
@@ -251,9 +265,12 @@ test_files:
 - spec/lib/helpers/cache_spec.rb
 - spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
+- spec/lib/index/bundle_checker_spec.rb
 - spec/lib/index/bundle_partial_generation_speed_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/category_spec.rb
+- spec/lib/index/type_spec.rb
+- spec/lib/index/wrappers/exact_first_spec.rb
 - spec/lib/indexers/base_spec.rb
 - spec/lib/indexers/field_spec.rb
 - spec/lib/loader_spec.rb
@@ -273,11 +290,15 @@ test_files:
 - spec/lib/query/weights_spec.rb
 - spec/lib/rack/harakiri_spec.rb
 - spec/lib/results/base_spec.rb
+- spec/lib/results/live_spec.rb
 - spec/lib/routing_spec.rb
 - spec/lib/solr/schema_generator_spec.rb
 - spec/lib/sources/csv_spec.rb
 - spec/lib/sources/db_spec.rb
+- spec/lib/sources/delicious_spec.rb
 - spec/lib/tokenizers/base_spec.rb
+- spec/lib/tokenizers/default/index_spec.rb
+- spec/lib/tokenizers/default/query_spec.rb
 - spec/lib/tokenizers/index_spec.rb
 - spec/lib/tokenizers/query_spec.rb
 - spec/lib/umlaut_substituter_spec.rb
data/lib/picky/index/combined.rb DELETED
@@ -1,45 +0,0 @@
-# encoding: utf-8
-#
-module Index
-
-  # This index combines an exact and partial index.
-  # It serves to order the results such that exact hits are found first.
-  #
-  # TODO Need to use the right subtokens. Bake in?
-  #
-  # TODO One can use it as a wrapper, and it will extract the indexes itself. Rename: ExactFirst.
-  #
-  class Combined < Bundle
-
-    delegate :similar,
-             :identifier,
-             :name,
-             :to => :@exact
-    delegate :type,
-             :category,
-             :weight,
-             :generate_partial_from,
-             :generate_caches_from_memory,
-             :generate_derived,
-             :dump,
-             :load,
-             :to => :@partial
-
-    # TODO initialize type_or_category # => installs itself on all exact and partial
-    #
-    def initialize exact, partial
-      @exact = exact
-      @partial = partial
-    end
-
-    def ids text
-      @exact.ids(text) + @partial.ids(text)
-    end
-
-    def weight text
-      [@exact.weight(text) || 0, @partial.weight(text) || 0].max
-    end
-
-  end
-
-end
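
The Index::Combined bundle deleted above is superseded by the new wrapper lib/picky/index/wrappers/exact_first.rb (see the file list and the rename TODO in the deleted code). A hypothetical sketch of such a wrapper follows: the ids/weight logic is copied from the deleted class, while the class layout and constructor are assumptions rather than the gem's actual implementation.

# Hypothetical sketch only; the real implementation lives in
# lib/picky/index/wrappers/exact_first.rb, which is not shown in this diff.
module Index
  module Wrappers
    class ExactFirst
      def initialize exact, partial
        @exact = exact
        @partial = partial
      end
      # Exact hits are returned before partial hits.
      def ids text
        @exact.ids(text) + @partial.ids(text)
      end
      # Use the better of the two weights.
      def weight text
        [@exact.weight(text) || 0, @partial.weight(text) || 0].max
      end
    end
  end
end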
data/lib/picky/tokenizers/default.rb DELETED
@@ -1,3 +0,0 @@
-module Tokenizers
-  Default = Index
-end
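
The deleted Tokenizers::Default alias is replaced by the new lib/picky/tokenizers/default/index.rb and lib/picky/tokenizers/default/query.rb. Their specs above only assert that Tokenizers::Default::Index is a Tokenizers::Index and that Tokenizers::Default::Query is a Tokenizers::Query; the sketch below is an assumption based on those assertions, not the files' actual contents.

# Assumed shape of the replacement for the deleted alias; the real
# default/index.rb and default/query.rb are not shown in this diff.
module Tokenizers
  module Default
    Index = Tokenizers::Index.new
    Query = Tokenizers::Query.new
  end
end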