picky 0.3.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. data/lib/picky/application.rb +2 -2
  2. data/lib/picky/cacher/partial/default.rb +1 -1
  3. data/lib/picky/configuration/field.rb +8 -10
  4. data/lib/picky/configuration/indexes.rb +6 -6
  5. data/lib/picky/configuration/queries.rb +4 -3
  6. data/lib/picky/cores.rb +2 -2
  7. data/lib/picky/extensions/array.rb +2 -12
  8. data/lib/picky/generator.rb +27 -4
  9. data/lib/picky/index/bundle.rb +5 -41
  10. data/lib/picky/index/bundle_checker.rb +58 -0
  11. data/lib/picky/index/type.rb +4 -1
  12. data/lib/picky/index/wrappers/exact_first.rb +57 -0
  13. data/lib/picky/indexes.rb +12 -19
  14. data/lib/picky/loader.rb +7 -8
  15. data/lib/picky/query/allocation.rb +1 -1
  16. data/lib/picky/query/combinations.rb +9 -6
  17. data/lib/picky/query/combinator.rb +11 -5
  18. data/lib/picky/rack/harakiri.rb +1 -1
  19. data/lib/picky/results/base.rb +4 -12
  20. data/lib/picky/results/live.rb +0 -6
  21. data/lib/picky/routing.rb +17 -17
  22. data/lib/picky/sources/csv.rb +1 -2
  23. data/lib/picky/sources/db.rb +0 -1
  24. data/lib/picky/sources/delicious.rb +41 -0
  25. data/lib/picky/tokenizers/base.rb +52 -43
  26. data/lib/picky/tokenizers/default/index.rb +7 -0
  27. data/lib/picky/tokenizers/default/query.rb +7 -0
  28. data/lib/picky/tokenizers/index.rb +0 -9
  29. data/lib/picky/tokenizers/query.rb +0 -9
  30. data/lib/tasks/application.rake +1 -1
  31. data/lib/tasks/cache.rake +41 -48
  32. data/lib/tasks/framework.rake +1 -1
  33. data/lib/tasks/index.rake +22 -12
  34. data/lib/tasks/server.rake +3 -3
  35. data/lib/tasks/shortcuts.rake +9 -2
  36. data/lib/tasks/statistics.rake +8 -8
  37. data/lib/tasks/try.rake +4 -2
  38. data/project_prototype/Gemfile +1 -1
  39. data/project_prototype/app/application.rb +7 -3
  40. data/spec/lib/cacher/partial/default_spec.rb +1 -1
  41. data/spec/lib/cacher/partial/none_spec.rb +12 -0
  42. data/spec/lib/cacher/partial/subtoken_spec.rb +29 -1
  43. data/spec/lib/configuration/field_spec.rb +162 -3
  44. data/spec/lib/configuration/indexes_spec.rb +150 -0
  45. data/spec/lib/cores_spec.rb +43 -0
  46. data/spec/lib/extensions/module_spec.rb +27 -16
  47. data/spec/lib/generator_spec.rb +3 -3
  48. data/spec/lib/index/bundle_checker_spec.rb +67 -0
  49. data/spec/lib/index/bundle_spec.rb +0 -50
  50. data/spec/lib/index/type_spec.rb +47 -0
  51. data/spec/lib/index/wrappers/exact_first_spec.rb +95 -0
  52. data/spec/lib/indexers/base_spec.rb +18 -2
  53. data/spec/lib/loader_spec.rb +21 -1
  54. data/spec/lib/query/allocation_spec.rb +25 -0
  55. data/spec/lib/query/base_spec.rb +37 -0
  56. data/spec/lib/query/combination_spec.rb +10 -1
  57. data/spec/lib/query/combinations_spec.rb +82 -3
  58. data/spec/lib/query/combinator_spec.rb +45 -0
  59. data/spec/lib/query/token_spec.rb +24 -0
  60. data/spec/lib/rack/harakiri_spec.rb +28 -0
  61. data/spec/lib/results/base_spec.rb +24 -0
  62. data/spec/lib/results/live_spec.rb +15 -0
  63. data/spec/lib/routing_spec.rb +5 -0
  64. data/spec/lib/sources/db_spec.rb +31 -1
  65. data/spec/lib/sources/delicious_spec.rb +75 -0
  66. data/spec/lib/tokenizers/base_spec.rb +160 -49
  67. data/spec/lib/tokenizers/default/index_spec.rb +11 -0
  68. data/spec/lib/tokenizers/default/query_spec.rb +11 -0
  69. metadata +26 -5
  70. data/lib/picky/index/combined.rb +0 -45
  71. data/lib/picky/tokenizers/default.rb +0 -3
data/spec/lib/tokenizers/base_spec.rb CHANGED
@@ -6,56 +6,167 @@ describe Tokenizers::Base do
    before(:each) do
      @tokenizer = Tokenizers::Base.new
    end
-
-   context 'stopwords' do
-     describe '.stopwords' do
-       context 'without stopwords given' do
-         it 'should define a method remove_stopwords' do
-           lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-         end
-         it 'should define a method remove_stopwords that does nothing' do
-           @tokenizer.remove_stopwords('from this text').should == nil
-         end
-         it 'should not define a method remove_non_single_stopwords' do
-           lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should raise_error(NoMethodError)
-         end
-       end
-       context 'with stopwords given' do
-         before(:each) do
-           class << @tokenizer
-             stopwords(/r|e/)
-           end
-         end
-         it 'should define a method remove_stopwords' do
-           lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-         end
-         it 'should define a method stopwords that removes stopwords' do
-           @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
-         end
-         it 'should define a method remove_non_single_stopwords' do
-           lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
-         end
-         it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
-           @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
-         end
-         it 'should define a method remove_non_single_stopwords that does not single stopwords' do
-           @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
-         end
-       end
-       context 'error case' do
-         before(:each) do
-           class << @tokenizer
-             stopwords(/any/)
-           end
-         end
-         it 'should not remove non-single stopwords with a star' do
-           @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
-         end
-         it 'should not remove non-single stopwords with a tilde' do
-           @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
-         end
+
+   describe "removes_characters_after_splitting" do
+     context "without removes_characters_after_splitting called" do
+       it "has remove_after_normalizing_illegals" do
+         lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
+       end
+       it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
+         unchanging = stub :unchanging
+         @tokenizer.remove_after_normalizing_illegals unchanging
+       end
+     end
+     context "with removes_characters_after_splitting called" do
+       before(:each) do
+         @tokenizer.removes_characters_after_splitting(/[afo]/)
+       end
+       it "has remove_after_normalizing_illegals" do
+         lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
+       end
+       it "removes illegal characters" do
+         @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+       end
+     end
+   end
+
+   describe "normalizes_words" do
+     context "without normalizes_words called" do
+       it "has normalize_with_patterns" do
+         lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
+       end
+       it 'should define a method normalize_with_patterns does nothing' do
+         unchanging = stub :unchanging
+         @tokenizer.normalize_with_patterns(unchanging).should == unchanging
+       end
+     end
+     context "with normalizes_words called" do
+       before(:each) do
+         @tokenizer.normalizes_words([
+           [/st\./, 'sankt'],
+           [/stras?s?e?/, 'str']
+         ])
+       end
+       it "has normalize_with_patterns" do
+         lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
+       end
+       it "normalizes, but just the first one" do
+         @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
+       end
+     end
+   end
+
+   describe "splits_text_on" do
+     context "without splits_text_on called" do
+       it "has split" do
+         lambda { @tokenizer.split('any') }.should_not raise_error
+       end
+       it 'should define a method split that splits by default on \s' do
+         @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+       end
+     end
+     context "with removes_characters called" do
+       before(:each) do
+         @tokenizer.splits_text_on(/[\s\.\/]/)
+       end
+       it "has split" do
+         lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+       end
+       it "removes illegal characters" do
+         @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+       end
+     end
+   end
+
+   describe "removes_characters" do
+     context "without removes_characters called" do
+       it "has remove_illegals" do
+         lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
+       end
+       it 'should define a method remove_illegals that does nothing' do
+         unchanging = stub :unchanging
+         @tokenizer.remove_illegals unchanging
+       end
+     end
+     context "with removes_characters called" do
+       before(:each) do
+         @tokenizer.removes_characters(/[afo]/)
+       end
+       it "has remove_illegals" do
+         lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
+       end
+       it "removes illegal characters" do
+         @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+       end
+     end
+   end
+
+   describe 'contracts_expressions' do
+     context 'without contract_expressions called' do
+       it 'should define a method contract' do
+         lambda { @tokenizer.contract('from this text') }.should_not raise_error
+       end
+       it 'should define a method contract that does nothing' do
+         unchanging = stub :unchanging
+         @tokenizer.contract unchanging
+       end
+     end
+     context 'with contracts_expressions called' do
+       before(:each) do
+         @tokenizer.contracts_expressions(/Mister|Mr./, 'mr')
+       end
+       it 'should define a method remove_stopwords' do
+         lambda { @tokenizer.contract('from this text') }.should_not raise_error
+       end
+       it 'should define a method contract that contracts expressions' do
+         @tokenizer.contract('Mister Meyer, Mr. Peter').should == 'mr Meyer, mr Peter'
+       end
+     end
+   end
+
+   describe 'stopwords' do
+     context 'without stopwords given' do
+       it 'should define a method remove_stopwords' do
+         lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+       end
+       it 'should define a method remove_stopwords that does nothing' do
+         @tokenizer.remove_stopwords('from this text').should == 'from this text'
+       end
+       it 'should define a method remove_non_single_stopwords' do
+         lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+
+       end
+     end
+     context 'with stopwords given' do
+       before(:each) do
+         @tokenizer.stopwords(/r|e/)
+       end
+       it 'should define a method remove_stopwords' do
+         lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+       end
+       it 'should define a method stopwords that removes stopwords' do
+         @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+       end
+       it 'should define a method remove_non_single_stopwords' do
+         lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+       end
+       it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
+         @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+       end
+       it 'should define a method remove_non_single_stopwords that does not single stopwords' do
+         @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+       end
+     end
+     context 'error case' do
+       before(:each) do
+         @tokenizer.stopwords(/any/)
+       end
+       it 'should not remove non-single stopwords with a star' do
+         @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+       end
+       it 'should not remove non-single stopwords with a tilde' do
+         @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
        end
      end
    end
-
  end
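
The rewritten spec above exercises the tokenizer configuration methods introduced in 0.9.0 (removes_characters, splits_text_on, normalizes_words, removes_characters_after_splitting, contracts_expressions, stopwords), each of which installs a matching processing step (remove_illegals, split, normalize_with_patterns, remove_after_normalizing_illegals, contract, remove_stopwords). A minimal sketch of how those calls fit together follows; the inputs and expected results are copied from the spec, but loading the gem with require 'picky' and instantiating Tokenizers::Base directly outside the gem's own test setup are assumptions, not something this diff confirms.

# Sketch only: the configuration DSL as exercised by the spec above.
require 'picky'   # assumption: the specs load the library via spec_helper instead

tokenizer = Tokenizers::Base.new

tokenizer.removes_characters(/[afo]/)
tokenizer.remove_illegals('abcdefghijklmnop')             # => 'bcdeghijklmnp'

tokenizer.splits_text_on(/[\s\.\/]/)
tokenizer.split('a b/c.d')                                # => ['a', 'b', 'c', 'd']

tokenizer.normalizes_words([[/st\./, 'sankt'], [/stras?s?e?/, 'str']])
tokenizer.normalize_with_patterns('st. wegstrasse')       # => 'sankt wegstrasse' (only the first matching pattern applies)

tokenizer.contracts_expressions(/Mister|Mr./, 'mr')
tokenizer.contract('Mister Meyer, Mr. Peter')             # => 'mr Meyer, mr Peter'

tokenizer.stopwords(/r|e/)
tokenizer.remove_stopwords('from this text')              # => 'fom this txt'
tokenizer.remove_non_single_stopwords('rerere')           # => 'rerere' (a single stopword is left alone)
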
data/spec/lib/tokenizers/default/index_spec.rb ADDED
@@ -0,0 +1,11 @@
+ # encoding: utf-8
+ #
+ require 'spec_helper'
+
+ describe Tokenizers::Default::Index do
+
+   it "is an instance of the index tokenizer" do
+     Tokenizers::Default::Index.should be_kind_of(Tokenizers::Index)
+   end
+
+ end
data/spec/lib/tokenizers/default/query_spec.rb ADDED
@@ -0,0 +1,11 @@
+ # encoding: utf-8
+ #
+ require 'spec_helper'
+
+ describe Tokenizers::Default::Query do
+
+   it "is an instance of the index tokenizer" do
+     Tokenizers::Default::Query.should be_kind_of(Tokenizers::Query)
+   end
+
+ end
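
Both new specs assert be_kind_of rather than subclassing, which suggests Tokenizers::Default::Index and Tokenizers::Default::Query are preconfigured tokenizer instances rather than classes, replacing the deleted Tokenizers::Default = Index alias (file 71 above). A hypothetical reconstruction consistent with the specs; the actual default configuration in 0.9.0 is not shown in this diff.

# Hypothetical shape of lib/picky/tokenizers/default/index.rb and default/query.rb,
# inferred only from the kind_of expectations above.
module Tokenizers
  module Default
    Index = Tokenizers::Index.new   # an instance, so kind_of?(Tokenizers::Index) holds
    Query = Tokenizers::Query.new   # likewise for the query tokenizer
  end
end
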
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
  prerelease: false
  segments:
  - 0
- - 3
+ - 9
  - 0
- version: 0.3.0
+ version: 0.9.0
  platform: ruby
  authors:
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []
 
- date: 2010-10-24 00:00:00 +02:00
+ date: 2010-10-26 00:00:00 +02:00
  default_executable: picky
  dependencies:
  - !ruby/object:Gem::Dependency
@@ -75,9 +75,10 @@ files:
  - lib/picky/helpers/gc.rb
  - lib/picky/helpers/measuring.rb
  - lib/picky/index/bundle.rb
+ - lib/picky/index/bundle_checker.rb
  - lib/picky/index/category.rb
- - lib/picky/index/combined.rb
  - lib/picky/index/type.rb
+ - lib/picky/index/wrappers/exact_first.rb
  - lib/picky/indexers/base.rb
  - lib/picky/indexers/default.rb
  - lib/picky/indexers/field.rb
@@ -111,8 +112,10 @@ files:
  - lib/picky/sources/base.rb
  - lib/picky/sources/csv.rb
  - lib/picky/sources/db.rb
+ - lib/picky/sources/delicious.rb
  - lib/picky/tokenizers/base.rb
- - lib/picky/tokenizers/default.rb
+ - lib/picky/tokenizers/default/index.rb
+ - lib/picky/tokenizers/default/query.rb
  - lib/picky/tokenizers/index.rb
  - lib/picky/tokenizers/query.rb
  - lib/picky/umlaut_substituter.rb
@@ -145,6 +148,7 @@ files:
  - spec/ext/performant_spec.rb
  - spec/lib/application_spec.rb
  - spec/lib/cacher/partial/default_spec.rb
+ - spec/lib/cacher/partial/none_spec.rb
  - spec/lib/cacher/partial/subtoken_spec.rb
  - spec/lib/cacher/partial_generator_spec.rb
  - spec/lib/cacher/similarity/double_levenshtone_spec.rb
@@ -153,6 +157,7 @@ files:
  - spec/lib/cacher/weights/logarithmic_spec.rb
  - spec/lib/cacher/weights_generator_spec.rb
  - spec/lib/configuration/field_spec.rb
+ - spec/lib/configuration/indexes_spec.rb
  - spec/lib/configuration/type_spec.rb
  - spec/lib/cores_spec.rb
  - spec/lib/extensions/array_spec.rb
@@ -164,9 +169,12 @@ files:
  - spec/lib/helpers/cache_spec.rb
  - spec/lib/helpers/gc_spec.rb
  - spec/lib/helpers/measuring_spec.rb
+ - spec/lib/index/bundle_checker_spec.rb
  - spec/lib/index/bundle_partial_generation_speed_spec.rb
  - spec/lib/index/bundle_spec.rb
  - spec/lib/index/category_spec.rb
+ - spec/lib/index/type_spec.rb
+ - spec/lib/index/wrappers/exact_first_spec.rb
  - spec/lib/indexers/base_spec.rb
  - spec/lib/indexers/field_spec.rb
  - spec/lib/loader_spec.rb
@@ -186,11 +194,15 @@ files:
  - spec/lib/query/weights_spec.rb
  - spec/lib/rack/harakiri_spec.rb
  - spec/lib/results/base_spec.rb
+ - spec/lib/results/live_spec.rb
  - spec/lib/routing_spec.rb
  - spec/lib/solr/schema_generator_spec.rb
  - spec/lib/sources/csv_spec.rb
  - spec/lib/sources/db_spec.rb
+ - spec/lib/sources/delicious_spec.rb
  - spec/lib/tokenizers/base_spec.rb
+ - spec/lib/tokenizers/default/index_spec.rb
+ - spec/lib/tokenizers/default/query_spec.rb
  - spec/lib/tokenizers/index_spec.rb
  - spec/lib/tokenizers/query_spec.rb
  - spec/lib/umlaut_substituter_spec.rb
@@ -232,6 +244,7 @@ test_files:
  - spec/ext/performant_spec.rb
  - spec/lib/application_spec.rb
  - spec/lib/cacher/partial/default_spec.rb
+ - spec/lib/cacher/partial/none_spec.rb
  - spec/lib/cacher/partial/subtoken_spec.rb
  - spec/lib/cacher/partial_generator_spec.rb
  - spec/lib/cacher/similarity/double_levenshtone_spec.rb
@@ -240,6 +253,7 @@ test_files:
  - spec/lib/cacher/weights/logarithmic_spec.rb
  - spec/lib/cacher/weights_generator_spec.rb
  - spec/lib/configuration/field_spec.rb
+ - spec/lib/configuration/indexes_spec.rb
  - spec/lib/configuration/type_spec.rb
  - spec/lib/cores_spec.rb
  - spec/lib/extensions/array_spec.rb
@@ -251,9 +265,12 @@ test_files:
  - spec/lib/helpers/cache_spec.rb
  - spec/lib/helpers/gc_spec.rb
  - spec/lib/helpers/measuring_spec.rb
+ - spec/lib/index/bundle_checker_spec.rb
  - spec/lib/index/bundle_partial_generation_speed_spec.rb
  - spec/lib/index/bundle_spec.rb
  - spec/lib/index/category_spec.rb
+ - spec/lib/index/type_spec.rb
+ - spec/lib/index/wrappers/exact_first_spec.rb
  - spec/lib/indexers/base_spec.rb
  - spec/lib/indexers/field_spec.rb
  - spec/lib/loader_spec.rb
@@ -273,11 +290,15 @@ test_files:
  - spec/lib/query/weights_spec.rb
  - spec/lib/rack/harakiri_spec.rb
  - spec/lib/results/base_spec.rb
+ - spec/lib/results/live_spec.rb
  - spec/lib/routing_spec.rb
  - spec/lib/solr/schema_generator_spec.rb
  - spec/lib/sources/csv_spec.rb
  - spec/lib/sources/db_spec.rb
+ - spec/lib/sources/delicious_spec.rb
  - spec/lib/tokenizers/base_spec.rb
+ - spec/lib/tokenizers/default/index_spec.rb
+ - spec/lib/tokenizers/default/query_spec.rb
  - spec/lib/tokenizers/index_spec.rb
  - spec/lib/tokenizers/query_spec.rb
  - spec/lib/umlaut_substituter_spec.rb
data/lib/picky/index/combined.rb DELETED
@@ -1,45 +0,0 @@
- # encoding: utf-8
- #
- module Index
-
-   # This index combines an exact and partial index.
-   # It serves to order the results such that exact hits are found first.
-   #
-   # TODO Need to use the right subtokens. Bake in?
-   #
-   # TODO One can use it as a wrapper, and it will extract the indexes itself. Rename: ExactFirst.
-   #
-   class Combined < Bundle
-
-     delegate :similar,
-              :identifier,
-              :name,
-              :to => :@exact
-     delegate :type,
-              :category,
-              :weight,
-              :generate_partial_from,
-              :generate_caches_from_memory,
-              :generate_derived,
-              :dump,
-              :load,
-              :to => :@partial
-
-     # TODO initialize type_or_category # => installs itself on all exact and partial
-     #
-     def initialize exact, partial
-       @exact = exact
-       @partial = partial
-     end
-
-     def ids text
-       @exact.ids(text) + @partial.ids(text)
-     end
-
-     def weight text
-       [@exact.weight(text) || 0, @partial.weight(text) || 0].max
-     end
-
-   end
-
- end
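
The deleted Combined bundle is superseded by the new lib/picky/index/wrappers/exact_first.rb (file 12 above); its own TODO already suggested the ExactFirst rename. The ordering and weighting behaviour it carried is small enough to restate on its own; the sketch below is illustrative only and does not claim to match the 0.9.0 wrapper's actual interface.

# Illustrative restatement of the deleted Combined logic: exact hits are returned
# ahead of partial hits, and a token weighs as much as the better of the two
# bundles. The class name is made up; only ids/weight come from the removed code.
class ExactFirstSketch
  def initialize exact, partial
    @exact   = exact
    @partial = partial
  end

  def ids text
    @exact.ids(text) + @partial.ids(text)
  end

  def weight text
    [@exact.weight(text) || 0, @partial.weight(text) || 0].max
  end
end
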
data/lib/picky/tokenizers/default.rb DELETED
@@ -1,3 +0,0 @@
- module Tokenizers
-   Default = Index
- end