picky 0.3.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/application.rb +2 -2
- data/lib/picky/cacher/partial/default.rb +1 -1
- data/lib/picky/configuration/field.rb +8 -10
- data/lib/picky/configuration/indexes.rb +6 -6
- data/lib/picky/configuration/queries.rb +4 -3
- data/lib/picky/cores.rb +2 -2
- data/lib/picky/extensions/array.rb +2 -12
- data/lib/picky/generator.rb +27 -4
- data/lib/picky/index/bundle.rb +5 -41
- data/lib/picky/index/bundle_checker.rb +58 -0
- data/lib/picky/index/type.rb +4 -1
- data/lib/picky/index/wrappers/exact_first.rb +57 -0
- data/lib/picky/indexes.rb +12 -19
- data/lib/picky/loader.rb +7 -8
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/combinations.rb +9 -6
- data/lib/picky/query/combinator.rb +11 -5
- data/lib/picky/rack/harakiri.rb +1 -1
- data/lib/picky/results/base.rb +4 -12
- data/lib/picky/results/live.rb +0 -6
- data/lib/picky/routing.rb +17 -17
- data/lib/picky/sources/csv.rb +1 -2
- data/lib/picky/sources/db.rb +0 -1
- data/lib/picky/sources/delicious.rb +41 -0
- data/lib/picky/tokenizers/base.rb +52 -43
- data/lib/picky/tokenizers/default/index.rb +7 -0
- data/lib/picky/tokenizers/default/query.rb +7 -0
- data/lib/picky/tokenizers/index.rb +0 -9
- data/lib/picky/tokenizers/query.rb +0 -9
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/cache.rake +41 -48
- data/lib/tasks/framework.rake +1 -1
- data/lib/tasks/index.rake +22 -12
- data/lib/tasks/server.rake +3 -3
- data/lib/tasks/shortcuts.rake +9 -2
- data/lib/tasks/statistics.rake +8 -8
- data/lib/tasks/try.rake +4 -2
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +7 -3
- data/spec/lib/cacher/partial/default_spec.rb +1 -1
- data/spec/lib/cacher/partial/none_spec.rb +12 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +29 -1
- data/spec/lib/configuration/field_spec.rb +162 -3
- data/spec/lib/configuration/indexes_spec.rb +150 -0
- data/spec/lib/cores_spec.rb +43 -0
- data/spec/lib/extensions/module_spec.rb +27 -16
- data/spec/lib/generator_spec.rb +3 -3
- data/spec/lib/index/bundle_checker_spec.rb +67 -0
- data/spec/lib/index/bundle_spec.rb +0 -50
- data/spec/lib/index/type_spec.rb +47 -0
- data/spec/lib/index/wrappers/exact_first_spec.rb +95 -0
- data/spec/lib/indexers/base_spec.rb +18 -2
- data/spec/lib/loader_spec.rb +21 -1
- data/spec/lib/query/allocation_spec.rb +25 -0
- data/spec/lib/query/base_spec.rb +37 -0
- data/spec/lib/query/combination_spec.rb +10 -1
- data/spec/lib/query/combinations_spec.rb +82 -3
- data/spec/lib/query/combinator_spec.rb +45 -0
- data/spec/lib/query/token_spec.rb +24 -0
- data/spec/lib/rack/harakiri_spec.rb +28 -0
- data/spec/lib/results/base_spec.rb +24 -0
- data/spec/lib/results/live_spec.rb +15 -0
- data/spec/lib/routing_spec.rb +5 -0
- data/spec/lib/sources/db_spec.rb +31 -1
- data/spec/lib/sources/delicious_spec.rb +75 -0
- data/spec/lib/tokenizers/base_spec.rb +160 -49
- data/spec/lib/tokenizers/default/index_spec.rb +11 -0
- data/spec/lib/tokenizers/default/query_spec.rb +11 -0
- metadata +26 -5
- data/lib/picky/index/combined.rb +0 -45
- data/lib/picky/tokenizers/default.rb +0 -3
@@ -6,56 +6,167 @@ describe Tokenizers::Base do
|
|
6
6
|
before(:each) do
|
7
7
|
@tokenizer = Tokenizers::Base.new
|
8
8
|
end
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
9
|
+
|
10
|
+
describe "removes_characters_after_splitting" do
|
11
|
+
context "without removes_characters_after_splitting called" do
|
12
|
+
it "has remove_after_normalizing_illegals" do
|
13
|
+
lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
|
14
|
+
end
|
15
|
+
it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
|
16
|
+
unchanging = stub :unchanging
|
17
|
+
@tokenizer.remove_after_normalizing_illegals unchanging
|
18
|
+
end
|
19
|
+
end
|
20
|
+
context "with removes_characters_after_splitting called" do
|
21
|
+
before(:each) do
|
22
|
+
@tokenizer.removes_characters_after_splitting(/[afo]/)
|
23
|
+
end
|
24
|
+
it "has remove_after_normalizing_illegals" do
|
25
|
+
lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
|
26
|
+
end
|
27
|
+
it "removes illegal characters" do
|
28
|
+
@tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "normalizes_words" do
|
34
|
+
context "without normalizes_words called" do
|
35
|
+
it "has normalize_with_patterns" do
|
36
|
+
lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
|
37
|
+
end
|
38
|
+
it 'should define a method normalize_with_patterns does nothing' do
|
39
|
+
unchanging = stub :unchanging
|
40
|
+
@tokenizer.normalize_with_patterns(unchanging).should == unchanging
|
41
|
+
end
|
42
|
+
end
|
43
|
+
context "with normalizes_words called" do
|
44
|
+
before(:each) do
|
45
|
+
@tokenizer.normalizes_words([
|
46
|
+
[/st\./, 'sankt'],
|
47
|
+
[/stras?s?e?/, 'str']
|
48
|
+
])
|
49
|
+
end
|
50
|
+
it "has normalize_with_patterns" do
|
51
|
+
lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
|
52
|
+
end
|
53
|
+
it "normalizes, but just the first one" do
|
54
|
+
@tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe "splits_text_on" do
|
60
|
+
context "without splits_text_on called" do
|
61
|
+
it "has split" do
|
62
|
+
lambda { @tokenizer.split('any') }.should_not raise_error
|
63
|
+
end
|
64
|
+
it 'should define a method split that splits by default on \s' do
|
65
|
+
@tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
|
66
|
+
end
|
67
|
+
end
|
68
|
+
context "with removes_characters called" do
|
69
|
+
before(:each) do
|
70
|
+
@tokenizer.splits_text_on(/[\s\.\/]/)
|
71
|
+
end
|
72
|
+
it "has split" do
|
73
|
+
lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
|
74
|
+
end
|
75
|
+
it "removes illegal characters" do
|
76
|
+
@tokenizer.split('a b/c.d').should == ['a','b','c','d']
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
describe "removes_characters" do
|
82
|
+
context "without removes_characters called" do
|
83
|
+
it "has remove_illegals" do
|
84
|
+
lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
|
85
|
+
end
|
86
|
+
it 'should define a method remove_illegals that does nothing' do
|
87
|
+
unchanging = stub :unchanging
|
88
|
+
@tokenizer.remove_illegals unchanging
|
89
|
+
end
|
90
|
+
end
|
91
|
+
context "with removes_characters called" do
|
92
|
+
before(:each) do
|
93
|
+
@tokenizer.removes_characters(/[afo]/)
|
94
|
+
end
|
95
|
+
it "has remove_illegals" do
|
96
|
+
lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
|
97
|
+
end
|
98
|
+
it "removes illegal characters" do
|
99
|
+
@tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
describe 'contracts_expressions' do
|
105
|
+
context 'without contract_expressions called' do
|
106
|
+
it 'should define a method contract' do
|
107
|
+
lambda { @tokenizer.contract('from this text') }.should_not raise_error
|
108
|
+
end
|
109
|
+
it 'should define a method contract that does nothing' do
|
110
|
+
unchanging = stub :unchanging
|
111
|
+
@tokenizer.contract unchanging
|
112
|
+
end
|
113
|
+
end
|
114
|
+
context 'with contracts_expressions called' do
|
115
|
+
before(:each) do
|
116
|
+
@tokenizer.contracts_expressions(/Mister|Mr./, 'mr')
|
117
|
+
end
|
118
|
+
it 'should define a method remove_stopwords' do
|
119
|
+
lambda { @tokenizer.contract('from this text') }.should_not raise_error
|
120
|
+
end
|
121
|
+
it 'should define a method contract that contracts expressions' do
|
122
|
+
@tokenizer.contract('Mister Meyer, Mr. Peter').should == 'mr Meyer, mr Peter'
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
describe 'stopwords' do
|
128
|
+
context 'without stopwords given' do
|
129
|
+
it 'should define a method remove_stopwords' do
|
130
|
+
lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
131
|
+
end
|
132
|
+
it 'should define a method remove_stopwords that does nothing' do
|
133
|
+
@tokenizer.remove_stopwords('from this text').should == 'from this text'
|
134
|
+
end
|
135
|
+
it 'should define a method remove_non_single_stopwords' do
|
136
|
+
lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
|
137
|
+
|
138
|
+
end
|
139
|
+
end
|
140
|
+
context 'with stopwords given' do
|
141
|
+
before(:each) do
|
142
|
+
@tokenizer.stopwords(/r|e/)
|
143
|
+
end
|
144
|
+
it 'should define a method remove_stopwords' do
|
145
|
+
lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
146
|
+
end
|
147
|
+
it 'should define a method stopwords that removes stopwords' do
|
148
|
+
@tokenizer.remove_stopwords('from this text').should == 'fom this txt'
|
149
|
+
end
|
150
|
+
it 'should define a method remove_non_single_stopwords' do
|
151
|
+
lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
|
152
|
+
end
|
153
|
+
it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
|
154
|
+
@tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
|
155
|
+
end
|
156
|
+
it 'should define a method remove_non_single_stopwords that does not single stopwords' do
|
157
|
+
@tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
158
|
+
end
|
159
|
+
end
|
160
|
+
context 'error case' do
|
161
|
+
before(:each) do
|
162
|
+
@tokenizer.stopwords(/any/)
|
163
|
+
end
|
164
|
+
it 'should not remove non-single stopwords with a star' do
|
165
|
+
@tokenizer.remove_non_single_stopwords('a*').should == 'a*'
|
166
|
+
end
|
167
|
+
it 'should not remove non-single stopwords with a tilde' do
|
168
|
+
@tokenizer.remove_non_single_stopwords('a~').should == 'a~'
|
57
169
|
end
|
58
170
|
end
|
59
171
|
end
|
60
|
-
|
61
172
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 3
|
7
|
+
- 9
|
8
8
|
- 0
|
9
|
-
version: 0.3.0
|
9
|
+
version: 0.9.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-10-
|
17
|
+
date: 2010-10-26 00:00:00 +02:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -75,9 +75,10 @@ files:
|
|
75
75
|
- lib/picky/helpers/gc.rb
|
76
76
|
- lib/picky/helpers/measuring.rb
|
77
77
|
- lib/picky/index/bundle.rb
|
78
|
+
- lib/picky/index/bundle_checker.rb
|
78
79
|
- lib/picky/index/category.rb
|
79
|
-
- lib/picky/index/combined.rb
|
80
80
|
- lib/picky/index/type.rb
|
81
|
+
- lib/picky/index/wrappers/exact_first.rb
|
81
82
|
- lib/picky/indexers/base.rb
|
82
83
|
- lib/picky/indexers/default.rb
|
83
84
|
- lib/picky/indexers/field.rb
|
@@ -111,8 +112,10 @@ files:
|
|
111
112
|
- lib/picky/sources/base.rb
|
112
113
|
- lib/picky/sources/csv.rb
|
113
114
|
- lib/picky/sources/db.rb
|
115
|
+
- lib/picky/sources/delicious.rb
|
114
116
|
- lib/picky/tokenizers/base.rb
|
115
|
-
- lib/picky/tokenizers/default.rb
|
117
|
+
- lib/picky/tokenizers/default/index.rb
|
118
|
+
- lib/picky/tokenizers/default/query.rb
|
116
119
|
- lib/picky/tokenizers/index.rb
|
117
120
|
- lib/picky/tokenizers/query.rb
|
118
121
|
- lib/picky/umlaut_substituter.rb
|
@@ -145,6 +148,7 @@ files:
|
|
145
148
|
- spec/ext/performant_spec.rb
|
146
149
|
- spec/lib/application_spec.rb
|
147
150
|
- spec/lib/cacher/partial/default_spec.rb
|
151
|
+
- spec/lib/cacher/partial/none_spec.rb
|
148
152
|
- spec/lib/cacher/partial/subtoken_spec.rb
|
149
153
|
- spec/lib/cacher/partial_generator_spec.rb
|
150
154
|
- spec/lib/cacher/similarity/double_levenshtone_spec.rb
|
@@ -153,6 +157,7 @@ files:
|
|
153
157
|
- spec/lib/cacher/weights/logarithmic_spec.rb
|
154
158
|
- spec/lib/cacher/weights_generator_spec.rb
|
155
159
|
- spec/lib/configuration/field_spec.rb
|
160
|
+
- spec/lib/configuration/indexes_spec.rb
|
156
161
|
- spec/lib/configuration/type_spec.rb
|
157
162
|
- spec/lib/cores_spec.rb
|
158
163
|
- spec/lib/extensions/array_spec.rb
|
@@ -164,9 +169,12 @@ files:
|
|
164
169
|
- spec/lib/helpers/cache_spec.rb
|
165
170
|
- spec/lib/helpers/gc_spec.rb
|
166
171
|
- spec/lib/helpers/measuring_spec.rb
|
172
|
+
- spec/lib/index/bundle_checker_spec.rb
|
167
173
|
- spec/lib/index/bundle_partial_generation_speed_spec.rb
|
168
174
|
- spec/lib/index/bundle_spec.rb
|
169
175
|
- spec/lib/index/category_spec.rb
|
176
|
+
- spec/lib/index/type_spec.rb
|
177
|
+
- spec/lib/index/wrappers/exact_first_spec.rb
|
170
178
|
- spec/lib/indexers/base_spec.rb
|
171
179
|
- spec/lib/indexers/field_spec.rb
|
172
180
|
- spec/lib/loader_spec.rb
|
@@ -186,11 +194,15 @@ files:
|
|
186
194
|
- spec/lib/query/weights_spec.rb
|
187
195
|
- spec/lib/rack/harakiri_spec.rb
|
188
196
|
- spec/lib/results/base_spec.rb
|
197
|
+
- spec/lib/results/live_spec.rb
|
189
198
|
- spec/lib/routing_spec.rb
|
190
199
|
- spec/lib/solr/schema_generator_spec.rb
|
191
200
|
- spec/lib/sources/csv_spec.rb
|
192
201
|
- spec/lib/sources/db_spec.rb
|
202
|
+
- spec/lib/sources/delicious_spec.rb
|
193
203
|
- spec/lib/tokenizers/base_spec.rb
|
204
|
+
- spec/lib/tokenizers/default/index_spec.rb
|
205
|
+
- spec/lib/tokenizers/default/query_spec.rb
|
194
206
|
- spec/lib/tokenizers/index_spec.rb
|
195
207
|
- spec/lib/tokenizers/query_spec.rb
|
196
208
|
- spec/lib/umlaut_substituter_spec.rb
|
@@ -232,6 +244,7 @@ test_files:
|
|
232
244
|
- spec/ext/performant_spec.rb
|
233
245
|
- spec/lib/application_spec.rb
|
234
246
|
- spec/lib/cacher/partial/default_spec.rb
|
247
|
+
- spec/lib/cacher/partial/none_spec.rb
|
235
248
|
- spec/lib/cacher/partial/subtoken_spec.rb
|
236
249
|
- spec/lib/cacher/partial_generator_spec.rb
|
237
250
|
- spec/lib/cacher/similarity/double_levenshtone_spec.rb
|
@@ -240,6 +253,7 @@ test_files:
|
|
240
253
|
- spec/lib/cacher/weights/logarithmic_spec.rb
|
241
254
|
- spec/lib/cacher/weights_generator_spec.rb
|
242
255
|
- spec/lib/configuration/field_spec.rb
|
256
|
+
- spec/lib/configuration/indexes_spec.rb
|
243
257
|
- spec/lib/configuration/type_spec.rb
|
244
258
|
- spec/lib/cores_spec.rb
|
245
259
|
- spec/lib/extensions/array_spec.rb
|
@@ -251,9 +265,12 @@ test_files:
|
|
251
265
|
- spec/lib/helpers/cache_spec.rb
|
252
266
|
- spec/lib/helpers/gc_spec.rb
|
253
267
|
- spec/lib/helpers/measuring_spec.rb
|
268
|
+
- spec/lib/index/bundle_checker_spec.rb
|
254
269
|
- spec/lib/index/bundle_partial_generation_speed_spec.rb
|
255
270
|
- spec/lib/index/bundle_spec.rb
|
256
271
|
- spec/lib/index/category_spec.rb
|
272
|
+
- spec/lib/index/type_spec.rb
|
273
|
+
- spec/lib/index/wrappers/exact_first_spec.rb
|
257
274
|
- spec/lib/indexers/base_spec.rb
|
258
275
|
- spec/lib/indexers/field_spec.rb
|
259
276
|
- spec/lib/loader_spec.rb
|
@@ -273,11 +290,15 @@ test_files:
|
|
273
290
|
- spec/lib/query/weights_spec.rb
|
274
291
|
- spec/lib/rack/harakiri_spec.rb
|
275
292
|
- spec/lib/results/base_spec.rb
|
293
|
+
- spec/lib/results/live_spec.rb
|
276
294
|
- spec/lib/routing_spec.rb
|
277
295
|
- spec/lib/solr/schema_generator_spec.rb
|
278
296
|
- spec/lib/sources/csv_spec.rb
|
279
297
|
- spec/lib/sources/db_spec.rb
|
298
|
+
- spec/lib/sources/delicious_spec.rb
|
280
299
|
- spec/lib/tokenizers/base_spec.rb
|
300
|
+
- spec/lib/tokenizers/default/index_spec.rb
|
301
|
+
- spec/lib/tokenizers/default/query_spec.rb
|
281
302
|
- spec/lib/tokenizers/index_spec.rb
|
282
303
|
- spec/lib/tokenizers/query_spec.rb
|
283
304
|
- spec/lib/umlaut_substituter_spec.rb
|
data/lib/picky/index/combined.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
module Index
|
4
|
-
|
5
|
-
# This index combines an exact and partial index.
|
6
|
-
# It serves to order the results such that exact hits are found first.
|
7
|
-
#
|
8
|
-
# TODO Need to use the right subtokens. Bake in?
|
9
|
-
#
|
10
|
-
# TODO One can use it as a wrapper, and it will extract the indexes itself. Rename: ExactFirst.
|
11
|
-
#
|
12
|
-
class Combined < Bundle
|
13
|
-
|
14
|
-
delegate :similar,
|
15
|
-
:identifier,
|
16
|
-
:name,
|
17
|
-
:to => :@exact
|
18
|
-
delegate :type,
|
19
|
-
:category,
|
20
|
-
:weight,
|
21
|
-
:generate_partial_from,
|
22
|
-
:generate_caches_from_memory,
|
23
|
-
:generate_derived,
|
24
|
-
:dump,
|
25
|
-
:load,
|
26
|
-
:to => :@partial
|
27
|
-
|
28
|
-
# TODO initialize type_or_category # => installs itself on all exact and partial
|
29
|
-
#
|
30
|
-
def initialize exact, partial
|
31
|
-
@exact = exact
|
32
|
-
@partial = partial
|
33
|
-
end
|
34
|
-
|
35
|
-
def ids text
|
36
|
-
@exact.ids(text) + @partial.ids(text)
|
37
|
-
end
|
38
|
-
|
39
|
-
def weight text
|
40
|
-
[@exact.weight(text) || 0, @partial.weight(text) || 0].max
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
end
|