picky 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -207,8 +207,6 @@ class Application
   #
   # Warns if something is missing.
   #
-  # TODO Good specs.
-  #
   def check # :nodoc:
     warnings = []
     warnings << check_external_interface
@@ -1,32 +1,30 @@
 module Configuration # :nodoc:all
-
+
   # Holds the configuration for a
   # index/category combination.
   #
-  # TODO Rename paths?
-  #
   class Index
-
+
     attr_reader :index, :category
-
+
     def initialize index, category
       @index = index
       @category = category
     end
-
+
     def index_name
       @index_name ||= index.name
     end
     def category_name
       @category_name ||= category.name
     end
-
+
     #
     #
     def index_path bundle_name, name
       "#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
     end
-
+
     # Was: search_index_file_name
     #
     def prepared_index_path
@@ -36,21 +34,17 @@ module Configuration # :nodoc:all
       @prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
       @prepared_index_file.open_for_indexing &block
     end
-
-    # def file_name
-    #   @file_name ||= "#{@index_name}_#{@category_name}"
-    # end
-
+
     # Identifier for internal use.
     #
     def identifier
       @identifier ||= "#{index_name}:#{category_name}"
     end
-
+
     def to_s
       "#{index_name} #{category_name}"
     end
-
+
     def self.index_root
       @index_root ||= "#{PICKY_ROOT}/index"
     end
@@ -67,7 +61,7 @@ module Configuration # :nodoc:all
    def prepare_index_directory
      FileUtils.mkdir_p index_directory
    end
-
+
  end
-
+
 end
@@ -8,7 +8,7 @@ module Internals
 
       # Writes the hash into Redis.
       #
-      # TODO Could we use multi?
+      # Note: We could use multi, but it did not help.
      #
      def dump hash
        redis = backend
@@ -67,7 +67,7 @@ module Internals
      # for each found similar token.
      #
      def similar_possible_for token
-        # Get as many similar tokens as necessary
+        # Get as many tokens as necessary
        #
        tokens = similar_tokens_for token
        # possible combinations
@@ -105,9 +105,13 @@ module Internals
      # (Also none of the categories matched, but the ignore unassigned
      # tokens option is true)
      #
+      # TODO Could use Combinations class here and remove the inject.
+      #
      def possible_for token, preselected_categories = nil
-        possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
-        possible.compact!
+        possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
+          combination = category.combination_for token
+          combination ? combinations << combination : combinations
+        end
        # This is an optimization to mark tokens that are ignored.
        #
        return if ignore_unassigned_tokens && possible.empty?
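
Note on the hunk above: it trades a map plus compact! pair for a single inject pass, so combinations that come back nil never enter the result array. A minimal plain-Ruby sketch of the equivalence (the combination_for lambda is a hypothetical stand-in, not Picky's API):

    # Hypothetical stand-in: one category yields no combination (nil).
    categories      = [:author, :title, :isbn]
    combination_for = lambda { |category| category == :isbn ? nil : [category] }

    # Before: map everything (nils included), then strip nils in a second pass.
    before = categories.map { |category| combination_for.call(category) }
    before.compact!

    # After: accumulate only the non-nil combinations in one pass.
    after = categories.inject([]) do |combinations, category|
      combination = combination_for.call(category)
      combination ? combinations << combination : combinations
    end

    before == after # => true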
@@ -1,35 +1,35 @@
 module Indexed
   module Wrappers
-
+
    module Bundle
-
+
      # A calculation rewrites the symbol into a float.
      #
-      # TODO I really need to allow integers as keys. The code below is just not ok.
+      # TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
      #
      class Calculation < Wrapper
-
+
        #
        #
        def recalculate float
          float
        end
-
+
        #
        #
        def ids sym
          @bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
        end
-
+
        #
        #
        def weight sym
          @bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
        end
-
+
      end
-
+
    end
-
+
  end
 end
@@ -3,18 +3,18 @@ module Internals
  # encoding: utf-8
  #
  module Indexed
-
+
    # TODO Spec
    #
    module Wrappers
-
+
      # This index combines an exact and partial index.
      # It serves to order the results such that exact hits are found first.
      #
      # TODO Need to use the right subtokens. Bake in?
      #
-      class ExactFirst < Indexed::Bundle::Memory
-
+      class ExactFirst < Indexed::Bundle::Base
+
        delegate :similar,
                 :identifier,
                 :name,
@@ -28,12 +28,12 @@ module Internals
                 :dump,
                 :load,
                 :to => :@partial
-
+
        def initialize category
          @exact = category.exact
          @partial = category.partial
        end
-
+
        def self.wrap index_or_category
          if index_or_category.respond_to? :categories
            wrap_each_of index_or_category.categories
@@ -47,19 +47,19 @@ module Internals
        def self.wrap_each_of categories
          categories.categories.collect! { |category| new(category) }
        end
-
+
        def ids text
          @exact.ids(text) + @partial.ids(text)
        end
-
+
        def weight text
          [@exact.weight(text) || 0, @partial.weight(text) || 0].max
        end
-
+
      end
-
+
    end
-
+
  end
-
+
 end
@@ -5,8 +5,6 @@ module Internals
    #
    class Allocations # :nodoc:all
 
-      # TODO Remove size
-      #
      delegate :each, :inject, :empty?, :size, :to => :@allocations
      attr_reader :total
 
@@ -23,7 +21,7 @@ module Internals
      end
      # Sort the allocations.
      #
-      def sort
+      def sort!
        @allocations.sort!
      end
 
@@ -116,7 +114,7 @@ module Internals
      end
 
    end
-
+
  end
-
+
 end
@@ -8,12 +8,12 @@ module Internals
    # An allocation consists of a number of combinations.
    #
    module Combinations # :nodoc:all
-
+
      # Memory Combinations contain specific methods for
      # calculating score and ids in memory.
      #
      class Memory < Base
-
+
        # Returns the result ids for the allocation.
        #
        # Sorts the ids by size and & through them in the following order (sizes):
@@ -24,7 +24,7 @@ module Internals
        # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
        #
        # Note: In the memory based version we ignore the (amount) needed hint.
-        # TODO Not ignore it?
+        # We might use the fact to optimize the algorithm.
        #
        def ids _, _
          return [] if @combinations.empty?
@@ -43,16 +43,16 @@ module Internals
          # this precondition for a fast algorithm is always given.
          #
          id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
-
+
          # Call the optimized C algorithm.
          #
          Performant::Array.memory_efficient_intersect id_arrays
        end
-
+
      end
-
+
    end
-
+
  end
-
+
 end
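
Note: as the comments above describe, the id arrays are sorted by size so the intersection starts with the smallest arrays and the working set shrinks as early as possible; the heavy lifting is done by the C-optimized Performant::Array.memory_efficient_intersect. A plain-Ruby sketch of the same idea, for illustration only:

    # Intersect id arrays smallest-first so each & keeps intermediates small.
    def sketch_memory_efficient_intersect id_arrays
      id_arrays.sort_by(&:size).inject { |intersection, ids| intersection & ids }
    end

    sketch_memory_efficient_intersect [[1, 2, 3, 4], [2, 4], [2, 3, 4]] # => [2, 4]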
@@ -27,6 +27,9 @@ module Internals
 
      # Returns a number of possible allocations for the given tokens.
      #
+      def sorted_allocations_for tokens
+
+      end
      def allocations_for tokens
        Allocations.new allocations_ary_for(tokens)
      end
@@ -40,21 +43,10 @@ module Internals
        #
        possible_combinations = tokens.possible_combinations_in index
 
-        # Optimization for ignoring tokens that allocate to nothing and
-        # can be ignored.
-        # For example in a special search, where "florian" is not
-        # mapped to any category.
-        #
-        possible_combinations.compact!
-
        # Generate all possible combinations.
        #
        expanded_combinations = expand_combinations_from possible_combinations
 
-        # If there are none, try the next allocation.
-        #
-        return [] unless expanded_combinations
-
        # Add the wrapped possible allocations to the ones we already have.
        #
        expanded_combinations.map! do |expanded_combination|
@@ -62,7 +54,7 @@ module Internals
        end
      end
 
-      # This is the core of the search engine.
+      # This is the core of the search engine. No kidding.
      #
      # Gets an array of
      # [
@@ -122,7 +114,7 @@ module Internals
        # If an element has size 0, this means one of the
        # tokens could not be allocated.
        #
-        return if possible_combinations.any?(&:empty?)
+        return [] if possible_combinations.any?(&:empty?)
 
        # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
        #
@@ -170,7 +162,7 @@ module Internals
          combinations
        end
 
-        return if possible_combinations.empty?
+        return [] if possible_combinations.empty?
 
        possible_combinations.shift.zip *possible_combinations
      end
@@ -28,14 +28,18 @@ module Internals
      # Note: Use this in the search engine if you need a qualified
      # and normalized token. I.e. one prepared for a search.
      #
-      def self.processed text
-        token = new text
-        token.qualify
-        token.extract_original
-        token.partialize
-        token.similarize
-        token.remove_illegals
-        token
+      def self.processed text, downcase = true
+        new(text).process downcase
+      end
+      def process downcases = true
+        qualify
+        extract_original
+        downcase if downcases
+        partialize
+        similarize
+        remove_illegals
+        symbolize
+        self
      end
 
      # This returns a predefined category name if the user has given one.
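
Note: processed now delegates to the new instance method process, and callers can skip downcasing via the downcase flag; the tokenizers' new case_sensitive option (further down in this diff) feeds exactly this flag. A usage sketch, with results as shown in the token specs later in this diff:

    Internals::Query::Token.processed('Title*').text        # => :title (downcased, * removed, partial)
    Internals::Query::Token.processed('Title*', false).text # => :Title (case preserved)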
@@ -56,6 +60,12 @@ module Internals
        @original = @text.dup
      end
 
+      # Downcases the text.
+      #
+      def downcase
+        @text.downcase!
+      end
+
      # Partial is a conditional setter.
      #
      # It is only settable if it hasn't been set yet.
@@ -69,15 +79,19 @@ module Internals
 
      # If the text ends with *, partialize it. If with ", don't.
      #
+      # The latter wins. So "hello*" will not be partially searched.
+      #
      @@no_partial = /\"\Z/
      @@partial = /\*\Z/
      def partialize
-        self.partial = false and return if @text =~ @@no_partial
-        self.partial = true if @text =~ @@partial
+        self.partial = false and return unless @text !~ @@no_partial
+        self.partial = true unless @text !~ @@partial
      end
 
      # If the text ends with ~ similarize it. If with ", don't.
      #
+      # The latter wins.
+      #
      @@no_similar = /\"\Z/
      @@similar = /\~\Z/
      def similarize
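
Note: "the latter wins" means a trailing " vetoes a trailing *, since the no-partial rule is checked first. A small sketch, inspected the way the partial specs below do it:

    Internals::Query::Token.processed('hello*').instance_variable_get(:@partial)  # => true
    Internals::Query::Token.processed('hello"').instance_variable_get(:@partial)  # => false
    Internals::Query::Token.processed('hello*"').instance_variable_get(:@partial) # => false, the " wins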
@@ -96,21 +110,10 @@ module Internals
        @text.gsub! @@illegals, '' unless @text.blank?
      end
 
-      # Visitor for tokenizer.
      #
-      # TODO Rewrite!!!
      #
-      def tokenize_with tokenizer
-        @text = tokenizer.normalize @text
-      end
-      # TODO spec!
-      #
-      # TODO Rewrite!!
-      #
-      def tokenized tokenizer
-        tokenizer.tokenize(@text.to_s).each do |text|
-          yield text
-        end
+      def symbolize
+        @text = @text.to_sym
      end
 
      # Returns an array of possible combinations.
@@ -181,6 +184,12 @@ module Internals
        "#{similar?? :similarity : :index}:#{@text}"
      end
 
+      # If the originals & the text are the same, they are the same.
+      #
+      def == other
+        self.original == other.original && self.text == other.text
+      end
+
      # Displays the qualifier text and the text, joined.
      #
      # e.g. name:meier
@@ -1,31 +1,36 @@
 # encoding: utf-8
 #
 module Internals
-
+
  #
  #
  module Query
-
+
    # This class primarily handles switching through similar token constellations.
    #
    class Tokens # :nodoc:all
-
+
      # Basically delegates to its internal tokens array.
      #
      self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
-
+
      #
      #
      def initialize tokens = []
        @tokens = tokens
      end
-
+      def self.processed words, downcase = true
+        new words.collect! { |word| Token.processed word, downcase }
+      end
+
+      # Tokenizes each token.
      #
+      # Note: Passed tokenizer needs to offer #normalize(text).
      #
      def tokenize_with tokenizer
        @tokens.each { |token| token.tokenize_with(tokenizer) }
      end
-
+
      # Generates an array in the form of
      # [
      #   [combination], # of token 1
@@ -33,14 +38,17 @@ module Internals
      #   [combination, combination] # of token 3
      # ]
      #
-      # TODO If we want token behaviour defined per Query, we can
-      # compact! here
-      #
      def possible_combinations_in type
        @tokens.inject([]) do |combinations, token|
-          combinations << token.possible_combinations_in(type)
+          possible_combinations = token.possible_combinations_in type
+
+          # Note: Optimization for ignoring tokens that allocate to nothing and
+          # can be ignored.
+          # For example in a special search, where "florian" is not
+          # mapped to any category.
+          #
+          possible_combinations ? combinations << possible_combinations : combinations
        end
-        # TODO compact! if ignore_unassigned_tokens
      end
 
      # Makes the last of the tokens partial.
@@ -57,33 +65,37 @@ module Internals
      def cap? maximum
        @tokens.size > maximum
      end
-
+
      # Rejects blank tokens.
      #
      def reject
        @tokens.reject! &:blank?
      end
-
+
      # Returns a solr query.
      #
      def to_solr_query
        @tokens.map(&:to_solr).join ' '
      end
-
+
      #
      #
      def originals
        @tokens.map(&:original)
      end
-
+
+      def == other
+        self.tokens == other.tokens
+      end
+
      # Just join the token original texts.
      #
      def to_s
        originals.join ' '
      end
-
+
    end
-
+
  end
-
+
 end
@@ -3,19 +3,19 @@ module Query
  # Calculates weights for certain combinations.
  #
  class Weights # :nodoc:all
-
+
    #
    #
    def initialize weights = {}
      @weights = weights
    end
-
+
    # Get the weight of an allocation.
    #
    def weight_for clustered
      @weights[clustered] || 0
    end
-
+
    # Returns an energy term E for allocation. this turns into a probability
    # by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
    # where Z is the normalizing partition function
@@ -31,24 +31,26 @@ module Query
    # Note: Cache this if more complicated weighings become necessary.
    #
    def score combinations
-      # TODO Or hide: combinations#to_weights_key
+      # TODO Or hide: combinations#to_weights_key (but it's an array, so…)
      #
      # TODO combinations could cluster uniq as combinations are added (since combinations don't change).
      #
+      # TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
+      #
      weight_for combinations.map(&:category_name).clustered_uniq_fast
    end
-
+
    # Are there any weights defined?
    #
    def empty?
      @weights.empty?
    end
-
+
    # Prints out a nice representation of the configured weights.
    #
    def to_s
      @weights.to_s
    end
-
+
  end
 end
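
Note: weight_for is a plain hash lookup keyed by the clustered category names of an allocation's combinations, with 0 as the fallback; score builds that key. A sketch with hypothetical weights (namespace as in the hunk header):

    weights = Query::Weights.new({ [:author, :title] => 6, [:title] => 1 })

    weights.weight_for [:author, :title] # => 6
    weights.weight_for [:isbn]           # => 0, the fallback for unknown keys
    weights.empty?                       # => false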
@@ -20,6 +20,7 @@ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@r
 Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
 Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
 Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
+Case sensitive? #{@case_sensitive ? "Yes." : "-"}
 TOKENIZER
      end
 
@@ -125,6 +126,13 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
        tokens.reject! &@reject_condition
      end
 
+      def case_sensitive case_sensitive
+        @case_sensitive = case_sensitive
+      end
+      def downcase?
+        !@case_sensitive
+      end
+
      # Checks if the right argument type has been given.
      #
      def check_argument_in method, type, argument, &condition
@@ -156,6 +164,7 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
        normalizes_words options[:normalizes_words] if options[:normalizes_words]
        removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
        substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
+        case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
 
        # Defaults.
        #
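
Note: taken together, the tokenizer hunks add a case_sensitive option whose downcase? query steers downcasing during token processing (see the Token#process and Index/Query tokenizer hunks elsewhere in this diff). A configuration sketch:

    # Option name as wired up above, shown on the query tokenizer.
    tokenizer = Internals::Tokenizers::Query.new case_sensitive: true

    tokenizer.downcase? # => false, so tokens keep their case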
@@ -163,37 +172,54 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
        reject_token_if &(options[:reject_token_if] || :blank?)
      end
 
-      # Hooks.
+      # Default preprocessing hook.
      #
-
-      # Preprocessing.
+      # Does:
+      # 1. Character substitution.
+      # 2. Remove illegal expressions.
+      # 3. Remove non-single stopwords. (Stopwords that occur with other words)
      #
-      def preprocess text; end
+      def preprocess text
+        text = substitute_characters text
+        remove_illegals text
+        # We do not remove single stopwords e.g. in the indexer for
+        # an entirely different reason than in the query tokenizer.
+        # An indexed thing with just name "UND" (a possible stopword)
+        # should not lose its name.
+        #
+        remove_non_single_stopwords text
+        text
+      end
      # Pretokenizing.
      #
-      def pretokenize text; end
-      # Postprocessing.
+      # Does:
+      # 1. Split the text into words.
+      # 2. Normalize each word.
+      #
+      def pretokenize text
+        words = split text
+        words.collect! do |word|
+          normalize_with_patterns word
+          word
+        end
+      end
+      # Basic postprocessing (overridden in both query/index tokenizers).
      #
      def process tokens
        reject tokens # Reject any tokens that don't meet criteria
        tokens
      end
 
-      # Converts words into real tokens.
-      #
-      def tokens_for words
-        Internals::Query::Tokens.new words.collect! { |word| token_for word }
-      end
+      # # Converts words into real tokens.
+      # #
+      # def tokens_for words
+      #   Internals::Query::Tokens.new words.collect! { |word| token_for word }
+      # end
      # Turns non-blank text into symbols.
      #
      def symbolize text
        text.blank? ? nil : text.to_sym
      end
-      # Returns a tokens object.
-      #
-      def empty_tokens
-        Internals::Query::Tokens.new
-      end
 
    end
 
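
Note: with the hooks now concrete in the base tokenizer, the implied order is preprocess, then pretokenize, then tokens_for, then process. The tokenize driver itself is not part of this diff, so the following is only an assumed sketch of how the hooks compose:

    # Assumed driver shape; each hook is defined in the hunks above.
    def tokenize text
      text   = preprocess text   # character substitution, illegals, stopwords
      words  = pretokenize text  # split into words, normalize with patterns
      tokens = tokens_for words  # symbols when indexing, Tokens when querying
      process tokens             # reject; the query tokenizer also caps/partializes
    end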
@@ -15,45 +15,16 @@ module Internals
        @default ||= new
      end
 
-      # Default indexing preprocessing hook.
-      #
-      # Does:
-      # 1. Character substitution.
-      # 2. Downcasing.
-      # 3. Remove illegal expressions.
-      # 4. Remove non-single stopwords. (Stopwords that occur with other words)
-      #
-      def preprocess text
-        text = substitute_characters text
-        text.downcase!
-        remove_illegals text
-        # we do not remove single stopwords for an entirely different
-        # reason than in the query tokenizer.
-        # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
-        #
-        remove_non_single_stopwords text
-        text
-      end
-
-      # Default indexing pretokenizing hook.
-      #
-      # Does:
-      # 1. Split the text into words.
-      # 2. Normalize each word.
-      #
-      def pretokenize text
-        words = split text
-        words.collect! do |word|
-          normalize_with_patterns word
-          word
-        end
-      end
-
      # Does not actually return a token, but a
      # symbol "token".
      #
-      def token_for text
-        symbolize text
+      def tokens_for words
+        words.collect! { |word| word.downcase! if downcase?; word.to_sym }
+      end
+      # Returns empty tokens.
+      #
+      def empty_tokens
+        []
      end
 
    end
@@ -3,7 +3,7 @@
 module Internals
 
  module Tokenizers
-
+
    # There are a few class methods that you can use to configure how a query works.
    #
    # removes_characters regexp
@@ -14,66 +14,46 @@ module Internals
    # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
    #
    class Query < Base
-
+
      def self.default= new_default
        @default = new_default
      end
      def self.default
        @default ||= new
      end
-
+
      attr_reader :maximum_tokens
-
+
      def initialize options = {}
        super options
        @maximum_tokens = options[:maximum_tokens] || 5
      end
-
-      def preprocess text
-        remove_illegals text # Remove illegal characters
-        remove_non_single_stopwords text # remove stop words
-        text
-      end
-
-      # Split the text and put some back together.
-      #
-      # TODO Make the same as in indexing?
-      #
-      def pretokenize text
-        split text
-      end
-
+
      # Let each token process itself.
      # Reject, limit, and partialize tokens.
      #
+      # In querying we work with real tokens (in indexing it's just symbols).
+      #
      def process tokens
-        tokens.tokenize_with self
-        tokens.reject # Reject any tokens that don't meet criteria
-        tokens.cap maximum_tokens # Cut off superfluous tokens
-        tokens.partialize_last # Set certain tokens as partial
+        tokens.reject # Reject any tokens that don't meet criteria.
+        tokens.cap maximum_tokens # Cut off superfluous tokens.
+        tokens.partialize_last # Set certain tokens as partial.
        tokens
      end
-
-      # Called by the token.
-      #
-      # TODO Perhaps move to Normalizer?
+
+      # Converts words into real tokens.
      #
-      def normalize text
-        text = substitute_characters text # Substitute special characters
-        text.downcase! # Downcase all text
-        normalize_with_patterns text # normalize
-        text.to_sym # symbolize
+      def tokens_for words
+        Internals::Query::Tokens.processed words, downcase?
      end
-
-      # Returns a token for a word.
-      # The basic query tokenizer uses new tokens.
+      # Returns a tokens object.
      #
-      def token_for word
-        Internals::Query::Token.processed word
+      def empty_tokens
+        Internals::Query::Tokens.new
      end
-
+
    end
-
+
  end
-
+
 end
data/lib/picky/loader.rb CHANGED
@@ -179,8 +179,6 @@ module Loader # :nodoc:all
  load_internals 'indexed/categories'
  load_internals 'indexed/index'
 
-  # TODO Ok here?
-  #
  load_internals 'indexed/wrappers/exact_first'
 
  # Bundle Wrapper
data/lib/picky/search.rb CHANGED
@@ -146,7 +146,7 @@ class Search
    # Sort the allocations.
    # (allocations are sorted according to score, highest to lowest)
    #
-    allocations.sort
+    allocations.sort!
 
    # Return the allocations.
    #
@@ -1,17 +1,9 @@
-# TODO This file needs some love.
+# Server tasks, like starting/stopping/restarting.
 #
 namespace :server do
-
-  def chdir_to_root
-    Dir.chdir PICKY_ROOT
-  end
-
-  def current_pid
-    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
-    pid.blank? ? nil : pid.chomp
-  end
-
+
  # desc "Start the unicorns. (Wehee!)"
+  #
  task :start => :framework do
    chdir_to_root
    daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
@@ -19,17 +11,27 @@ namespace :server do
    puts "Running \`#{command}\`."
    exec command
  end
-
+
  # desc "Stop the unicorns. (Blam!)"
+  #
  task :stop => :framework do
    `kill -QUIT #{current_pid}` if current_pid
  end
-
+
  # desc "Restart the unicorns."
  task :restart do
    Rake::Task[:"server:stop"].invoke
    sleep 5
    Rake::Task[:"server:start"].invoke
  end
-
+
+  def chdir_to_root
+    Dir.chdir PICKY_ROOT
+  end
+
+  def current_pid
+    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
+    pid.blank? ? nil : pid.chomp
+  end
+
 end
data/lib/tasks/todo.rake CHANGED
@@ -1,5 +1,5 @@
 desc "Finds where Picky still needs input from you."
-task :todo do
+task :'to#{}do' do
  if system "grep -e 'TODO.*' -n --color=always -R *"
    puts "Picky needs a bit of input from you there. Thanks."
  else
@@ -273,14 +273,14 @@ describe Internals::FrontendAdapters::Rack do
    end
    context 'without app' do
      context 'with url' do
-        it 'should use the 404 with default_options from the url' do
+        it 'should use the 200 with default_options from the url' do
          @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET", :path_info => /some_url/ }
 
          @rack_adapter.answer 'some_url'
        end
      end
      context 'without url' do
-        it 'should use the 404 with default_options' do
+        it 'should use the 200 with default_options' do
          @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET" }
 
          @rack_adapter.answer
@@ -5,7 +5,7 @@ require 'spec_helper'
 describe Internals::Tokenizers::Base do
 
  context 'with special instance' do
-    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
+    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }, case_sensitive: true }
    it 'rejects tokens with length < 2' do
      tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
    end
@@ -13,7 +13,7 @@ describe Internals::Tokenizers::Base do
      tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
    end
    describe 'to_s' do
-      it 'does something' do
+      it 'spits out the right text' do
        tokenizer.to_s.should == <<-EXPECTED
 Removes characters: -
 Stopwords: -
@@ -22,6 +22,7 @@ Removes chars after split: -
 Normalizes words: -
 Rejects tokens? Yes, see line 8 in app/application.rb
 Substitutes chars? -
+Case sensitive? Yes.
 EXPECTED
      end
    end
@@ -31,7 +32,7 @@ EXPECTED
    let(:tokenizer) { described_class.new }
 
    describe 'to_s' do
-      it 'does something' do
+      it 'spits out the right text' do
        tokenizer.to_s.should == <<-EXPECTED
 Removes characters: -
 Stopwords: -
@@ -40,6 +41,7 @@ Removes chars after split: -
 Normalizes words: -
 Rejects tokens? -
 Substitutes chars? -
+Case sensitive? -
 EXPECTED
      end
    end
@@ -41,6 +41,7 @@ describe Internals::Tokenizers::Query do
    it 'should call methods in order' do
      text = stub :text
 
+      tokenizer.should_receive(:substitute_characters).once.with(text).and_return text
      tokenizer.should_receive(:remove_illegals).once.ordered.with text
      tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
 
@@ -57,13 +58,7 @@ describe Internals::Tokenizers::Query do
    before(:each) do
      @tokens = mock :tokens, :null_object => true
    end
-    it 'should tokenize the tokens' do
-      @tokens.should_receive(:tokenize_with).once.with tokenizer
-
-      tokenizer.process @tokens
-    end
    it 'should call methods on the tokens in order' do
-      @tokens.should_receive(:tokenize_with).once.ordered
      @tokens.should_receive(:reject).once.ordered
      @tokens.should_receive(:cap).once.ordered
      @tokens.should_receive(:partialize_last).once.ordered
@@ -122,13 +117,5 @@ describe Internals::Tokenizers::Query do
      tokenizer.tokenize('').map(&:to_s).should == []
    end
  end
-  describe "token_for" do
-    it "should get a preprocessed token" do
-      text = stub(:text)
-      Internals::Query::Token.should_receive(:processed).with text
-
-      tokenizer.token_for text
-    end
-  end
 
 end
@@ -64,7 +64,7 @@ describe 'Query::Combination' do
 
  describe 'ids' do
    it 'should call ids with the text on bundle' do
-      @bundle.should_receive(:ids).once.with 'some_text'
+      @bundle.should_receive(:ids).once.with :some_text
 
      @combination.ids
    end
@@ -80,7 +80,7 @@ describe 'Query::Combination' do
 
  describe 'weight' do
    it 'should call weight with the text on bundle' do
-      @bundle.should_receive(:weight).once.with 'some_text'
+      @bundle.should_receive(:weight).once.with :some_text
 
      @combination.weight
    end
@@ -44,17 +44,17 @@ describe Internals::Query::Indexes do
  it 'can handle empty combinations' do
    combinations = [[1,2,3], [:a, :b, :c], []]
 
-    indexes.expand_combinations_from(combinations).should == nil
+    indexes.expand_combinations_from(combinations).should == []
  end
  it 'can handle empty combinations' do
    combinations = [[], [:a, :b, :c], []]
 
-    indexes.expand_combinations_from(combinations).should == nil
+    indexes.expand_combinations_from(combinations).should == []
  end
  it 'can handle totally empty combinations' do
    combinations = [[], [], []]
 
-    indexes.expand_combinations_from(combinations).should == nil
+    indexes.expand_combinations_from(combinations).should == []
  end
  it 'is fast in a complicated case' do
    combinations = [[1,2,3], [:a, :b, :c], [:k, :l]]
@@ -64,12 +64,12 @@ describe Internals::Query::Indexes do
  it 'is fast in a simple case' do
    combinations = [[1], [2], [3]]
 
-    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00055
+    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0006
  end
  it 'is very fast in a 1-empty case' do
    combinations = [[], [2], [3]]
 
-    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00045
+    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0005
  end
  it 'is very fast in a all-empty case' do
    combinations = [[], [], []]
@@ -8,6 +8,15 @@ describe Internals::Query::Token do
    Internals::Query::Qualifiers.instance.prepare
  end
 
+  describe '==' do
+    it 'is equal if the originals are equal' do
+      described_class.processed('similar~').should == described_class.processed('similar~')
+    end
+    it 'is not equal if the originals are not equal' do
+      described_class.processed('similar~').should_not == described_class.processed('similar')
+    end
+  end
+
  describe 'next_similar_token' do
    before(:each) do
      @bundle = stub :bundle, :similar => [:array, :of, :similar]
@@ -157,20 +166,29 @@ describe Internals::Query::Token do
  end
 
  describe 'processed' do
+    it 'should return a new token' do
+      described_class.processed('some text').should be_kind_of(described_class)
+    end
+    it 'generates a token' do
+      described_class.processed('some text').class.should == described_class
+    end
+  end
+
+  describe 'process' do
+    let(:token) { described_class.new 'any_text' }
+    it 'returns itself' do
+      token.process.should == token
+    end
    it 'should have an order' do
-      token = stub :token
-      described_class.should_receive(:new).once.and_return token
-
      token.should_receive(:qualify).once.ordered
      token.should_receive(:extract_original).once.ordered
+      token.should_receive(:downcase).once.ordered
      token.should_receive(:partialize).once.ordered
      token.should_receive(:similarize).once.ordered
      token.should_receive(:remove_illegals).once.ordered
+      token.should_receive(:symbolize).once.ordered
 
-      described_class.processed :any_text
-    end
-    it 'should return a new token' do
-      described_class.processed('some text').should be_kind_of(described_class)
+      token.process
    end
  end
 
@@ -352,6 +370,13 @@ describe Internals::Query::Token do
    before(:each) do
      @token = described_class.processed 'text*'
    end
+    it 'should not set partial' do
+      @token.instance_variable_set :@partial, false
+
+      @token.partial = true
+
+      @token.instance_variable_get(:@partial).should be_false
+    end
    it 'should not set partial' do
      @token.partial = false
 
@@ -382,20 +407,20 @@ describe Internals::Query::Token do
    it 'should remove *' do
      token = described_class.processed 'text*'
 
-      token.text.should == 'text'
+      token.text.should == :text
    end
    it 'should remove ~' do
      token = described_class.processed 'text~'
 
-      token.text.should == 'text'
+      token.text.should == :text
    end
    it 'should remove "' do
      token = described_class.processed 'text"'
 
-      token.text.should == 'text'
+      token.text.should == :text
    end
    it "should pass on a processed text" do
-      described_class.processed('text').text.should == 'text'
+      described_class.processed('text').text.should == :text
    end
  end
 
@@ -7,6 +7,35 @@ describe Internals::Query::Tokens do
    Internals::Query::Qualifiers.instance.prepare
  end
 
+  describe '.processed' do
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~'),
+        Internals::Query::Token.processed('is'),
+        Internals::Query::Token.processed('a'),
+        Internals::Query::Token.processed('sp:solr'),
+        Internals::Query::Token.processed('query"')
+      ]
+
+      described_class.should_receive(:new).once.with expected
+
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~', false),
+        Internals::Query::Token.processed('is', false),
+        Internals::Query::Token.processed('a', false),
+        Internals::Query::Token.processed('sp:solr', false),
+        Internals::Query::Token.processed('query"', false)
+      ]
+
+      described_class.should_receive(:new).once.with expected
+
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+  end
+
  describe 'to_solr_query' do
    context 'many tokens' do
      before(:each) do
@@ -151,6 +180,16 @@ describe Internals::Query::Tokens do
        [:combination31, :combination32, :combination33]
      ]
    end
+    it 'should work correctly' do
+      @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
+      @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return nil
+      @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
+
+      @tokens.possible_combinations_in(:some_index).should == [
+        [:combination11, :combination12],
+        [:combination31, :combination32, :combination33]
+      ]
+    end
  end
 
  describe 'to_s' do
metadata CHANGED
@@ -2,7 +2,7 @@
 name: picky
 version: !ruby/object:Gem::Version
  prerelease:
-  version: 2.0.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2011-03-28 00:00:00 +02:00
+date: 2011-04-07 00:00:00 +10:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency