picky 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -207,8 +207,6 @@ class Application
207
207
  #
208
208
  # Warns if something is missing.
209
209
  #
210
- # TODO Good specs.
211
- #
212
210
  def check # :nodoc:
213
211
  warnings = []
214
212
  warnings << check_external_interface
@@ -1,32 +1,30 @@
1
1
  module Configuration # :nodoc:all
2
-
2
+
3
3
  # Holds the configuration for a
4
4
  # index/category combination.
5
5
  #
6
- # TODO Rename paths?
7
- #
8
6
  class Index
9
-
7
+
10
8
  attr_reader :index, :category
11
-
9
+
12
10
  def initialize index, category
13
11
  @index = index
14
12
  @category = category
15
13
  end
16
-
14
+
17
15
  def index_name
18
16
  @index_name ||= index.name
19
17
  end
20
18
  def category_name
21
19
  @category_name ||= category.name
22
20
  end
23
-
21
+
24
22
  #
25
23
  #
26
24
  def index_path bundle_name, name
27
25
  "#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
28
26
  end
29
-
27
+
30
28
  # Was: search_index_file_name
31
29
  #
32
30
  def prepared_index_path
@@ -36,21 +34,17 @@ module Configuration # :nodoc:all
36
34
  @prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
37
35
  @prepared_index_file.open_for_indexing &block
38
36
  end
39
-
40
- # def file_name
41
- # @file_name ||= "#{@index_name}_#{@category_name}"
42
- # end
43
-
37
+
44
38
  # Identifier for internal use.
45
39
  #
46
40
  def identifier
47
41
  @identifier ||= "#{index_name}:#{category_name}"
48
42
  end
49
-
43
+
50
44
  def to_s
51
45
  "#{index_name} #{category_name}"
52
46
  end
53
-
47
+
54
48
  def self.index_root
55
49
  @index_root ||= "#{PICKY_ROOT}/index"
56
50
  end
@@ -67,7 +61,7 @@ module Configuration # :nodoc:all
67
61
  def prepare_index_directory
68
62
  FileUtils.mkdir_p index_directory
69
63
  end
70
-
64
+
71
65
  end
72
-
66
+
73
67
  end
@@ -8,7 +8,7 @@ module Internals
8
8
 
9
9
  # Writes the hash into Redis.
10
10
  #
11
- # TODO Could we use multi?
11
+ # Note: We could use multi, but it did not help.
12
12
  #
13
13
  def dump hash
14
14
  redis = backend
@@ -67,7 +67,7 @@ module Internals
67
67
  # for each found similar token.
68
68
  #
69
69
  def similar_possible_for token
70
- # Get as many similar tokens as necessary
70
+ # Get as many tokens as necessary
71
71
  #
72
72
  tokens = similar_tokens_for token
73
73
  # possible combinations
@@ -105,9 +105,13 @@ module Internals
105
105
  # (Also none of the categories matched, but the ignore unassigned
106
106
  # tokens option is true)
107
107
  #
108
+ # TODO Could use Combinations class here and remove the inject.
109
+ #
108
110
  def possible_for token, preselected_categories = nil
109
- possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
110
- possible.compact!
111
+ possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
112
+ combination = category.combination_for token
113
+ combination ? combinations << combination : combinations
114
+ end
111
115
  # This is an optimization to mark tokens that are ignored.
112
116
  #
113
117
  return if ignore_unassigned_tokens && possible.empty?
@@ -1,35 +1,35 @@
1
1
  module Indexed
2
2
  module Wrappers
3
-
3
+
4
4
  module Bundle
5
-
5
+
6
6
  # A calculation rewrites the symbol into a float.
7
7
  #
8
- # TODO I really need to allow integers as keys. The code below is just not ok.
8
+ # TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
9
9
  #
10
10
  class Calculation < Wrapper
11
-
11
+
12
12
  #
13
13
  #
14
14
  def recalculate float
15
15
  float
16
16
  end
17
-
17
+
18
18
  #
19
19
  #
20
20
  def ids sym
21
21
  @bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
22
22
  end
23
-
23
+
24
24
  #
25
25
  #
26
26
  def weight sym
27
27
  @bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
28
28
  end
29
-
29
+
30
30
  end
31
-
31
+
32
32
  end
33
-
33
+
34
34
  end
35
35
  end
@@ -3,18 +3,18 @@ module Internals
3
3
  # encoding: utf-8
4
4
  #
5
5
  module Indexed
6
-
6
+
7
7
  # TODO Spec
8
8
  #
9
9
  module Wrappers
10
-
10
+
11
11
  # This index combines an exact and partial index.
12
12
  # It serves to order the results such that exact hits are found first.
13
13
  #
14
14
  # TODO Need to use the right subtokens. Bake in?
15
15
  #
16
- class ExactFirst < Indexed::Bundle::Memory
17
-
16
+ class ExactFirst < Indexed::Bundle::Base
17
+
18
18
  delegate :similar,
19
19
  :identifier,
20
20
  :name,
@@ -28,12 +28,12 @@ module Internals
28
28
  :dump,
29
29
  :load,
30
30
  :to => :@partial
31
-
31
+
32
32
  def initialize category
33
33
  @exact = category.exact
34
34
  @partial = category.partial
35
35
  end
36
-
36
+
37
37
  def self.wrap index_or_category
38
38
  if index_or_category.respond_to? :categories
39
39
  wrap_each_of index_or_category.categories
@@ -47,19 +47,19 @@ module Internals
47
47
  def self.wrap_each_of categories
48
48
  categories.categories.collect! { |category| new(category) }
49
49
  end
50
-
50
+
51
51
  def ids text
52
52
  @exact.ids(text) + @partial.ids(text)
53
53
  end
54
-
54
+
55
55
  def weight text
56
56
  [@exact.weight(text) || 0, @partial.weight(text) || 0].max
57
57
  end
58
-
58
+
59
59
  end
60
-
60
+
61
61
  end
62
-
62
+
63
63
  end
64
-
64
+
65
65
  end
@@ -5,8 +5,6 @@ module Internals
5
5
  #
6
6
  class Allocations # :nodoc:all
7
7
 
8
- # TODO Remove size
9
- #
10
8
  delegate :each, :inject, :empty?, :size, :to => :@allocations
11
9
  attr_reader :total
12
10
 
@@ -23,7 +21,7 @@ module Internals
23
21
  end
24
22
  # Sort the allocations.
25
23
  #
26
- def sort
24
+ def sort!
27
25
  @allocations.sort!
28
26
  end
29
27
 
@@ -116,7 +114,7 @@ module Internals
116
114
  end
117
115
 
118
116
  end
119
-
117
+
120
118
  end
121
-
119
+
122
120
  end
@@ -8,12 +8,12 @@ module Internals
8
8
  # An allocation consists of a number of combinations.
9
9
  #
10
10
  module Combinations # :nodoc:all
11
-
11
+
12
12
  # Memory Combinations contain specific methods for
13
13
  # calculating score and ids in memory.
14
14
  #
15
15
  class Memory < Base
16
-
16
+
17
17
  # Returns the result ids for the allocation.
18
18
  #
19
19
  # Sorts the ids by size and & through them in the following order (sizes):
@@ -24,7 +24,7 @@ module Internals
24
24
  # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
25
25
  #
26
26
  # Note: In the memory based version we ignore the (amount) needed hint.
27
- # TODO Not ignore it?
27
+ # We might use the fact to optimize the algorithm.
28
28
  #
29
29
  def ids _, _
30
30
  return [] if @combinations.empty?
@@ -43,16 +43,16 @@ module Internals
43
43
  # this precondition for a fast algorithm is always given.
44
44
  #
45
45
  id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
46
-
46
+
47
47
  # Call the optimized C algorithm.
48
48
  #
49
49
  Performant::Array.memory_efficient_intersect id_arrays
50
50
  end
51
-
51
+
52
52
  end
53
-
53
+
54
54
  end
55
-
55
+
56
56
  end
57
-
57
+
58
58
  end
@@ -27,6 +27,9 @@ module Internals
27
27
 
28
28
  # Returns a number of possible allocations for the given tokens.
29
29
  #
30
+ def sorted_allocations_for tokens
31
+
32
+ end
30
33
  def allocations_for tokens
31
34
  Allocations.new allocations_ary_for(tokens)
32
35
  end
@@ -40,21 +43,10 @@ module Internals
40
43
  #
41
44
  possible_combinations = tokens.possible_combinations_in index
42
45
 
43
- # Optimization for ignoring tokens that allocate to nothing and
44
- # can be ignored.
45
- # For example in a special search, where "florian" is not
46
- # mapped to any category.
47
- #
48
- possible_combinations.compact!
49
-
50
46
  # Generate all possible combinations.
51
47
  #
52
48
  expanded_combinations = expand_combinations_from possible_combinations
53
49
 
54
- # If there are none, try the next allocation.
55
- #
56
- return [] unless expanded_combinations
57
-
58
50
  # Add the wrapped possible allocations to the ones we already have.
59
51
  #
60
52
  expanded_combinations.map! do |expanded_combination|
@@ -62,7 +54,7 @@ module Internals
62
54
  end
63
55
  end
64
56
 
65
- # This is the core of the search engine.
57
+ # This is the core of the search engine. No kidding.
66
58
  #
67
59
  # Gets an array of
68
60
  # [
@@ -122,7 +114,7 @@ module Internals
122
114
  # If an element has size 0, this means one of the
123
115
  # tokens could not be allocated.
124
116
  #
125
- return if possible_combinations.any?(&:empty?)
117
+ return [] if possible_combinations.any?(&:empty?)
126
118
 
127
119
  # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
128
120
  #
@@ -170,7 +162,7 @@ module Internals
170
162
  combinations
171
163
  end
172
164
 
173
- return if possible_combinations.empty?
165
+ return [] if possible_combinations.empty?
174
166
 
175
167
  possible_combinations.shift.zip *possible_combinations
176
168
  end
@@ -28,14 +28,18 @@ module Internals
28
28
  # Note: Use this in the search engine if you need a qualified
29
29
  # and normalized token. I.e. one prepared for a search.
30
30
  #
31
- def self.processed text
32
- token = new text
33
- token.qualify
34
- token.extract_original
35
- token.partialize
36
- token.similarize
37
- token.remove_illegals
38
- token
31
+ def self.processed text, downcase = true
32
+ new(text).process downcase
33
+ end
34
+ def process downcases = true
35
+ qualify
36
+ extract_original
37
+ downcase if downcases
38
+ partialize
39
+ similarize
40
+ remove_illegals
41
+ symbolize
42
+ self
39
43
  end
40
44
 
41
45
  # This returns a predefined category name if the user has given one.
@@ -56,6 +60,12 @@ module Internals
56
60
  @original = @text.dup
57
61
  end
58
62
 
63
+ # Downcases the text.
64
+ #
65
+ def downcase
66
+ @text.downcase!
67
+ end
68
+
59
69
  # Partial is a conditional setter.
60
70
  #
61
71
  # It is only settable if it hasn't been set yet.
@@ -69,15 +79,19 @@ module Internals
69
79
 
70
80
  # If the text ends with *, partialize it. If with ", don't.
71
81
  #
82
+ # The latter wins. So "hello*" will not be partially searched.
83
+ #
72
84
  @@no_partial = /\"\Z/
73
85
  @@partial = /\*\Z/
74
86
  def partialize
75
- self.partial = false and return if @text =~ @@no_partial
76
- self.partial = true if @text =~ @@partial
87
+ self.partial = false and return unless @text !~ @@no_partial
88
+ self.partial = true unless @text !~ @@partial
77
89
  end
78
90
 
79
91
  # If the text ends with ~ similarize it. If with ", don't.
80
92
  #
93
+ # The latter wins.
94
+ #
81
95
  @@no_similar = /\"\Z/
82
96
  @@similar = /\~\Z/
83
97
  def similarize
@@ -96,21 +110,10 @@ module Internals
96
110
  @text.gsub! @@illegals, '' unless @text.blank?
97
111
  end
98
112
 
99
- # Visitor for tokenizer.
100
113
  #
101
- # TODO Rewrite!!!
102
114
  #
103
- def tokenize_with tokenizer
104
- @text = tokenizer.normalize @text
105
- end
106
- # TODO spec!
107
- #
108
- # TODO Rewrite!!
109
- #
110
- def tokenized tokenizer
111
- tokenizer.tokenize(@text.to_s).each do |text|
112
- yield text
113
- end
115
+ def symbolize
116
+ @text = @text.to_sym
114
117
  end
115
118
 
116
119
  # Returns an array of possible combinations.
@@ -181,6 +184,12 @@ module Internals
181
184
  "#{similar?? :similarity : :index}:#{@text}"
182
185
  end
183
186
 
187
+ # If the originals & the text are the same, they are the same.
188
+ #
189
+ def == other
190
+ self.original == other.original && self.text == other.text
191
+ end
192
+
184
193
  # Displays the qualifier text and the text, joined.
185
194
  #
186
195
  # e.g. name:meier
@@ -1,31 +1,36 @@
1
1
  # encoding: utf-8
2
2
  #
3
3
  module Internals
4
-
4
+
5
5
  #
6
6
  #
7
7
  module Query
8
-
8
+
9
9
  # This class primarily handles switching through similar token constellations.
10
10
  #
11
11
  class Tokens # :nodoc:all
12
-
12
+
13
13
  # Basically delegates to its internal tokens array.
14
14
  #
15
15
  self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
16
-
16
+
17
17
  #
18
18
  #
19
19
  def initialize tokens = []
20
20
  @tokens = tokens
21
21
  end
22
-
22
+ def self.processed words, downcase = true
23
+ new words.collect! { |word| Token.processed word, downcase }
24
+ end
25
+
26
+ # Tokenizes each token.
23
27
  #
28
+ # Note: Passed tokenizer needs to offer #normalize(text).
24
29
  #
25
30
  def tokenize_with tokenizer
26
31
  @tokens.each { |token| token.tokenize_with(tokenizer) }
27
32
  end
28
-
33
+
29
34
  # Generates an array in the form of
30
35
  # [
31
36
  # [combination], # of token 1
@@ -33,14 +38,17 @@ module Internals
33
38
  # [combination, combination] # of token 3
34
39
  # ]
35
40
  #
36
- # TODO If we want token behaviour defined per Query, we can
37
- # compact! here
38
- #
39
41
  def possible_combinations_in type
40
42
  @tokens.inject([]) do |combinations, token|
41
- combinations << token.possible_combinations_in(type)
43
+ possible_combinations = token.possible_combinations_in type
44
+
45
+ # Note: Optimization for ignoring tokens that allocate to nothing and
46
+ # can be ignored.
47
+ # For example in a special search, where "florian" is not
48
+ # mapped to any category.
49
+ #
50
+ possible_combinations ? combinations << possible_combinations : combinations
42
51
  end
43
- # TODO compact! if ignore_unassigned_tokens
44
52
  end
45
53
 
46
54
  # Makes the last of the tokens partial.
@@ -57,33 +65,37 @@ module Internals
57
65
  def cap? maximum
58
66
  @tokens.size > maximum
59
67
  end
60
-
68
+
61
69
  # Rejects blank tokens.
62
70
  #
63
71
  def reject
64
72
  @tokens.reject! &:blank?
65
73
  end
66
-
74
+
67
75
  # Returns a solr query.
68
76
  #
69
77
  def to_solr_query
70
78
  @tokens.map(&:to_solr).join ' '
71
79
  end
72
-
80
+
73
81
  #
74
82
  #
75
83
  def originals
76
84
  @tokens.map(&:original)
77
85
  end
78
-
86
+
87
+ def == other
88
+ self.tokens == other.tokens
89
+ end
90
+
79
91
  # Just join the token original texts.
80
92
  #
81
93
  def to_s
82
94
  originals.join ' '
83
95
  end
84
-
96
+
85
97
  end
86
-
98
+
87
99
  end
88
-
100
+
89
101
  end
@@ -3,19 +3,19 @@ module Query
3
3
  # Calculates weights for certain combinations.
4
4
  #
5
5
  class Weights # :nodoc:all
6
-
6
+
7
7
  #
8
8
  #
9
9
  def initialize weights = {}
10
10
  @weights = weights
11
11
  end
12
-
12
+
13
13
  # Get the weight of an allocation.
14
14
  #
15
15
  def weight_for clustered
16
16
  @weights[clustered] || 0
17
17
  end
18
-
18
+
19
19
  # Returns an energy term E for allocation. this turns into a probability
20
20
  # by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
21
21
  # where Z is the normalizing partition function
@@ -31,24 +31,26 @@ module Query
31
31
  # Note: Cache this if more complicated weighings become necessary.
32
32
  #
33
33
  def score combinations
34
- # TODO Or hide: combinations#to_weights_key
34
+ # TODO Or hide: combinations#to_weights_key (but it's an array, so…)
35
35
  #
36
36
  # TODO combinations could cluster uniq as combinations are added (since combinations don't change).
37
37
  #
38
+ # TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
39
+ #
38
40
  weight_for combinations.map(&:category_name).clustered_uniq_fast
39
41
  end
40
-
42
+
41
43
  # Are there any weights defined?
42
44
  #
43
45
  def empty?
44
46
  @weights.empty?
45
47
  end
46
-
48
+
47
49
  # Prints out a nice representation of the configured weights.
48
50
  #
49
51
  def to_s
50
52
  @weights.to_s
51
53
  end
52
-
54
+
53
55
  end
54
56
  end
@@ -20,6 +20,7 @@ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@r
20
20
  Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
21
21
  Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
22
22
  Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
23
+ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
23
24
  TOKENIZER
24
25
  end
25
26
 
@@ -125,6 +126,13 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
125
126
  tokens.reject! &@reject_condition
126
127
  end
127
128
 
129
+ def case_sensitive case_sensitive
130
+ @case_sensitive = case_sensitive
131
+ end
132
+ def downcase?
133
+ !@case_sensitive
134
+ end
135
+
128
136
  # Checks if the right argument type has been given.
129
137
  #
130
138
  def check_argument_in method, type, argument, &condition
@@ -156,6 +164,7 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
156
164
  normalizes_words options[:normalizes_words] if options[:normalizes_words]
157
165
  removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
158
166
  substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
167
+ case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
159
168
 
160
169
  # Defaults.
161
170
  #
@@ -163,37 +172,54 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
163
172
  reject_token_if &(options[:reject_token_if] || :blank?)
164
173
  end
165
174
 
166
- # Hooks.
175
+ # Default preprocessing hook.
167
176
  #
168
-
169
- # Preprocessing.
177
+ # Does:
178
+ # 1. Character substitution.
179
+ # 2. Remove illegal expressions.
180
+ # 3. Remove non-single stopwords. (Stopwords that occur with other words)
170
181
  #
171
- def preprocess text; end
182
+ def preprocess text
183
+ text = substitute_characters text
184
+ remove_illegals text
185
+ # We do not remove single stopwords e.g. in the indexer for
186
+ # an entirely different reason than in the query tokenizer.
187
+ # An indexed thing with just name "UND" (a possible stopword)
188
+ # should not lose its name.
189
+ #
190
+ remove_non_single_stopwords text
191
+ text
192
+ end
172
193
  # Pretokenizing.
173
194
  #
174
- def pretokenize text; end
175
- # Postprocessing.
195
+ # Does:
196
+ # 1. Split the text into words.
197
+ # 2. Normalize each word.
198
+ #
199
+ def pretokenize text
200
+ words = split text
201
+ words.collect! do |word|
202
+ normalize_with_patterns word
203
+ word
204
+ end
205
+ end
206
+ # Basic postprocessing (overridden in both query/index tokenizers).
176
207
  #
177
208
  def process tokens
178
209
  reject tokens # Reject any tokens that don't meet criteria
179
210
  tokens
180
211
  end
181
212
 
182
- # Converts words into real tokens.
183
- #
184
- def tokens_for words
185
- Internals::Query::Tokens.new words.collect! { |word| token_for word }
186
- end
213
+ # # Converts words into real tokens.
214
+ # #
215
+ # def tokens_for words
216
+ # Internals::Query::Tokens.new words.collect! { |word| token_for word }
217
+ # end
187
218
  # Turns non-blank text into symbols.
188
219
  #
189
220
  def symbolize text
190
221
  text.blank? ? nil : text.to_sym
191
222
  end
192
- # Returns a tokens object.
193
- #
194
- def empty_tokens
195
- Internals::Query::Tokens.new
196
- end
197
223
 
198
224
  end
199
225
 
@@ -15,45 +15,16 @@ module Internals
15
15
  @default ||= new
16
16
  end
17
17
 
18
- # Default indexing preprocessing hook.
19
- #
20
- # Does:
21
- # 1. Character substitution.
22
- # 2. Downcasing.
23
- # 3. Remove illegal expressions.
24
- # 4. Remove non-single stopwords. (Stopwords that occur with other words)
25
- #
26
- def preprocess text
27
- text = substitute_characters text
28
- text.downcase!
29
- remove_illegals text
30
- # we do not remove single stopwords for an entirely different
31
- # reason than in the query tokenizer.
32
- # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
33
- #
34
- remove_non_single_stopwords text
35
- text
36
- end
37
-
38
- # Default indexing pretokenizing hook.
39
- #
40
- # Does:
41
- # 1. Split the text into words.
42
- # 2. Normalize each word.
43
- #
44
- def pretokenize text
45
- words = split text
46
- words.collect! do |word|
47
- normalize_with_patterns word
48
- word
49
- end
50
- end
51
-
52
18
  # Does not actually return a token, but a
53
19
  # symbol "token".
54
20
  #
55
- def token_for text
56
- symbolize text
21
+ def tokens_for words
22
+ words.collect! { |word| word.downcase! if downcase?; word.to_sym }
23
+ end
24
+ # Returns empty tokens.
25
+ #
26
+ def empty_tokens
27
+ []
57
28
  end
58
29
 
59
30
  end
@@ -3,7 +3,7 @@
3
3
  module Internals
4
4
 
5
5
  module Tokenizers
6
-
6
+
7
7
  # There are a few class methods that you can use to configure how a query works.
8
8
  #
9
9
  # removes_characters regexp
@@ -14,66 +14,46 @@ module Internals
14
14
  # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
15
15
  #
16
16
  class Query < Base
17
-
17
+
18
18
  def self.default= new_default
19
19
  @default = new_default
20
20
  end
21
21
  def self.default
22
22
  @default ||= new
23
23
  end
24
-
24
+
25
25
  attr_reader :maximum_tokens
26
-
26
+
27
27
  def initialize options = {}
28
28
  super options
29
29
  @maximum_tokens = options[:maximum_tokens] || 5
30
30
  end
31
-
32
- def preprocess text
33
- remove_illegals text # Remove illegal characters
34
- remove_non_single_stopwords text # remove stop words
35
- text
36
- end
37
-
38
- # Split the text and put some back together.
39
- #
40
- # TODO Make the same as in indexing?
41
- #
42
- def pretokenize text
43
- split text
44
- end
45
-
31
+
46
32
  # Let each token process itself.
47
33
  # Reject, limit, and partialize tokens.
48
34
  #
35
+ # In querying we work with real tokens (in indexing it's just symbols).
36
+ #
49
37
  def process tokens
50
- tokens.tokenize_with self
51
- tokens.reject # Reject any tokens that don't meet criteria
52
- tokens.cap maximum_tokens # Cut off superfluous tokens
53
- tokens.partialize_last # Set certain tokens as partial
38
+ tokens.reject # Reject any tokens that don't meet criteria.
39
+ tokens.cap maximum_tokens # Cut off superfluous tokens.
40
+ tokens.partialize_last # Set certain tokens as partial.
54
41
  tokens
55
42
  end
56
-
57
- # Called by the token.
58
- #
59
- # TODO Perhaps move to Normalizer?
43
+
44
+ # Converts words into real tokens.
60
45
  #
61
- def normalize text
62
- text = substitute_characters text # Substitute special characters
63
- text.downcase! # Downcase all text
64
- normalize_with_patterns text # normalize
65
- text.to_sym # symbolize
46
+ def tokens_for words
47
+ Internals::Query::Tokens.processed words, downcase?
66
48
  end
67
-
68
- # Returns a token for a word.
69
- # The basic query tokenizer uses new tokens.
49
+ # Returns a tokens object.
70
50
  #
71
- def token_for word
72
- Internals::Query::Token.processed word
51
+ def empty_tokens
52
+ Internals::Query::Tokens.new
73
53
  end
74
-
54
+
75
55
  end
76
-
56
+
77
57
  end
78
-
58
+
79
59
  end
data/lib/picky/loader.rb CHANGED
@@ -179,8 +179,6 @@ module Loader # :nodoc:all
179
179
  load_internals 'indexed/categories'
180
180
  load_internals 'indexed/index'
181
181
 
182
- # TODO Ok here?
183
- #
184
182
  load_internals 'indexed/wrappers/exact_first'
185
183
 
186
184
  # Bundle Wrapper
data/lib/picky/search.rb CHANGED
@@ -146,7 +146,7 @@ class Search
146
146
  # Sort the allocations.
147
147
  # (allocations are sorted according to score, highest to lowest)
148
148
  #
149
- allocations.sort
149
+ allocations.sort!
150
150
 
151
151
  # Return the allocations.
152
152
  #
@@ -1,17 +1,9 @@
1
- # TODO This file needs some love.
1
+ # Server tasks, like starting/stopping/restarting.
2
2
  #
3
3
  namespace :server do
4
-
5
- def chdir_to_root
6
- Dir.chdir PICKY_ROOT
7
- end
8
-
9
- def current_pid
10
- pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
11
- pid.blank? ? nil : pid.chomp
12
- end
13
-
4
+
14
5
  # desc "Start the unicorns. (Wehee!)"
6
+ #
15
7
  task :start => :framework do
16
8
  chdir_to_root
17
9
  daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
@@ -19,17 +11,27 @@ namespace :server do
19
11
  puts "Running \`#{command}\`."
20
12
  exec command
21
13
  end
22
-
14
+
23
15
  # desc "Stop the unicorns. (Blam!)"
16
+ #
24
17
  task :stop => :framework do
25
18
  `kill -QUIT #{current_pid}` if current_pid
26
19
  end
27
-
20
+
28
21
  # desc "Restart the unicorns."
29
22
  task :restart do
30
23
  Rake::Task[:"server:stop"].invoke
31
24
  sleep 5
32
25
  Rake::Task[:"server:start"].invoke
33
26
  end
34
-
27
+
28
+ def chdir_to_root
29
+ Dir.chdir PICKY_ROOT
30
+ end
31
+
32
+ def current_pid
33
+ pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
34
+ pid.blank? ? nil : pid.chomp
35
+ end
36
+
35
37
  end
data/lib/tasks/todo.rake CHANGED
@@ -1,5 +1,5 @@
1
1
  desc "Finds where Picky still needs input from you."
2
- task :todo do
2
+ task :'to#{}do' do
3
3
  if system "grep -e 'TODO.*' -n --color=always -R *"
4
4
  puts "Picky needs a bit of input from you there. Thanks."
5
5
  else
@@ -273,14 +273,14 @@ describe Internals::FrontendAdapters::Rack do
273
273
  end
274
274
  context 'without app' do
275
275
  context 'with url' do
276
- it 'should use the 404 with default_options from the url' do
276
+ it 'should use the 200 with default_options from the url' do
277
277
  @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET", :path_info => /some_url/ }
278
278
 
279
279
  @rack_adapter.answer 'some_url'
280
280
  end
281
281
  end
282
282
  context 'without url' do
283
- it 'should use the 404 with default_options' do
283
+ it 'should use the 200 with default_options' do
284
284
  @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET" }
285
285
 
286
286
  @rack_adapter.answer
@@ -5,7 +5,7 @@ require 'spec_helper'
5
5
  describe Internals::Tokenizers::Base do
6
6
 
7
7
  context 'with special instance' do
8
- let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
8
+ let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }, case_sensitive: true }
9
9
  it 'rejects tokens with length < 2' do
10
10
  tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
11
11
  end
@@ -13,7 +13,7 @@ describe Internals::Tokenizers::Base do
13
13
  tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
14
14
  end
15
15
  describe 'to_s' do
16
- it 'does something' do
16
+ it 'spits out the right text' do
17
17
  tokenizer.to_s.should == <<-EXPECTED
18
18
  Removes characters: -
19
19
  Stopwords: -
@@ -22,6 +22,7 @@ Removes chars after split: -
22
22
  Normalizes words: -
23
23
  Rejects tokens? Yes, see line 8 in app/application.rb
24
24
  Substitutes chars? -
25
+ Case sensitive? Yes.
25
26
  EXPECTED
26
27
  end
27
28
  end
@@ -31,7 +32,7 @@ EXPECTED
31
32
  let(:tokenizer) { described_class.new }
32
33
 
33
34
  describe 'to_s' do
34
- it 'does something' do
35
+ it 'spits out the right text' do
35
36
  tokenizer.to_s.should == <<-EXPECTED
36
37
  Removes characters: -
37
38
  Stopwords: -
@@ -40,6 +41,7 @@ Removes chars after split: -
40
41
  Normalizes words: -
41
42
  Rejects tokens? -
42
43
  Substitutes chars? -
44
+ Case sensitive? -
43
45
  EXPECTED
44
46
  end
45
47
  end
@@ -41,6 +41,7 @@ describe Internals::Tokenizers::Query do
41
41
  it 'should call methods in order' do
42
42
  text = stub :text
43
43
 
44
+ tokenizer.should_receive(:substitute_characters).once.with(text).and_return text
44
45
  tokenizer.should_receive(:remove_illegals).once.ordered.with text
45
46
  tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
46
47
 
@@ -57,13 +58,7 @@ describe Internals::Tokenizers::Query do
57
58
  before(:each) do
58
59
  @tokens = mock :tokens, :null_object => true
59
60
  end
60
- it 'should tokenize the tokens' do
61
- @tokens.should_receive(:tokenize_with).once.with tokenizer
62
-
63
- tokenizer.process @tokens
64
- end
65
61
  it 'should call methods on the tokens in order' do
66
- @tokens.should_receive(:tokenize_with).once.ordered
67
62
  @tokens.should_receive(:reject).once.ordered
68
63
  @tokens.should_receive(:cap).once.ordered
69
64
  @tokens.should_receive(:partialize_last).once.ordered
@@ -122,13 +117,5 @@ describe Internals::Tokenizers::Query do
122
117
  tokenizer.tokenize('').map(&:to_s).should == []
123
118
  end
124
119
  end
125
- describe "token_for" do
126
- it "should get a preprocessed token" do
127
- text = stub(:text)
128
- Internals::Query::Token.should_receive(:processed).with text
129
-
130
- tokenizer.token_for text
131
- end
132
- end
133
120
 
134
121
  end
@@ -64,7 +64,7 @@ describe 'Query::Combination' do
64
64
 
65
65
  describe 'ids' do
66
66
  it 'should call ids with the text on bundle' do
67
- @bundle.should_receive(:ids).once.with 'some_text'
67
+ @bundle.should_receive(:ids).once.with :some_text
68
68
 
69
69
  @combination.ids
70
70
  end
@@ -80,7 +80,7 @@ describe 'Query::Combination' do
80
80
 
81
81
  describe 'weight' do
82
82
  it 'should call weight with the text on bundle' do
83
- @bundle.should_receive(:weight).once.with 'some_text'
83
+ @bundle.should_receive(:weight).once.with :some_text
84
84
 
85
85
  @combination.weight
86
86
  end
@@ -44,17 +44,17 @@ describe Internals::Query::Indexes do
44
44
  it 'can handle empty combinations' do
45
45
  combinations = [[1,2,3], [:a, :b, :c], []]
46
46
 
47
- indexes.expand_combinations_from(combinations).should == nil
47
+ indexes.expand_combinations_from(combinations).should == []
48
48
  end
49
49
  it 'can handle empty combinations' do
50
50
  combinations = [[], [:a, :b, :c], []]
51
51
 
52
- indexes.expand_combinations_from(combinations).should == nil
52
+ indexes.expand_combinations_from(combinations).should == []
53
53
  end
54
54
  it 'can handle totally empty combinations' do
55
55
  combinations = [[], [], []]
56
56
 
57
- indexes.expand_combinations_from(combinations).should == nil
57
+ indexes.expand_combinations_from(combinations).should == []
58
58
  end
59
59
  it 'is fast in a complicated case' do
60
60
  combinations = [[1,2,3], [:a, :b, :c], [:k, :l]]
@@ -64,12 +64,12 @@ describe Internals::Query::Indexes do
64
64
  it 'is fast in a simple case' do
65
65
  combinations = [[1], [2], [3]]
66
66
 
67
- performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00055
67
+ performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0006
68
68
  end
69
69
  it 'is very fast in a 1-empty case' do
70
70
  combinations = [[], [2], [3]]
71
71
 
72
- performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00045
72
+ performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0005
73
73
  end
74
74
  it 'is very fast in a all-empty case' do
75
75
  combinations = [[], [], []]
@@ -8,6 +8,15 @@ describe Internals::Query::Token do
8
8
  Internals::Query::Qualifiers.instance.prepare
9
9
  end
10
10
 
11
+ describe '==' do
12
+ it 'is equal if the originals are equal' do
13
+ described_class.processed('similar~').should == described_class.processed('similar~')
14
+ end
15
+ it 'is not equal if the originals are not equal' do
16
+ described_class.processed('similar~').should_not == described_class.processed('similar')
17
+ end
18
+ end
19
+
11
20
  describe 'next_similar_token' do
12
21
  before(:each) do
13
22
  @bundle = stub :bundle, :similar => [:array, :of, :similar]
@@ -157,20 +166,29 @@ describe Internals::Query::Token do
157
166
  end
158
167
 
159
168
  describe 'processed' do
169
+ it 'should return a new token' do
170
+ described_class.processed('some text').should be_kind_of(described_class)
171
+ end
172
+ it 'generates a token' do
173
+ described_class.processed('some text').class.should == described_class
174
+ end
175
+ end
176
+
177
+ describe 'process' do
178
+ let(:token) { described_class.new 'any_text' }
179
+ it 'returns itself' do
180
+ token.process.should == token
181
+ end
160
182
  it 'should have an order' do
161
- token = stub :token
162
- described_class.should_receive(:new).once.and_return token
163
-
164
183
  token.should_receive(:qualify).once.ordered
165
184
  token.should_receive(:extract_original).once.ordered
185
+ token.should_receive(:downcase).once.ordered
166
186
  token.should_receive(:partialize).once.ordered
167
187
  token.should_receive(:similarize).once.ordered
168
188
  token.should_receive(:remove_illegals).once.ordered
189
+ token.should_receive(:symbolize).once.ordered
169
190
 
170
- described_class.processed :any_text
171
- end
172
- it 'should return a new token' do
173
- described_class.processed('some text').should be_kind_of(described_class)
191
+ token.process
174
192
  end
175
193
  end
176
194
 
@@ -352,6 +370,13 @@ describe Internals::Query::Token do
352
370
  before(:each) do
353
371
  @token = described_class.processed 'text*'
354
372
  end
373
+ it 'should not set partial' do
374
+ @token.instance_variable_set :@partial, false
375
+
376
+ @token.partial = true
377
+
378
+ @token.instance_variable_get(:@partial).should be_false
379
+ end
355
380
  it 'should not set partial' do
356
381
  @token.partial = false
357
382
 
@@ -382,20 +407,20 @@ describe Internals::Query::Token do
382
407
  it 'should remove *' do
383
408
  token = described_class.processed 'text*'
384
409
 
385
- token.text.should == 'text'
410
+ token.text.should == :text
386
411
  end
387
412
  it 'should remove ~' do
388
413
  token = described_class.processed 'text~'
389
414
 
390
- token.text.should == 'text'
415
+ token.text.should == :text
391
416
  end
392
417
  it 'should remove "' do
393
418
  token = described_class.processed 'text"'
394
419
 
395
- token.text.should == 'text'
420
+ token.text.should == :text
396
421
  end
397
422
  it "should pass on a processed text" do
398
- described_class.processed('text').text.should == 'text'
423
+ described_class.processed('text').text.should == :text
399
424
  end
400
425
  end
401
426
 
@@ -7,6 +7,35 @@ describe Internals::Query::Tokens do
7
7
  Internals::Query::Qualifiers.instance.prepare
8
8
  end
9
9
 
10
+ describe '.processed' do
11
+ it 'generates processed tokens from all words' do
12
+ expected = [
13
+ Internals::Query::Token.processed('this~'),
14
+ Internals::Query::Token.processed('is'),
15
+ Internals::Query::Token.processed('a'),
16
+ Internals::Query::Token.processed('sp:solr'),
17
+ Internals::Query::Token.processed('query"')
18
+ ]
19
+
20
+ described_class.should_receive(:new).once.with expected
21
+
22
+ described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
23
+ end
24
+ it 'generates processed tokens from all words' do
25
+ expected = [
26
+ Internals::Query::Token.processed('this~', false),
27
+ Internals::Query::Token.processed('is', false),
28
+ Internals::Query::Token.processed('a', false),
29
+ Internals::Query::Token.processed('sp:solr', false),
30
+ Internals::Query::Token.processed('query"', false)
31
+ ]
32
+
33
+ described_class.should_receive(:new).once.with expected
34
+
35
+ described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
36
+ end
37
+ end
38
+
10
39
  describe 'to_solr_query' do
11
40
  context 'many tokens' do
12
41
  before(:each) do
@@ -151,6 +180,16 @@ describe Internals::Query::Tokens do
151
180
  [:combination31, :combination32, :combination33]
152
181
  ]
153
182
  end
183
+ it 'should work correctly' do
184
+ @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
185
+ @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return nil
186
+ @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
187
+
188
+ @tokens.possible_combinations_in(:some_index).should == [
189
+ [:combination11, :combination12],
190
+ [:combination31, :combination32, :combination33]
191
+ ]
192
+ end
154
193
  end
155
194
 
156
195
  describe 'to_s' do
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.0
5
+ version: 2.1.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-03-28 00:00:00 +02:00
13
+ date: 2011-04-07 00:00:00 +10:00
14
14
  default_executable: picky
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency