picky 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/application.rb +0 -2
- data/lib/picky/internals/configuration/index.rb +11 -17
- data/lib/picky/internals/index/redis/string_hash.rb +1 -1
- data/lib/picky/internals/indexed/categories.rb +7 -3
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +9 -9
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +12 -12
- data/lib/picky/internals/query/allocations.rb +3 -5
- data/lib/picky/internals/query/combinations/memory.rb +8 -8
- data/lib/picky/internals/query/indexes.rb +6 -14
- data/lib/picky/internals/query/token.rb +32 -23
- data/lib/picky/internals/query/tokens.rb +30 -18
- data/lib/picky/internals/query/weights.rb +9 -7
- data/lib/picky/internals/tokenizers/base.rb +42 -16
- data/lib/picky/internals/tokenizers/index.rb +7 -36
- data/lib/picky/internals/tokenizers/query.rb +20 -40
- data/lib/picky/loader.rb +0 -2
- data/lib/picky/search.rb +1 -1
- data/lib/tasks/server.rake +16 -14
- data/lib/tasks/todo.rake +1 -1
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +2 -2
- data/spec/lib/internals/tokenizers/base_spec.rb +5 -3
- data/spec/lib/internals/tokenizers/query_spec.rb +1 -14
- data/spec/lib/query/combination_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +5 -5
- data/spec/lib/query/token_spec.rb +36 -11
- data/spec/lib/query/tokens_spec.rb +39 -0
- metadata +2 -2
data/lib/picky/application.rb
CHANGED
@@ -1,32 +1,30 @@
|
|
1
1
|
module Configuration # :nodoc:all
|
2
|
-
|
2
|
+
|
3
3
|
# Holds the configuration for a
|
4
4
|
# index/category combination.
|
5
5
|
#
|
6
|
-
# TODO Rename paths?
|
7
|
-
#
|
8
6
|
class Index
|
9
|
-
|
7
|
+
|
10
8
|
attr_reader :index, :category
|
11
|
-
|
9
|
+
|
12
10
|
def initialize index, category
|
13
11
|
@index = index
|
14
12
|
@category = category
|
15
13
|
end
|
16
|
-
|
14
|
+
|
17
15
|
def index_name
|
18
16
|
@index_name ||= index.name
|
19
17
|
end
|
20
18
|
def category_name
|
21
19
|
@category_name ||= category.name
|
22
20
|
end
|
23
|
-
|
21
|
+
|
24
22
|
#
|
25
23
|
#
|
26
24
|
def index_path bundle_name, name
|
27
25
|
"#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
|
28
26
|
end
|
29
|
-
|
27
|
+
|
30
28
|
# Was: search_index_file_name
|
31
29
|
#
|
32
30
|
def prepared_index_path
|
@@ -36,21 +34,17 @@ module Configuration # :nodoc:all
|
|
36
34
|
@prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
|
37
35
|
@prepared_index_file.open_for_indexing &block
|
38
36
|
end
|
39
|
-
|
40
|
-
# def file_name
|
41
|
-
# @file_name ||= "#{@index_name}_#{@category_name}"
|
42
|
-
# end
|
43
|
-
|
37
|
+
|
44
38
|
# Identifier for internal use.
|
45
39
|
#
|
46
40
|
def identifier
|
47
41
|
@identifier ||= "#{index_name}:#{category_name}"
|
48
42
|
end
|
49
|
-
|
43
|
+
|
50
44
|
def to_s
|
51
45
|
"#{index_name} #{category_name}"
|
52
46
|
end
|
53
|
-
|
47
|
+
|
54
48
|
def self.index_root
|
55
49
|
@index_root ||= "#{PICKY_ROOT}/index"
|
56
50
|
end
|
@@ -67,7 +61,7 @@ module Configuration # :nodoc:all
|
|
67
61
|
def prepare_index_directory
|
68
62
|
FileUtils.mkdir_p index_directory
|
69
63
|
end
|
70
|
-
|
64
|
+
|
71
65
|
end
|
72
|
-
|
66
|
+
|
73
67
|
end
|
@@ -67,7 +67,7 @@ module Internals
|
|
67
67
|
# for each found similar token.
|
68
68
|
#
|
69
69
|
def similar_possible_for token
|
70
|
-
# Get as many
|
70
|
+
# Get as many tokens as necessary
|
71
71
|
#
|
72
72
|
tokens = similar_tokens_for token
|
73
73
|
# possible combinations
|
@@ -105,9 +105,13 @@ module Internals
|
|
105
105
|
# (Also none of the categories matched, but the ignore unassigned
|
106
106
|
# tokens option is true)
|
107
107
|
#
|
108
|
+
# TODO Could use Combinations class here and remove the inject.
|
109
|
+
#
|
108
110
|
def possible_for token, preselected_categories = nil
|
109
|
-
possible = (preselected_categories || possible_categories(token)).
|
110
|
-
|
111
|
+
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
112
|
+
combination = category.combination_for token
|
113
|
+
combination ? combinations << combination : combinations
|
114
|
+
end
|
111
115
|
# This is an optimization to mark tokens that are ignored.
|
112
116
|
#
|
113
117
|
return if ignore_unassigned_tokens && possible.empty?
|
@@ -1,35 +1,35 @@
|
|
1
1
|
module Indexed
|
2
2
|
module Wrappers
|
3
|
-
|
3
|
+
|
4
4
|
module Bundle
|
5
|
-
|
5
|
+
|
6
6
|
# A calculation rewrites the symbol into a float.
|
7
7
|
#
|
8
|
-
# TODO I really need to allow integers as keys. The code below is just not
|
8
|
+
# TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
|
9
9
|
#
|
10
10
|
class Calculation < Wrapper
|
11
|
-
|
11
|
+
|
12
12
|
#
|
13
13
|
#
|
14
14
|
def recalculate float
|
15
15
|
float
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
#
|
19
19
|
#
|
20
20
|
def ids sym
|
21
21
|
@bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
#
|
25
25
|
#
|
26
26
|
def weight sym
|
27
27
|
@bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
end
|
35
35
|
end
|
@@ -3,18 +3,18 @@ module Internals
|
|
3
3
|
# encoding: utf-8
|
4
4
|
#
|
5
5
|
module Indexed
|
6
|
-
|
6
|
+
|
7
7
|
# TODO Spec
|
8
8
|
#
|
9
9
|
module Wrappers
|
10
|
-
|
10
|
+
|
11
11
|
# This index combines an exact and partial index.
|
12
12
|
# It serves to order the results such that exact hits are found first.
|
13
13
|
#
|
14
14
|
# TODO Need to use the right subtokens. Bake in?
|
15
15
|
#
|
16
|
-
class ExactFirst < Indexed::Bundle::
|
17
|
-
|
16
|
+
class ExactFirst < Indexed::Bundle::Base
|
17
|
+
|
18
18
|
delegate :similar,
|
19
19
|
:identifier,
|
20
20
|
:name,
|
@@ -28,12 +28,12 @@ module Internals
|
|
28
28
|
:dump,
|
29
29
|
:load,
|
30
30
|
:to => :@partial
|
31
|
-
|
31
|
+
|
32
32
|
def initialize category
|
33
33
|
@exact = category.exact
|
34
34
|
@partial = category.partial
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
def self.wrap index_or_category
|
38
38
|
if index_or_category.respond_to? :categories
|
39
39
|
wrap_each_of index_or_category.categories
|
@@ -47,19 +47,19 @@ module Internals
|
|
47
47
|
def self.wrap_each_of categories
|
48
48
|
categories.categories.collect! { |category| new(category) }
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
51
|
def ids text
|
52
52
|
@exact.ids(text) + @partial.ids(text)
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
def weight text
|
56
56
|
[@exact.weight(text) || 0, @partial.weight(text) || 0].max
|
57
57
|
end
|
58
|
-
|
58
|
+
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
end
|
@@ -5,8 +5,6 @@ module Internals
|
|
5
5
|
#
|
6
6
|
class Allocations # :nodoc:all
|
7
7
|
|
8
|
-
# TODO Remove size
|
9
|
-
#
|
10
8
|
delegate :each, :inject, :empty?, :size, :to => :@allocations
|
11
9
|
attr_reader :total
|
12
10
|
|
@@ -23,7 +21,7 @@ module Internals
|
|
23
21
|
end
|
24
22
|
# Sort the allocations.
|
25
23
|
#
|
26
|
-
def sort
|
24
|
+
def sort!
|
27
25
|
@allocations.sort!
|
28
26
|
end
|
29
27
|
|
@@ -116,7 +114,7 @@ module Internals
|
|
116
114
|
end
|
117
115
|
|
118
116
|
end
|
119
|
-
|
117
|
+
|
120
118
|
end
|
121
|
-
|
119
|
+
|
122
120
|
end
|
@@ -8,12 +8,12 @@ module Internals
|
|
8
8
|
# An allocation consists of a number of combinations.
|
9
9
|
#
|
10
10
|
module Combinations # :nodoc:all
|
11
|
-
|
11
|
+
|
12
12
|
# Memory Combinations contain specific methods for
|
13
13
|
# calculating score and ids in memory.
|
14
14
|
#
|
15
15
|
class Memory < Base
|
16
|
-
|
16
|
+
|
17
17
|
# Returns the result ids for the allocation.
|
18
18
|
#
|
19
19
|
# Sorts the ids by size and & through them in the following order (sizes):
|
@@ -24,7 +24,7 @@ module Internals
|
|
24
24
|
# Note: Uses a C-optimized intersection routine for speed and memory efficiency.
|
25
25
|
#
|
26
26
|
# Note: In the memory based version we ignore the (amount) needed hint.
|
27
|
-
#
|
27
|
+
# We might use the fact to optimize the algorithm.
|
28
28
|
#
|
29
29
|
def ids _, _
|
30
30
|
return [] if @combinations.empty?
|
@@ -43,16 +43,16 @@ module Internals
|
|
43
43
|
# this precondition for a fast algorithm is always given.
|
44
44
|
#
|
45
45
|
id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
|
46
|
-
|
46
|
+
|
47
47
|
# Call the optimized C algorithm.
|
48
48
|
#
|
49
49
|
Performant::Array.memory_efficient_intersect id_arrays
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
end
|
@@ -27,6 +27,9 @@ module Internals
|
|
27
27
|
|
28
28
|
# Returns a number of possible allocations for the given tokens.
|
29
29
|
#
|
30
|
+
def sorted_allocations_for tokens
|
31
|
+
|
32
|
+
end
|
30
33
|
def allocations_for tokens
|
31
34
|
Allocations.new allocations_ary_for(tokens)
|
32
35
|
end
|
@@ -40,21 +43,10 @@ module Internals
|
|
40
43
|
#
|
41
44
|
possible_combinations = tokens.possible_combinations_in index
|
42
45
|
|
43
|
-
# Optimization for ignoring tokens that allocate to nothing and
|
44
|
-
# can be ignored.
|
45
|
-
# For example in a special search, where "florian" is not
|
46
|
-
# mapped to any category.
|
47
|
-
#
|
48
|
-
possible_combinations.compact!
|
49
|
-
|
50
46
|
# Generate all possible combinations.
|
51
47
|
#
|
52
48
|
expanded_combinations = expand_combinations_from possible_combinations
|
53
49
|
|
54
|
-
# If there are none, try the next allocation.
|
55
|
-
#
|
56
|
-
return [] unless expanded_combinations
|
57
|
-
|
58
50
|
# Add the wrapped possible allocations to the ones we already have.
|
59
51
|
#
|
60
52
|
expanded_combinations.map! do |expanded_combination|
|
@@ -62,7 +54,7 @@ module Internals
|
|
62
54
|
end
|
63
55
|
end
|
64
56
|
|
65
|
-
# This is the core of the search engine.
|
57
|
+
# This is the core of the search engine. No kidding.
|
66
58
|
#
|
67
59
|
# Gets an array of
|
68
60
|
# [
|
@@ -122,7 +114,7 @@ module Internals
|
|
122
114
|
# If an element has size 0, this means one of the
|
123
115
|
# tokens could not be allocated.
|
124
116
|
#
|
125
|
-
return if possible_combinations.any?(&:empty?)
|
117
|
+
return [] if possible_combinations.any?(&:empty?)
|
126
118
|
|
127
119
|
# Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
|
128
120
|
#
|
@@ -170,7 +162,7 @@ module Internals
|
|
170
162
|
combinations
|
171
163
|
end
|
172
164
|
|
173
|
-
return if possible_combinations.empty?
|
165
|
+
return [] if possible_combinations.empty?
|
174
166
|
|
175
167
|
possible_combinations.shift.zip *possible_combinations
|
176
168
|
end
|
@@ -28,14 +28,18 @@ module Internals
|
|
28
28
|
# Note: Use this in the search engine if you need a qualified
|
29
29
|
# and normalized token. I.e. one prepared for a search.
|
30
30
|
#
|
31
|
-
def self.processed text
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
31
|
+
def self.processed text, downcase = true
|
32
|
+
new(text).process downcase
|
33
|
+
end
|
34
|
+
def process downcases = true
|
35
|
+
qualify
|
36
|
+
extract_original
|
37
|
+
downcase if downcases
|
38
|
+
partialize
|
39
|
+
similarize
|
40
|
+
remove_illegals
|
41
|
+
symbolize
|
42
|
+
self
|
39
43
|
end
|
40
44
|
|
41
45
|
# This returns a predefined category name if the user has given one.
|
@@ -56,6 +60,12 @@ module Internals
|
|
56
60
|
@original = @text.dup
|
57
61
|
end
|
58
62
|
|
63
|
+
# Downcases the text.
|
64
|
+
#
|
65
|
+
def downcase
|
66
|
+
@text.downcase!
|
67
|
+
end
|
68
|
+
|
59
69
|
# Partial is a conditional setter.
|
60
70
|
#
|
61
71
|
# It is only settable if it hasn't been set yet.
|
@@ -69,15 +79,19 @@ module Internals
|
|
69
79
|
|
70
80
|
# If the text ends with *, partialize it. If with ", don't.
|
71
81
|
#
|
82
|
+
# The latter wins. So "hello*" will not be partially searched.
|
83
|
+
#
|
72
84
|
@@no_partial = /\"\Z/
|
73
85
|
@@partial = /\*\Z/
|
74
86
|
def partialize
|
75
|
-
self.partial = false and return
|
76
|
-
self.partial = true
|
87
|
+
self.partial = false and return unless @text !~ @@no_partial
|
88
|
+
self.partial = true unless @text !~ @@partial
|
77
89
|
end
|
78
90
|
|
79
91
|
# If the text ends with ~ similarize it. If with ", don't.
|
80
92
|
#
|
93
|
+
# The latter wins.
|
94
|
+
#
|
81
95
|
@@no_similar = /\"\Z/
|
82
96
|
@@similar = /\~\Z/
|
83
97
|
def similarize
|
@@ -96,21 +110,10 @@ module Internals
|
|
96
110
|
@text.gsub! @@illegals, '' unless @text.blank?
|
97
111
|
end
|
98
112
|
|
99
|
-
# Visitor for tokenizer.
|
100
113
|
#
|
101
|
-
# TODO Rewrite!!!
|
102
114
|
#
|
103
|
-
def
|
104
|
-
@text =
|
105
|
-
end
|
106
|
-
# TODO spec!
|
107
|
-
#
|
108
|
-
# TODO Rewrite!!
|
109
|
-
#
|
110
|
-
def tokenized tokenizer
|
111
|
-
tokenizer.tokenize(@text.to_s).each do |text|
|
112
|
-
yield text
|
113
|
-
end
|
115
|
+
def symbolize
|
116
|
+
@text = @text.to_sym
|
114
117
|
end
|
115
118
|
|
116
119
|
# Returns an array of possible combinations.
|
@@ -181,6 +184,12 @@ module Internals
|
|
181
184
|
"#{similar?? :similarity : :index}:#{@text}"
|
182
185
|
end
|
183
186
|
|
187
|
+
# If the originals & the text are the same, they are the same.
|
188
|
+
#
|
189
|
+
def == other
|
190
|
+
self.original == other.original && self.text == other.text
|
191
|
+
end
|
192
|
+
|
184
193
|
# Displays the qualifier text and the text, joined.
|
185
194
|
#
|
186
195
|
# e.g. name:meier
|
@@ -1,31 +1,36 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
module Internals
|
4
|
-
|
4
|
+
|
5
5
|
#
|
6
6
|
#
|
7
7
|
module Query
|
8
|
-
|
8
|
+
|
9
9
|
# This class primarily handles switching through similar token constellations.
|
10
10
|
#
|
11
11
|
class Tokens # :nodoc:all
|
12
|
-
|
12
|
+
|
13
13
|
# Basically delegates to its internal tokens array.
|
14
14
|
#
|
15
15
|
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
16
|
-
|
16
|
+
|
17
17
|
#
|
18
18
|
#
|
19
19
|
def initialize tokens = []
|
20
20
|
@tokens = tokens
|
21
21
|
end
|
22
|
-
|
22
|
+
def self.processed words, downcase = true
|
23
|
+
new words.collect! { |word| Token.processed word, downcase }
|
24
|
+
end
|
25
|
+
|
26
|
+
# Tokenizes each token.
|
23
27
|
#
|
28
|
+
# Note: Passed tokenizer needs to offer #normalize(text).
|
24
29
|
#
|
25
30
|
def tokenize_with tokenizer
|
26
31
|
@tokens.each { |token| token.tokenize_with(tokenizer) }
|
27
32
|
end
|
28
|
-
|
33
|
+
|
29
34
|
# Generates an array in the form of
|
30
35
|
# [
|
31
36
|
# [combination], # of token 1
|
@@ -33,14 +38,17 @@ module Internals
|
|
33
38
|
# [combination, combination] # of token 3
|
34
39
|
# ]
|
35
40
|
#
|
36
|
-
# TODO If we want token behaviour defined per Query, we can
|
37
|
-
# compact! here
|
38
|
-
#
|
39
41
|
def possible_combinations_in type
|
40
42
|
@tokens.inject([]) do |combinations, token|
|
41
|
-
|
43
|
+
possible_combinations = token.possible_combinations_in type
|
44
|
+
|
45
|
+
# Note: Optimization for ignoring tokens that allocate to nothing and
|
46
|
+
# can be ignored.
|
47
|
+
# For example in a special search, where "florian" is not
|
48
|
+
# mapped to any category.
|
49
|
+
#
|
50
|
+
possible_combinations ? combinations << possible_combinations : combinations
|
42
51
|
end
|
43
|
-
# TODO compact! if ignore_unassigned_tokens
|
44
52
|
end
|
45
53
|
|
46
54
|
# Makes the last of the tokens partial.
|
@@ -57,33 +65,37 @@ module Internals
|
|
57
65
|
def cap? maximum
|
58
66
|
@tokens.size > maximum
|
59
67
|
end
|
60
|
-
|
68
|
+
|
61
69
|
# Rejects blank tokens.
|
62
70
|
#
|
63
71
|
def reject
|
64
72
|
@tokens.reject! &:blank?
|
65
73
|
end
|
66
|
-
|
74
|
+
|
67
75
|
# Returns a solr query.
|
68
76
|
#
|
69
77
|
def to_solr_query
|
70
78
|
@tokens.map(&:to_solr).join ' '
|
71
79
|
end
|
72
|
-
|
80
|
+
|
73
81
|
#
|
74
82
|
#
|
75
83
|
def originals
|
76
84
|
@tokens.map(&:original)
|
77
85
|
end
|
78
|
-
|
86
|
+
|
87
|
+
def == other
|
88
|
+
self.tokens == other.tokens
|
89
|
+
end
|
90
|
+
|
79
91
|
# Just join the token original texts.
|
80
92
|
#
|
81
93
|
def to_s
|
82
94
|
originals.join ' '
|
83
95
|
end
|
84
|
-
|
96
|
+
|
85
97
|
end
|
86
|
-
|
98
|
+
|
87
99
|
end
|
88
|
-
|
100
|
+
|
89
101
|
end
|
@@ -3,19 +3,19 @@ module Query
|
|
3
3
|
# Calculates weights for certain combinations.
|
4
4
|
#
|
5
5
|
class Weights # :nodoc:all
|
6
|
-
|
6
|
+
|
7
7
|
#
|
8
8
|
#
|
9
9
|
def initialize weights = {}
|
10
10
|
@weights = weights
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
# Get the weight of an allocation.
|
14
14
|
#
|
15
15
|
def weight_for clustered
|
16
16
|
@weights[clustered] || 0
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
# Returns an energy term E for allocation. this turns into a probability
|
20
20
|
# by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
|
21
21
|
# where Z is the normalizing partition function
|
@@ -31,24 +31,26 @@ module Query
|
|
31
31
|
# Note: Cache this if more complicated weighings become necessary.
|
32
32
|
#
|
33
33
|
def score combinations
|
34
|
-
# TODO Or hide: combinations#to_weights_key
|
34
|
+
# TODO Or hide: combinations#to_weights_key (but it's an array, so…)
|
35
35
|
#
|
36
36
|
# TODO combinations could cluster uniq as combinations are added (since combinations don't change).
|
37
37
|
#
|
38
|
+
# TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
|
39
|
+
#
|
38
40
|
weight_for combinations.map(&:category_name).clustered_uniq_fast
|
39
41
|
end
|
40
|
-
|
42
|
+
|
41
43
|
# Are there any weights defined?
|
42
44
|
#
|
43
45
|
def empty?
|
44
46
|
@weights.empty?
|
45
47
|
end
|
46
|
-
|
48
|
+
|
47
49
|
# Prints out a nice representation of the configured weights.
|
48
50
|
#
|
49
51
|
def to_s
|
50
52
|
@weights.to_s
|
51
53
|
end
|
52
|
-
|
54
|
+
|
53
55
|
end
|
54
56
|
end
|
@@ -20,6 +20,7 @@ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@r
|
|
20
20
|
Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
|
21
21
|
Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
|
22
22
|
Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
|
23
|
+
Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
23
24
|
TOKENIZER
|
24
25
|
end
|
25
26
|
|
@@ -125,6 +126,13 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
|
|
125
126
|
tokens.reject! &@reject_condition
|
126
127
|
end
|
127
128
|
|
129
|
+
def case_sensitive case_sensitive
|
130
|
+
@case_sensitive = case_sensitive
|
131
|
+
end
|
132
|
+
def downcase?
|
133
|
+
!@case_sensitive
|
134
|
+
end
|
135
|
+
|
128
136
|
# Checks if the right argument type has been given.
|
129
137
|
#
|
130
138
|
def check_argument_in method, type, argument, &condition
|
@@ -156,6 +164,7 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
|
|
156
164
|
normalizes_words options[:normalizes_words] if options[:normalizes_words]
|
157
165
|
removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
|
158
166
|
substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
|
167
|
+
case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
|
159
168
|
|
160
169
|
# Defaults.
|
161
170
|
#
|
@@ -163,37 +172,54 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
|
|
163
172
|
reject_token_if &(options[:reject_token_if] || :blank?)
|
164
173
|
end
|
165
174
|
|
166
|
-
#
|
175
|
+
# Default preprocessing hook.
|
167
176
|
#
|
168
|
-
|
169
|
-
#
|
177
|
+
# Does:
|
178
|
+
# 1. Character substitution.
|
179
|
+
# 2. Remove illegal expressions.
|
180
|
+
# 3. Remove non-single stopwords. (Stopwords that occur with other words)
|
170
181
|
#
|
171
|
-
def preprocess text
|
182
|
+
def preprocess text
|
183
|
+
text = substitute_characters text
|
184
|
+
remove_illegals text
|
185
|
+
# We do not remove single stopwords e.g. in the indexer for
|
186
|
+
# an entirely different reason than in the query tokenizer.
|
187
|
+
# An indexed thing with just name "UND" (a possible stopword)
|
188
|
+
# should not lose its name.
|
189
|
+
#
|
190
|
+
remove_non_single_stopwords text
|
191
|
+
text
|
192
|
+
end
|
172
193
|
# Pretokenizing.
|
173
194
|
#
|
174
|
-
|
175
|
-
#
|
195
|
+
# Does:
|
196
|
+
# 1. Split the text into words.
|
197
|
+
# 2. Normalize each word.
|
198
|
+
#
|
199
|
+
def pretokenize text
|
200
|
+
words = split text
|
201
|
+
words.collect! do |word|
|
202
|
+
normalize_with_patterns word
|
203
|
+
word
|
204
|
+
end
|
205
|
+
end
|
206
|
+
# Basic postprocessing (overridden in both query/index tokenizers).
|
176
207
|
#
|
177
208
|
def process tokens
|
178
209
|
reject tokens # Reject any tokens that don't meet criteria
|
179
210
|
tokens
|
180
211
|
end
|
181
212
|
|
182
|
-
# Converts words into real tokens.
|
183
|
-
#
|
184
|
-
def tokens_for words
|
185
|
-
|
186
|
-
end
|
213
|
+
# # Converts words into real tokens.
|
214
|
+
# #
|
215
|
+
# def tokens_for words
|
216
|
+
# Internals::Query::Tokens.new words.collect! { |word| token_for word }
|
217
|
+
# end
|
187
218
|
# Turns non-blank text into symbols.
|
188
219
|
#
|
189
220
|
def symbolize text
|
190
221
|
text.blank? ? nil : text.to_sym
|
191
222
|
end
|
192
|
-
# Returns a tokens object.
|
193
|
-
#
|
194
|
-
def empty_tokens
|
195
|
-
Internals::Query::Tokens.new
|
196
|
-
end
|
197
223
|
|
198
224
|
end
|
199
225
|
|
@@ -15,45 +15,16 @@ module Internals
|
|
15
15
|
@default ||= new
|
16
16
|
end
|
17
17
|
|
18
|
-
# Default indexing preprocessing hook.
|
19
|
-
#
|
20
|
-
# Does:
|
21
|
-
# 1. Character substitution.
|
22
|
-
# 2. Downcasing.
|
23
|
-
# 3. Remove illegal expressions.
|
24
|
-
# 4. Remove non-single stopwords. (Stopwords that occur with other words)
|
25
|
-
#
|
26
|
-
def preprocess text
|
27
|
-
text = substitute_characters text
|
28
|
-
text.downcase!
|
29
|
-
remove_illegals text
|
30
|
-
# we do not remove single stopwords for an entirely different
|
31
|
-
# reason than in the query tokenizer.
|
32
|
-
# An indexed thing with just name "UND" (a possible stopword) should not lose its name.
|
33
|
-
#
|
34
|
-
remove_non_single_stopwords text
|
35
|
-
text
|
36
|
-
end
|
37
|
-
|
38
|
-
# Default indexing pretokenizing hook.
|
39
|
-
#
|
40
|
-
# Does:
|
41
|
-
# 1. Split the text into words.
|
42
|
-
# 2. Normalize each word.
|
43
|
-
#
|
44
|
-
def pretokenize text
|
45
|
-
words = split text
|
46
|
-
words.collect! do |word|
|
47
|
-
normalize_with_patterns word
|
48
|
-
word
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
18
|
# Does not actually return a token, but a
|
53
19
|
# symbol "token".
|
54
20
|
#
|
55
|
-
def
|
56
|
-
|
21
|
+
def tokens_for words
|
22
|
+
words.collect! { |word| word.downcase! if downcase?; word.to_sym }
|
23
|
+
end
|
24
|
+
# Returns empty tokens.
|
25
|
+
#
|
26
|
+
def empty_tokens
|
27
|
+
[]
|
57
28
|
end
|
58
29
|
|
59
30
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module Internals
|
4
4
|
|
5
5
|
module Tokenizers
|
6
|
-
|
6
|
+
|
7
7
|
# There are a few class methods that you can use to configure how a query works.
|
8
8
|
#
|
9
9
|
# removes_characters regexp
|
@@ -14,66 +14,46 @@ module Internals
|
|
14
14
|
# normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
|
15
15
|
#
|
16
16
|
class Query < Base
|
17
|
-
|
17
|
+
|
18
18
|
def self.default= new_default
|
19
19
|
@default = new_default
|
20
20
|
end
|
21
21
|
def self.default
|
22
22
|
@default ||= new
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
attr_reader :maximum_tokens
|
26
|
-
|
26
|
+
|
27
27
|
def initialize options = {}
|
28
28
|
super options
|
29
29
|
@maximum_tokens = options[:maximum_tokens] || 5
|
30
30
|
end
|
31
|
-
|
32
|
-
def preprocess text
|
33
|
-
remove_illegals text # Remove illegal characters
|
34
|
-
remove_non_single_stopwords text # remove stop words
|
35
|
-
text
|
36
|
-
end
|
37
|
-
|
38
|
-
# Split the text and put some back together.
|
39
|
-
#
|
40
|
-
# TODO Make the same as in indexing?
|
41
|
-
#
|
42
|
-
def pretokenize text
|
43
|
-
split text
|
44
|
-
end
|
45
|
-
|
31
|
+
|
46
32
|
# Let each token process itself.
|
47
33
|
# Reject, limit, and partialize tokens.
|
48
34
|
#
|
35
|
+
# In querying we work with real tokens (in indexing it's just symbols).
|
36
|
+
#
|
49
37
|
def process tokens
|
50
|
-
tokens.
|
51
|
-
tokens.
|
52
|
-
tokens.
|
53
|
-
tokens.partialize_last # Set certain tokens as partial
|
38
|
+
tokens.reject # Reject any tokens that don't meet criteria.
|
39
|
+
tokens.cap maximum_tokens # Cut off superfluous tokens.
|
40
|
+
tokens.partialize_last # Set certain tokens as partial.
|
54
41
|
tokens
|
55
42
|
end
|
56
|
-
|
57
|
-
#
|
58
|
-
#
|
59
|
-
# TODO Perhaps move to Normalizer?
|
43
|
+
|
44
|
+
# Converts words into real tokens.
|
60
45
|
#
|
61
|
-
def
|
62
|
-
|
63
|
-
text.downcase! # Downcase all text
|
64
|
-
normalize_with_patterns text # normalize
|
65
|
-
text.to_sym # symbolize
|
46
|
+
def tokens_for words
|
47
|
+
Internals::Query::Tokens.processed words, downcase?
|
66
48
|
end
|
67
|
-
|
68
|
-
# Returns a token for a word.
|
69
|
-
# The basic query tokenizer uses new tokens.
|
49
|
+
# Returns a tokens object.
|
70
50
|
#
|
71
|
-
def
|
72
|
-
Internals::Query::
|
51
|
+
def empty_tokens
|
52
|
+
Internals::Query::Tokens.new
|
73
53
|
end
|
74
|
-
|
54
|
+
|
75
55
|
end
|
76
|
-
|
56
|
+
|
77
57
|
end
|
78
|
-
|
58
|
+
|
79
59
|
end
|
data/lib/picky/loader.rb
CHANGED
data/lib/picky/search.rb
CHANGED
data/lib/tasks/server.rake
CHANGED
@@ -1,17 +1,9 @@
|
|
1
|
-
#
|
1
|
+
# Server tasks, like starting/stopping/restarting.
|
2
2
|
#
|
3
3
|
namespace :server do
|
4
|
-
|
5
|
-
def chdir_to_root
|
6
|
-
Dir.chdir PICKY_ROOT
|
7
|
-
end
|
8
|
-
|
9
|
-
def current_pid
|
10
|
-
pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
|
11
|
-
pid.blank? ? nil : pid.chomp
|
12
|
-
end
|
13
|
-
|
4
|
+
|
14
5
|
# desc "Start the unicorns. (Wehee!)"
|
6
|
+
#
|
15
7
|
task :start => :framework do
|
16
8
|
chdir_to_root
|
17
9
|
daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
|
@@ -19,17 +11,27 @@ namespace :server do
|
|
19
11
|
puts "Running \`#{command}\`."
|
20
12
|
exec command
|
21
13
|
end
|
22
|
-
|
14
|
+
|
23
15
|
# desc "Stop the unicorns. (Blam!)"
|
16
|
+
#
|
24
17
|
task :stop => :framework do
|
25
18
|
`kill -QUIT #{current_pid}` if current_pid
|
26
19
|
end
|
27
|
-
|
20
|
+
|
28
21
|
# desc "Restart the unicorns."
|
29
22
|
task :restart do
|
30
23
|
Rake::Task[:"server:stop"].invoke
|
31
24
|
sleep 5
|
32
25
|
Rake::Task[:"server:start"].invoke
|
33
26
|
end
|
34
|
-
|
27
|
+
|
28
|
+
def chdir_to_root
|
29
|
+
Dir.chdir PICKY_ROOT
|
30
|
+
end
|
31
|
+
|
32
|
+
def current_pid
|
33
|
+
pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
|
34
|
+
pid.blank? ? nil : pid.chomp
|
35
|
+
end
|
36
|
+
|
35
37
|
end
|
data/lib/tasks/todo.rake
CHANGED
@@ -273,14 +273,14 @@ describe Internals::FrontendAdapters::Rack do
|
|
273
273
|
end
|
274
274
|
context 'without app' do
|
275
275
|
context 'with url' do
|
276
|
-
it 'should use the
|
276
|
+
it 'should use the 200 with default_options from the url' do
|
277
277
|
@routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET", :path_info => /some_url/ }
|
278
278
|
|
279
279
|
@rack_adapter.answer 'some_url'
|
280
280
|
end
|
281
281
|
end
|
282
282
|
context 'without url' do
|
283
|
-
it 'should use the
|
283
|
+
it 'should use the 200 with default_options' do
|
284
284
|
@routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET" }
|
285
285
|
|
286
286
|
@rack_adapter.answer
|
@@ -5,7 +5,7 @@ require 'spec_helper'
|
|
5
5
|
describe Internals::Tokenizers::Base do
|
6
6
|
|
7
7
|
context 'with special instance' do
|
8
|
-
let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
|
8
|
+
let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }, case_sensitive: true }
|
9
9
|
it 'rejects tokens with length < 2' do
|
10
10
|
tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
|
11
11
|
end
|
@@ -13,7 +13,7 @@ describe Internals::Tokenizers::Base do
|
|
13
13
|
tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
|
14
14
|
end
|
15
15
|
describe 'to_s' do
|
16
|
-
it '
|
16
|
+
it 'spits out the right text' do
|
17
17
|
tokenizer.to_s.should == <<-EXPECTED
|
18
18
|
Removes characters: -
|
19
19
|
Stopwords: -
|
@@ -22,6 +22,7 @@ Removes chars after split: -
|
|
22
22
|
Normalizes words: -
|
23
23
|
Rejects tokens? Yes, see line 8 in app/application.rb
|
24
24
|
Substitutes chars? -
|
25
|
+
Case sensitive? Yes.
|
25
26
|
EXPECTED
|
26
27
|
end
|
27
28
|
end
|
@@ -31,7 +32,7 @@ EXPECTED
|
|
31
32
|
let(:tokenizer) { described_class.new }
|
32
33
|
|
33
34
|
describe 'to_s' do
|
34
|
-
it '
|
35
|
+
it 'spits out the right text' do
|
35
36
|
tokenizer.to_s.should == <<-EXPECTED
|
36
37
|
Removes characters: -
|
37
38
|
Stopwords: -
|
@@ -40,6 +41,7 @@ Removes chars after split: -
|
|
40
41
|
Normalizes words: -
|
41
42
|
Rejects tokens? -
|
42
43
|
Substitutes chars? -
|
44
|
+
Case sensitive? -
|
43
45
|
EXPECTED
|
44
46
|
end
|
45
47
|
end
|
@@ -41,6 +41,7 @@ describe Internals::Tokenizers::Query do
|
|
41
41
|
it 'should call methods in order' do
|
42
42
|
text = stub :text
|
43
43
|
|
44
|
+
tokenizer.should_receive(:substitute_characters).once.with(text).and_return text
|
44
45
|
tokenizer.should_receive(:remove_illegals).once.ordered.with text
|
45
46
|
tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
|
46
47
|
|
@@ -57,13 +58,7 @@ describe Internals::Tokenizers::Query do
|
|
57
58
|
before(:each) do
|
58
59
|
@tokens = mock :tokens, :null_object => true
|
59
60
|
end
|
60
|
-
it 'should tokenize the tokens' do
|
61
|
-
@tokens.should_receive(:tokenize_with).once.with tokenizer
|
62
|
-
|
63
|
-
tokenizer.process @tokens
|
64
|
-
end
|
65
61
|
it 'should call methods on the tokens in order' do
|
66
|
-
@tokens.should_receive(:tokenize_with).once.ordered
|
67
62
|
@tokens.should_receive(:reject).once.ordered
|
68
63
|
@tokens.should_receive(:cap).once.ordered
|
69
64
|
@tokens.should_receive(:partialize_last).once.ordered
|
@@ -122,13 +117,5 @@ describe Internals::Tokenizers::Query do
|
|
122
117
|
tokenizer.tokenize('').map(&:to_s).should == []
|
123
118
|
end
|
124
119
|
end
|
125
|
-
describe "token_for" do
|
126
|
-
it "should get a preprocessed token" do
|
127
|
-
text = stub(:text)
|
128
|
-
Internals::Query::Token.should_receive(:processed).with text
|
129
|
-
|
130
|
-
tokenizer.token_for text
|
131
|
-
end
|
132
|
-
end
|
133
120
|
|
134
121
|
end
|
@@ -64,7 +64,7 @@ describe 'Query::Combination' do
|
|
64
64
|
|
65
65
|
describe 'ids' do
|
66
66
|
it 'should call ids with the text on bundle' do
|
67
|
-
@bundle.should_receive(:ids).once.with
|
67
|
+
@bundle.should_receive(:ids).once.with :some_text
|
68
68
|
|
69
69
|
@combination.ids
|
70
70
|
end
|
@@ -80,7 +80,7 @@ describe 'Query::Combination' do
|
|
80
80
|
|
81
81
|
describe 'weight' do
|
82
82
|
it 'should call weight with the text on bundle' do
|
83
|
-
@bundle.should_receive(:weight).once.with
|
83
|
+
@bundle.should_receive(:weight).once.with :some_text
|
84
84
|
|
85
85
|
@combination.weight
|
86
86
|
end
|
@@ -44,17 +44,17 @@ describe Internals::Query::Indexes do
|
|
44
44
|
it 'can handle empty combinations' do
|
45
45
|
combinations = [[1,2,3], [:a, :b, :c], []]
|
46
46
|
|
47
|
-
indexes.expand_combinations_from(combinations).should ==
|
47
|
+
indexes.expand_combinations_from(combinations).should == []
|
48
48
|
end
|
49
49
|
it 'can handle empty combinations' do
|
50
50
|
combinations = [[], [:a, :b, :c], []]
|
51
51
|
|
52
|
-
indexes.expand_combinations_from(combinations).should ==
|
52
|
+
indexes.expand_combinations_from(combinations).should == []
|
53
53
|
end
|
54
54
|
it 'can handle totally empty combinations' do
|
55
55
|
combinations = [[], [], []]
|
56
56
|
|
57
|
-
indexes.expand_combinations_from(combinations).should ==
|
57
|
+
indexes.expand_combinations_from(combinations).should == []
|
58
58
|
end
|
59
59
|
it 'is fast in a complicated case' do
|
60
60
|
combinations = [[1,2,3], [:a, :b, :c], [:k, :l]]
|
@@ -64,12 +64,12 @@ describe Internals::Query::Indexes do
|
|
64
64
|
it 'is fast in a simple case' do
|
65
65
|
combinations = [[1], [2], [3]]
|
66
66
|
|
67
|
-
performance_of { indexes.expand_combinations_from(combinations) }.should < 0.
|
67
|
+
performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0006
|
68
68
|
end
|
69
69
|
it 'is very fast in a 1-empty case' do
|
70
70
|
combinations = [[], [2], [3]]
|
71
71
|
|
72
|
-
performance_of { indexes.expand_combinations_from(combinations) }.should < 0.
|
72
|
+
performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0005
|
73
73
|
end
|
74
74
|
it 'is very fast in a all-empty case' do
|
75
75
|
combinations = [[], [], []]
|
@@ -8,6 +8,15 @@ describe Internals::Query::Token do
|
|
8
8
|
Internals::Query::Qualifiers.instance.prepare
|
9
9
|
end
|
10
10
|
|
11
|
+
describe '==' do
|
12
|
+
it 'is equal if the originals are equal' do
|
13
|
+
described_class.processed('similar~').should == described_class.processed('similar~')
|
14
|
+
end
|
15
|
+
it 'is not equal if the originals are not equal' do
|
16
|
+
described_class.processed('similar~').should_not == described_class.processed('similar')
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
11
20
|
describe 'next_similar_token' do
|
12
21
|
before(:each) do
|
13
22
|
@bundle = stub :bundle, :similar => [:array, :of, :similar]
|
@@ -157,20 +166,29 @@ describe Internals::Query::Token do
|
|
157
166
|
end
|
158
167
|
|
159
168
|
describe 'processed' do
|
169
|
+
it 'should return a new token' do
|
170
|
+
described_class.processed('some text').should be_kind_of(described_class)
|
171
|
+
end
|
172
|
+
it 'generates a token' do
|
173
|
+
described_class.processed('some text').class.should == described_class
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
describe 'process' do
|
178
|
+
let(:token) { described_class.new 'any_text' }
|
179
|
+
it 'returns itself' do
|
180
|
+
token.process.should == token
|
181
|
+
end
|
160
182
|
it 'should have an order' do
|
161
|
-
token = stub :token
|
162
|
-
described_class.should_receive(:new).once.and_return token
|
163
|
-
|
164
183
|
token.should_receive(:qualify).once.ordered
|
165
184
|
token.should_receive(:extract_original).once.ordered
|
185
|
+
token.should_receive(:downcase).once.ordered
|
166
186
|
token.should_receive(:partialize).once.ordered
|
167
187
|
token.should_receive(:similarize).once.ordered
|
168
188
|
token.should_receive(:remove_illegals).once.ordered
|
189
|
+
token.should_receive(:symbolize).once.ordered
|
169
190
|
|
170
|
-
|
171
|
-
end
|
172
|
-
it 'should return a new token' do
|
173
|
-
described_class.processed('some text').should be_kind_of(described_class)
|
191
|
+
token.process
|
174
192
|
end
|
175
193
|
end
|
176
194
|
|
@@ -352,6 +370,13 @@ describe Internals::Query::Token do
|
|
352
370
|
before(:each) do
|
353
371
|
@token = described_class.processed 'text*'
|
354
372
|
end
|
373
|
+
it 'should not set partial' do
|
374
|
+
@token.instance_variable_set :@partial, false
|
375
|
+
|
376
|
+
@token.partial = true
|
377
|
+
|
378
|
+
@token.instance_variable_get(:@partial).should be_false
|
379
|
+
end
|
355
380
|
it 'should not set partial' do
|
356
381
|
@token.partial = false
|
357
382
|
|
@@ -382,20 +407,20 @@ describe Internals::Query::Token do
|
|
382
407
|
it 'should remove *' do
|
383
408
|
token = described_class.processed 'text*'
|
384
409
|
|
385
|
-
token.text.should ==
|
410
|
+
token.text.should == :text
|
386
411
|
end
|
387
412
|
it 'should remove ~' do
|
388
413
|
token = described_class.processed 'text~'
|
389
414
|
|
390
|
-
token.text.should ==
|
415
|
+
token.text.should == :text
|
391
416
|
end
|
392
417
|
it 'should remove "' do
|
393
418
|
token = described_class.processed 'text"'
|
394
419
|
|
395
|
-
token.text.should ==
|
420
|
+
token.text.should == :text
|
396
421
|
end
|
397
422
|
it "should pass on a processed text" do
|
398
|
-
described_class.processed('text').text.should ==
|
423
|
+
described_class.processed('text').text.should == :text
|
399
424
|
end
|
400
425
|
end
|
401
426
|
|
@@ -7,6 +7,35 @@ describe Internals::Query::Tokens do
|
|
7
7
|
Internals::Query::Qualifiers.instance.prepare
|
8
8
|
end
|
9
9
|
|
10
|
+
describe '.processed' do
|
11
|
+
it 'generates processed tokens from all words' do
|
12
|
+
expected = [
|
13
|
+
Internals::Query::Token.processed('this~'),
|
14
|
+
Internals::Query::Token.processed('is'),
|
15
|
+
Internals::Query::Token.processed('a'),
|
16
|
+
Internals::Query::Token.processed('sp:solr'),
|
17
|
+
Internals::Query::Token.processed('query"')
|
18
|
+
]
|
19
|
+
|
20
|
+
described_class.should_receive(:new).once.with expected
|
21
|
+
|
22
|
+
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
|
23
|
+
end
|
24
|
+
it 'generates processed tokens from all words' do
|
25
|
+
expected = [
|
26
|
+
Internals::Query::Token.processed('this~', false),
|
27
|
+
Internals::Query::Token.processed('is', false),
|
28
|
+
Internals::Query::Token.processed('a', false),
|
29
|
+
Internals::Query::Token.processed('sp:solr', false),
|
30
|
+
Internals::Query::Token.processed('query"', false)
|
31
|
+
]
|
32
|
+
|
33
|
+
described_class.should_receive(:new).once.with expected
|
34
|
+
|
35
|
+
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
10
39
|
describe 'to_solr_query' do
|
11
40
|
context 'many tokens' do
|
12
41
|
before(:each) do
|
@@ -151,6 +180,16 @@ describe Internals::Query::Tokens do
|
|
151
180
|
[:combination31, :combination32, :combination33]
|
152
181
|
]
|
153
182
|
end
|
183
|
+
it 'should work correctly' do
|
184
|
+
@token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
|
185
|
+
@token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return nil
|
186
|
+
@token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
|
187
|
+
|
188
|
+
@tokens.possible_combinations_in(:some_index).should == [
|
189
|
+
[:combination11, :combination12],
|
190
|
+
[:combination31, :combination32, :combination33]
|
191
|
+
]
|
192
|
+
end
|
154
193
|
end
|
155
194
|
|
156
195
|
describe 'to_s' do
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.0.0
|
5
|
+
version: 2.1.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Florian Hanke
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-04-07 00:00:00 +10:00
|
14
14
|
default_executable: picky
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|