picky 2.0.0 → 2.1.0
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- data/lib/picky/application.rb +0 -2
- data/lib/picky/internals/configuration/index.rb +11 -17
- data/lib/picky/internals/index/redis/string_hash.rb +1 -1
- data/lib/picky/internals/indexed/categories.rb +7 -3
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +9 -9
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +12 -12
- data/lib/picky/internals/query/allocations.rb +3 -5
- data/lib/picky/internals/query/combinations/memory.rb +8 -8
- data/lib/picky/internals/query/indexes.rb +6 -14
- data/lib/picky/internals/query/token.rb +32 -23
- data/lib/picky/internals/query/tokens.rb +30 -18
- data/lib/picky/internals/query/weights.rb +9 -7
- data/lib/picky/internals/tokenizers/base.rb +42 -16
- data/lib/picky/internals/tokenizers/index.rb +7 -36
- data/lib/picky/internals/tokenizers/query.rb +20 -40
- data/lib/picky/loader.rb +0 -2
- data/lib/picky/search.rb +1 -1
- data/lib/tasks/server.rake +16 -14
- data/lib/tasks/todo.rake +1 -1
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +2 -2
- data/spec/lib/internals/tokenizers/base_spec.rb +5 -3
- data/spec/lib/internals/tokenizers/query_spec.rb +1 -14
- data/spec/lib/query/combination_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +5 -5
- data/spec/lib/query/token_spec.rb +36 -11
- data/spec/lib/query/tokens_spec.rb +39 -0
- metadata +2 -2
data/lib/picky/application.rb
CHANGED
data/lib/picky/internals/configuration/index.rb
CHANGED
@@ -1,32 +1,30 @@
 module Configuration # :nodoc:all
-
+
   # Holds the configuration for a
   # index/category combination.
   #
-  # TODO Rename paths?
-  #
   class Index
-
+
     attr_reader :index, :category
-
+
     def initialize index, category
       @index = index
       @category = category
     end
-
+
     def index_name
       @index_name ||= index.name
     end
     def category_name
       @category_name ||= category.name
     end
-
+
     #
     #
     def index_path bundle_name, name
       "#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
     end
-
+
     # Was: search_index_file_name
     #
     def prepared_index_path
@@ -36,21 +34,17 @@ module Configuration # :nodoc:all
       @prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
       @prepared_index_file.open_for_indexing &block
     end
-
-    # def file_name
-    #   @file_name ||= "#{@index_name}_#{@category_name}"
-    # end
-
+
     # Identifier for internal use.
     #
     def identifier
       @identifier ||= "#{index_name}:#{category_name}"
     end
-
+
     def to_s
       "#{index_name} #{category_name}"
     end
-
+
     def self.index_root
       @index_root ||= "#{PICKY_ROOT}/index"
     end
@@ -67,7 +61,7 @@ module Configuration # :nodoc:all
     def prepare_index_directory
       FileUtils.mkdir_p index_directory
     end
-
+
   end
-
+
 end
data/lib/picky/internals/indexed/categories.rb
CHANGED
@@ -67,7 +67,7 @@ module Internals
     # for each found similar token.
     #
     def similar_possible_for token
-      # Get as many
+      # Get as many tokens as necessary
       #
       tokens = similar_tokens_for token
       # possible combinations
@@ -105,9 +105,13 @@ module Internals
     # (Also none of the categories matched, but the ignore unassigned
     # tokens option is true)
     #
+    # TODO Could use Combinations class here and remove the inject.
+    #
     def possible_for token, preselected_categories = nil
-      possible = (preselected_categories || possible_categories(token)).
-
+      possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
+        combination = category.combination_for token
+        combination ? combinations << combination : combinations
+      end
       # This is an optimization to mark tokens that are ignored.
       #
       return if ignore_unassigned_tokens && possible.empty?
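The rewritten possible_for collects combinations with an explicit inject, keeping a combination only when the category yields one. A standalone sketch of that accumulate-or-skip pattern (the Category struct and data here are invented for illustration):

    # Each category may or may not produce a combination for a token;
    # only the non-nil results are accumulated.
    Category = Struct.new(:name, :words) do
      def combination_for token
        [name, token] if words.include? token
      end
    end

    categories = [
      Category.new(:title,  %w(picky search)),
      Category.new(:author, %w(hanke))
    ]

    possible = categories.inject([]) do |combinations, category|
      combination = category.combination_for 'picky'
      combination ? combinations << combination : combinations
    end

    p possible # => [[:title, "picky"]]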
data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb
CHANGED
@@ -1,35 +1,35 @@
 module Indexed
   module Wrappers
-
+
     module Bundle
-
+
       # A calculation rewrites the symbol into a float.
       #
-      # TODO I really need to allow integers as keys. The code below is just not
+      # TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
       #
       class Calculation < Wrapper
-
+
        #
        #
        def recalculate float
          float
        end
-
+
        #
        #
        def ids sym
          @bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
        end
-
+
        #
        #
        def weight sym
          @bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
        end
-
+
      end
-
+
    end
-
+
  end
 end
data/lib/picky/internals/indexed/wrappers/exact_first.rb
CHANGED
@@ -3,18 +3,18 @@ module Internals
 # encoding: utf-8
 #
 module Indexed
-
+
   # TODO Spec
   #
   module Wrappers
-
+
     # This index combines an exact and partial index.
     # It serves to order the results such that exact hits are found first.
     #
     # TODO Need to use the right subtokens. Bake in?
     #
-    class ExactFirst < Indexed::Bundle::
-
+    class ExactFirst < Indexed::Bundle::Base
+
       delegate :similar,
                :identifier,
                :name,
@@ -28,12 +28,12 @@ module Internals
               :dump,
               :load,
               :to => :@partial
-
+
      def initialize category
        @exact = category.exact
        @partial = category.partial
      end
-
+
      def self.wrap index_or_category
        if index_or_category.respond_to? :categories
          wrap_each_of index_or_category.categories
@@ -47,19 +47,19 @@ module Internals
      def self.wrap_each_of categories
        categories.categories.collect! { |category| new(category) }
      end
-
+
      def ids text
        @exact.ids(text) + @partial.ids(text)
      end
-
+
      def weight text
        [@exact.weight(text) || 0, @partial.weight(text) || 0].max
      end
-
+
    end
-
+
  end
-
+
 end
data/lib/picky/internals/query/allocations.rb
CHANGED
@@ -5,8 +5,6 @@ module Internals
   #
   class Allocations # :nodoc:all
 
-    # TODO Remove size
-    #
     delegate :each, :inject, :empty?, :size, :to => :@allocations
     attr_reader :total
 
@@ -23,7 +21,7 @@ module Internals
     end
     # Sort the allocations.
     #
-    def sort
+    def sort!
       @allocations.sort!
     end
 
@@ -116,7 +114,7 @@ module Internals
     end
 
   end
-
+
 end
-
+
 end
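Renaming sort to sort! follows Ruby's convention that a bang marks an in-place mutation, which is what the delegated @allocations.sort! was already doing. The convention in two lines:

    a = [3, 1, 2]
    a.sort  # => [1, 2, 3], a unchanged
    a.sort! # => [1, 2, 3], a now mutated in place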
data/lib/picky/internals/query/combinations/memory.rb
CHANGED
@@ -8,12 +8,12 @@ module Internals
   # An allocation consists of a number of combinations.
   #
   module Combinations # :nodoc:all
-
+
     # Memory Combinations contain specific methods for
     # calculating score and ids in memory.
     #
     class Memory < Base
-
+
       # Returns the result ids for the allocation.
       #
       # Sorts the ids by size and & through them in the following order (sizes):
@@ -24,7 +24,7 @@ module Internals
       # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
       #
       # Note: In the memory based version we ignore the (amount) needed hint.
-      #
+      # We might use the fact to optimize the algorithm.
       #
       def ids _, _
         return [] if @combinations.empty?
@@ -43,16 +43,16 @@ module Internals
        # this precondition for a fast algorithm is always given.
        #
        id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
-
+
        # Call the optimized C algorithm.
        #
        Performant::Array.memory_efficient_intersect id_arrays
      end
-
+
    end
-
+
  end
-
+
 end
-
+
 end
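Only comments and whitespace change here, but the hunk documents the important detail: the id arrays are pre-sorted by size so that the intersection runs smallest-first. A plain-Ruby stand-in for the C-optimized Performant::Array.memory_efficient_intersect (a sketch of the idea, not the actual routine):

    # Intersect smallest-first: every intermediate result stays no larger
    # than the smallest input, which is what the pre-sort buys.
    def memory_efficient_intersect id_arrays
      id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
      id_arrays.reduce { |intersection, ids| intersection & ids }
    end

    p memory_efficient_intersect([[1, 2, 3, 4], [2, 4], [2, 3, 4]]) # => [2, 4]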
data/lib/picky/internals/query/indexes.rb
CHANGED
@@ -27,6 +27,9 @@ module Internals
 
   # Returns a number of possible allocations for the given tokens.
   #
+  def sorted_allocations_for tokens
+
+  end
   def allocations_for tokens
     Allocations.new allocations_ary_for(tokens)
   end
@@ -40,21 +43,10 @@ module Internals
   #
   possible_combinations = tokens.possible_combinations_in index
 
-  # Optimization for ignoring tokens that allocate to nothing and
-  # can be ignored.
-  # For example in a special search, where "florian" is not
-  # mapped to any category.
-  #
-  possible_combinations.compact!
-
   # Generate all possible combinations.
   #
   expanded_combinations = expand_combinations_from possible_combinations
 
-  # If there are none, try the next allocation.
-  #
-  return [] unless expanded_combinations
-
   # Add the wrapped possible allocations to the ones we already have.
   #
   expanded_combinations.map! do |expanded_combination|
@@ -62,7 +54,7 @@ module Internals
   end
   end
 
-  # This is the core of the search engine.
+  # This is the core of the search engine. No kidding.
   #
   # Gets an array of
   # [
@@ -122,7 +114,7 @@ module Internals
   # If an element has size 0, this means one of the
   # tokens could not be allocated.
   #
-  return if possible_combinations.any?(&:empty?)
+  return [] if possible_combinations.any?(&:empty?)
 
   # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
   #
@@ -170,7 +162,7 @@ module Internals
   combinations
   end
 
-  return if possible_combinations.empty?
+  return [] if possible_combinations.empty?
 
   possible_combinations.shift.zip *possible_combinations
 end
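Two hunks switch a bare return (which yields nil) to return [], so the expansion always hands back an array; the matching spec changes below assert the []. A minimal sketch of why that helps callers (a hypothetical helper mirroring the zip line above):

    def expanded_combinations possible
      return [] if possible.any?(&:empty?) # was: return (i.e. nil)
      possible.shift.zip(*possible)
    end

    p expanded_combinations [[1], [2], [3]] # => [[1, 2, 3]]
    p expanded_combinations [[], [2], [3]]  # => [] — map!/each compose without a nil guard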
data/lib/picky/internals/query/token.rb
CHANGED
@@ -28,14 +28,18 @@ module Internals
   # Note: Use this in the search engine if you need a qualified
   # and normalized token. I.e. one prepared for a search.
   #
-  def self.processed text
-
-
-
-
-
-
-
+  def self.processed text, downcase = true
+    new(text).process downcase
+  end
+  def process downcases = true
+    qualify
+    extract_original
+    downcase if downcases
+    partialize
+    similarize
+    remove_illegals
+    symbolize
+    self
   end
 
   # This returns a predefined category name if the user has given one.
@@ -56,6 +60,12 @@ module Internals
     @original = @text.dup
   end
 
+  # Downcases the text.
+  #
+  def downcase
+    @text.downcase!
+  end
+
   # Partial is a conditional setter.
   #
   # It is only settable if it hasn't been set yet.
@@ -69,15 +79,19 @@ module Internals
 
   # If the text ends with *, partialize it. If with ", don't.
   #
+  # The latter wins. So "hello*" will not be partially searched.
+  #
   @@no_partial = /\"\Z/
   @@partial = /\*\Z/
   def partialize
-    self.partial = false and return
-    self.partial = true
+    self.partial = false and return unless @text !~ @@no_partial
+    self.partial = true unless @text !~ @@partial
   end
 
   # If the text ends with ~ similarize it. If with ", don't.
   #
+  # The latter wins.
+  #
   @@no_similar = /\"\Z/
   @@similar = /\~\Z/
   def similarize
@@ -96,21 +110,10 @@ module Internals
     @text.gsub! @@illegals, '' unless @text.blank?
   end
 
-  # Visitor for tokenizer.
   #
-  # TODO Rewrite!!!
   #
-  def
-    @text =
-  end
-  # TODO spec!
-  #
-  # TODO Rewrite!!
-  #
-  def tokenized tokenizer
-    tokenizer.tokenize(@text.to_s).each do |text|
-      yield text
-    end
+  def symbolize
+    @text = @text.to_sym
   end
 
   # Returns an array of possible combinations.
@@ -181,6 +184,12 @@ module Internals
     "#{similar?? :similarity : :index}:#{@text}"
   end
 
+  # If the originals & the text are the same, they are the same.
+  #
+  def == other
+    self.original == other.original && self.text == other.text
+  end
+
   # Displays the qualifier text and the text, joined.
   #
   # e.g. name:meier
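Token.processed now takes a downcase flag and runs an explicit pipeline: qualify, extract_original, downcase (conditionally), partialize, similarize, remove_illegals, symbolize. Rough usage, assuming the gem is loaded (return values follow the specs further down):

    # Default: downcased, trailing * stripped, partial flag set.
    Internals::Query::Token.processed('Meier*').text        # => :meier

    # Case-sensitive querying passes false for the new flag.
    Internals::Query::Token.processed('Meier*', false).text # => :Meier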
data/lib/picky/internals/query/tokens.rb
CHANGED
@@ -1,31 +1,36 @@
 # encoding: utf-8
 #
 module Internals
-
+
   #
   #
   module Query
-
+
     # This class primarily handles switching through similar token constellations.
     #
     class Tokens # :nodoc:all
-
+
       # Basically delegates to its internal tokens array.
       #
       self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
-
+
       #
       #
       def initialize tokens = []
         @tokens = tokens
       end
-
+      def self.processed words, downcase = true
+        new words.collect! { |word| Token.processed word, downcase }
+      end
+
+      # Tokenizes each token.
       #
+      # Note: Passed tokenizer needs to offer #normalize(text).
       #
       def tokenize_with tokenizer
         @tokens.each { |token| token.tokenize_with(tokenizer) }
       end
-
+
       # Generates an array in the form of
       # [
       #  [combination], # of token 1
@@ -33,14 +38,17 @@ module Internals
       #  [combination, combination] # of token 3
       # ]
       #
-      # TODO If we want token behaviour defined per Query, we can
-      # compact! here
-      #
       def possible_combinations_in type
         @tokens.inject([]) do |combinations, token|
-
+          possible_combinations = token.possible_combinations_in type
+
+          # Note: Optimization for ignoring tokens that allocate to nothing and
+          # can be ignored.
+          # For example in a special search, where "florian" is not
+          # mapped to any category.
+          #
+          possible_combinations ? combinations << possible_combinations : combinations
         end
-        # TODO compact! if ignore_unassigned_tokens
       end
 
       # Makes the last of the tokens partial.
@@ -57,33 +65,37 @@ module Internals
       def cap? maximum
         @tokens.size > maximum
       end
-
+
       # Rejects blank tokens.
       #
       def reject
         @tokens.reject! &:blank?
       end
-
+
       # Returns a solr query.
       #
       def to_solr_query
         @tokens.map(&:to_solr).join ' '
       end
-
+
       #
       #
       def originals
         @tokens.map(&:original)
       end
-
+
+      def == other
+        self.tokens == other.tokens
+      end
+
       # Just join the token original texts.
       #
       def to_s
         originals.join ' '
       end
-
+
     end
-
+
   end
-
+
 end
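The new Tokens.processed builds a whole collection in one step, threading the downcase flag through to each Token.processed call, and the added == makes collections comparable (the new specs depend on it). A rough usage sketch:

    tokens = Internals::Query::Tokens.processed ['Meier~', 'peter'], true
    tokens.to_s # => roughly 'Meier~ peter' — to_s joins the kept originals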
data/lib/picky/internals/query/weights.rb
CHANGED
@@ -3,19 +3,19 @@ module Query
   # Calculates weights for certain combinations.
   #
   class Weights # :nodoc:all
-
+
     #
     #
     def initialize weights = {}
       @weights = weights
     end
-
+
     # Get the weight of an allocation.
     #
     def weight_for clustered
       @weights[clustered] || 0
     end
-
+
     # Returns an energy term E for allocation. this turns into a probability
     # by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
     # where Z is the normalizing partition function
@@ -31,24 +31,26 @@ module Query
     # Note: Cache this if more complicated weighings become necessary.
     #
     def score combinations
-      # TODO Or hide: combinations#to_weights_key
+      # TODO Or hide: combinations#to_weights_key (but it's an array, so…)
       #
       # TODO combinations could cluster uniq as combinations are added (since combinations don't change).
       #
+      # TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
+      #
       weight_for combinations.map(&:category_name).clustered_uniq_fast
     end
-
+
     # Are there any weights defined?
     #
     def empty?
       @weights.empty?
     end
-
+
     # Prints out a nice representation of the configured weights.
     #
     def to_s
       @weights.to_s
     end
-
+
   end
 end
data/lib/picky/internals/tokenizers/base.rb
CHANGED
@@ -20,6 +20,7 @@ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@r
 Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
 Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
 Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
+Case sensitive? #{@case_sensitive ? "Yes." : "-"}
 TOKENIZER
     end
 
@@ -125,6 +126,13 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
       tokens.reject! &@reject_condition
     end
 
+    def case_sensitive case_sensitive
+      @case_sensitive = case_sensitive
+    end
+    def downcase?
+      !@case_sensitive
+    end
+
     # Checks if the right argument type has been given.
     #
     def check_argument_in method, type, argument, &condition
@@ -156,6 +164,7 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
       normalizes_words options[:normalizes_words] if options[:normalizes_words]
       removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
       substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
+      case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
 
       # Defaults.
       #
@@ -163,37 +172,54 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
       reject_token_if &(options[:reject_token_if] || :blank?)
     end
 
-    #
+    # Default preprocessing hook.
     #
-
-    #
+    # Does:
+    # 1. Character substitution.
+    # 2. Remove illegal expressions.
+    # 3. Remove non-single stopwords. (Stopwords that occur with other words)
     #
-    def preprocess text
+    def preprocess text
+      text = substitute_characters text
+      remove_illegals text
+      # We do not remove single stopwords e.g. in the indexer for
+      # an entirely different reason than in the query tokenizer.
+      # An indexed thing with just name "UND" (a possible stopword)
+      # should not lose its name.
+      #
+      remove_non_single_stopwords text
+      text
+    end
     # Pretokenizing.
     #
-
-    #
+    # Does:
+    # 1. Split the text into words.
+    # 2. Normalize each word.
+    #
+    def pretokenize text
+      words = split text
+      words.collect! do |word|
+        normalize_with_patterns word
+        word
+      end
+    end
+    # Basic postprocessing (overridden in both query/index tokenizers).
     #
     def process tokens
       reject tokens # Reject any tokens that don't meet criteria
       tokens
     end
 
-    # Converts words into real tokens.
-    #
-    def tokens_for words
-
-    end
+    # # Converts words into real tokens.
+    # #
+    # def tokens_for words
+    #   Internals::Query::Tokens.new words.collect! { |word| token_for word }
+    # end
     # Turns non-blank text into symbols.
    #
    def symbolize text
      text.blank? ? nil : text.to_sym
    end
-    # Returns a tokens object.
-    #
-    def empty_tokens
-      Internals::Query::Tokens.new
-    end
 
  end
 
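The headline change in this file is the case_sensitive option: it is read from the options hash, reported by to_s, and exposed as downcase?, which the index and query tokenizers consult below. The base_spec further down constructs it exactly like this:

    tokenizer = Internals::Tokenizers::Base.new case_sensitive: true
    tokenizer.downcase? # => false — downcasing is skipped

    Internals::Tokenizers::Base.new.downcase? # => true, the default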
data/lib/picky/internals/tokenizers/index.rb
CHANGED
@@ -15,45 +15,16 @@ module Internals
       @default ||= new
     end
 
-    # Default indexing preprocessing hook.
-    #
-    # Does:
-    # 1. Character substitution.
-    # 2. Downcasing.
-    # 3. Remove illegal expressions.
-    # 4. Remove non-single stopwords. (Stopwords that occur with other words)
-    #
-    def preprocess text
-      text = substitute_characters text
-      text.downcase!
-      remove_illegals text
-      # we do not remove single stopwords for an entirely different
-      # reason than in the query tokenizer.
-      # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
-      #
-      remove_non_single_stopwords text
-      text
-    end
-
-    # Default indexing pretokenizing hook.
-    #
-    # Does:
-    # 1. Split the text into words.
-    # 2. Normalize each word.
-    #
-    def pretokenize text
-      words = split text
-      words.collect! do |word|
-        normalize_with_patterns word
-        word
-      end
-    end
-
     # Does not actually return a token, but a
     # symbol "token".
     #
-    def
-
+    def tokens_for words
+      words.collect! { |word| word.downcase! if downcase?; word.to_sym }
+    end
+    # Returns empty tokens.
+    #
+    def empty_tokens
+      []
     end
 
   end
data/lib/picky/internals/tokenizers/query.rb
CHANGED
@@ -3,7 +3,7 @@
 module Internals
 
   module Tokenizers
-
+
     # There are a few class methods that you can use to configure how a query works.
     #
     # removes_characters regexp
@@ -14,66 +14,46 @@ module Internals
     # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
     #
     class Query < Base
-
+
       def self.default= new_default
         @default = new_default
       end
       def self.default
         @default ||= new
       end
-
+
       attr_reader :maximum_tokens
-
+
       def initialize options = {}
        super options
        @maximum_tokens = options[:maximum_tokens] || 5
      end
-
-      def preprocess text
-        remove_illegals text # Remove illegal characters
-        remove_non_single_stopwords text # remove stop words
-        text
-      end
-
-      # Split the text and put some back together.
-      #
-      # TODO Make the same as in indexing?
-      #
-      def pretokenize text
-        split text
-      end
-
+
      # Let each token process itself.
      # Reject, limit, and partialize tokens.
      #
+      # In querying we work with real tokens (in indexing it's just symbols).
+      #
      def process tokens
-        tokens.
-        tokens.
-        tokens.
-        tokens.partialize_last # Set certain tokens as partial
+        tokens.reject # Reject any tokens that don't meet criteria.
+        tokens.cap maximum_tokens # Cut off superfluous tokens.
+        tokens.partialize_last # Set certain tokens as partial.
        tokens
      end
-
-      #
-      #
-      # TODO Perhaps move to Normalizer?
+
+      # Converts words into real tokens.
      #
-      def
-
-        text.downcase! # Downcase all text
-        normalize_with_patterns text # normalize
-        text.to_sym # symbolize
+      def tokens_for words
+        Internals::Query::Tokens.processed words, downcase?
      end
-
-      # Returns a token for a word.
-      # The basic query tokenizer uses new tokens.
+      # Returns a tokens object.
      #
-      def
-        Internals::Query::
+      def empty_tokens
+        Internals::Query::Tokens.new
      end
-
+
    end
-
+
  end
-
+
 end
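With preprocess and pretokenize now shared in Base, the query tokenizer keeps only process plus the two token factories. A toy pipeline with stand-in steps, to make the order concrete (the lambdas are invented; only the ordering mirrors the tokenizers above):

    preprocess  = ->(text)  { text.gsub(/[*~":]/, '') }             # stand-in for substitute/remove steps
    pretokenize = ->(text)  { text.split }                          # stand-in for split + normalize
    tokens_for  = ->(words) { words.map(&:downcase).map(&:to_sym) } # what the index tokenizer now does

    p tokens_for.call(pretokenize.call(preprocess.call('Meier* Peter')))
    # => [:meier, :peter]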
data/lib/picky/loader.rb
CHANGED
data/lib/picky/search.rb
CHANGED
data/lib/tasks/server.rake
CHANGED
@@ -1,17 +1,9 @@
-#
+# Server tasks, like starting/stopping/restarting.
 #
 namespace :server do
-
-  def chdir_to_root
-    Dir.chdir PICKY_ROOT
-  end
-
-  def current_pid
-    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
-    pid.blank? ? nil : pid.chomp
-  end
-
+
   # desc "Start the unicorns. (Wehee!)"
+  #
   task :start => :framework do
     chdir_to_root
     daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
@@ -19,17 +11,27 @@ namespace :server do
     puts "Running \`#{command}\`."
     exec command
   end
-
+
   # desc "Stop the unicorns. (Blam!)"
+  #
   task :stop => :framework do
     `kill -QUIT #{current_pid}` if current_pid
   end
-
+
   # desc "Restart the unicorns."
   task :restart do
     Rake::Task[:"server:stop"].invoke
     sleep 5
     Rake::Task[:"server:start"].invoke
   end
-
+
+  def chdir_to_root
+    Dir.chdir PICKY_ROOT
+  end
+
+  def current_pid
+    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
+    pid.blank? ? nil : pid.chomp
+  end
+
 end
data/lib/tasks/todo.rake
CHANGED
data/spec/lib/internals/frontend_adapters/rack_spec.rb
CHANGED
@@ -273,14 +273,14 @@ describe Internals::FrontendAdapters::Rack do
   end
   context 'without app' do
     context 'with url' do
-      it 'should use the
+      it 'should use the 200 with default_options from the url' do
         @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET", :path_info => /some_url/ }
 
         @rack_adapter.answer 'some_url'
       end
     end
     context 'without url' do
-      it 'should use the
+      it 'should use the 200 with default_options' do
         @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET" }
 
         @rack_adapter.answer
data/spec/lib/internals/tokenizers/base_spec.rb
CHANGED
@@ -5,7 +5,7 @@ require 'spec_helper'
 describe Internals::Tokenizers::Base do
 
   context 'with special instance' do
-    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
+    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }, case_sensitive: true }
     it 'rejects tokens with length < 2' do
       tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
     end
@@ -13,7 +13,7 @@ describe Internals::Tokenizers::Base do
       tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
     end
     describe 'to_s' do
-      it '
+      it 'spits out the right text' do
         tokenizer.to_s.should == <<-EXPECTED
 Removes characters: -
 Stopwords: -
@@ -22,6 +22,7 @@ Removes chars after split: -
 Normalizes words: -
 Rejects tokens? Yes, see line 8 in app/application.rb
 Substitutes chars? -
+Case sensitive? Yes.
 EXPECTED
       end
     end
@@ -31,7 +32,7 @@ EXPECTED
     let(:tokenizer) { described_class.new }
 
     describe 'to_s' do
-      it '
+      it 'spits out the right text' do
        tokenizer.to_s.should == <<-EXPECTED
 Removes characters: -
 Stopwords: -
@@ -40,6 +41,7 @@ Removes chars after split: -
 Normalizes words: -
 Rejects tokens? -
 Substitutes chars? -
+Case sensitive? -
 EXPECTED
      end
    end
data/spec/lib/internals/tokenizers/query_spec.rb
CHANGED
@@ -41,6 +41,7 @@ describe Internals::Tokenizers::Query do
   it 'should call methods in order' do
     text = stub :text
 
+    tokenizer.should_receive(:substitute_characters).once.with(text).and_return text
     tokenizer.should_receive(:remove_illegals).once.ordered.with text
     tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
 
@@ -57,13 +58,7 @@ describe Internals::Tokenizers::Query do
   before(:each) do
     @tokens = mock :tokens, :null_object => true
   end
-  it 'should tokenize the tokens' do
-    @tokens.should_receive(:tokenize_with).once.with tokenizer
-
-    tokenizer.process @tokens
-  end
   it 'should call methods on the tokens in order' do
-    @tokens.should_receive(:tokenize_with).once.ordered
     @tokens.should_receive(:reject).once.ordered
     @tokens.should_receive(:cap).once.ordered
     @tokens.should_receive(:partialize_last).once.ordered
@@ -122,13 +117,5 @@ describe Internals::Tokenizers::Query do
     tokenizer.tokenize('').map(&:to_s).should == []
   end
 end
-  describe "token_for" do
-    it "should get a preprocessed token" do
-      text = stub(:text)
-      Internals::Query::Token.should_receive(:processed).with text
-
-      tokenizer.token_for text
-    end
-  end
 
 end
data/spec/lib/query/combination_spec.rb
CHANGED
@@ -64,7 +64,7 @@ describe 'Query::Combination' do
 
   describe 'ids' do
     it 'should call ids with the text on bundle' do
-      @bundle.should_receive(:ids).once.with
+      @bundle.should_receive(:ids).once.with :some_text
 
       @combination.ids
     end
@@ -80,7 +80,7 @@ describe 'Query::Combination' do
 
   describe 'weight' do
     it 'should call weight with the text on bundle' do
-      @bundle.should_receive(:weight).once.with
+      @bundle.should_receive(:weight).once.with :some_text
 
       @combination.weight
     end
data/spec/lib/query/indexes_spec.rb
CHANGED
@@ -44,17 +44,17 @@ describe Internals::Query::Indexes do
   it 'can handle empty combinations' do
     combinations = [[1,2,3], [:a, :b, :c], []]
 
-    indexes.expand_combinations_from(combinations).should ==
+    indexes.expand_combinations_from(combinations).should == []
   end
   it 'can handle empty combinations' do
     combinations = [[], [:a, :b, :c], []]
 
-    indexes.expand_combinations_from(combinations).should ==
+    indexes.expand_combinations_from(combinations).should == []
   end
   it 'can handle totally empty combinations' do
     combinations = [[], [], []]
 
-    indexes.expand_combinations_from(combinations).should ==
+    indexes.expand_combinations_from(combinations).should == []
   end
   it 'is fast in a complicated case' do
     combinations = [[1,2,3], [:a, :b, :c], [:k, :l]]
@@ -64,12 +64,12 @@ describe Internals::Query::Indexes do
   it 'is fast in a simple case' do
     combinations = [[1], [2], [3]]
 
-    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.
+    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0006
   end
   it 'is very fast in a 1-empty case' do
     combinations = [[], [2], [3]]
 
-    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.
+    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0005
   end
   it 'is very fast in a all-empty case' do
     combinations = [[], [], []]
data/spec/lib/query/token_spec.rb
CHANGED
@@ -8,6 +8,15 @@ describe Internals::Query::Token do
     Internals::Query::Qualifiers.instance.prepare
   end
 
+  describe '==' do
+    it 'is equal if the originals are equal' do
+      described_class.processed('similar~').should == described_class.processed('similar~')
+    end
+    it 'is not equal if the originals are not equal' do
+      described_class.processed('similar~').should_not == described_class.processed('similar')
+    end
+  end
+
   describe 'next_similar_token' do
     before(:each) do
       @bundle = stub :bundle, :similar => [:array, :of, :similar]
@@ -157,20 +166,29 @@ describe Internals::Query::Token do
   end
 
   describe 'processed' do
+    it 'should return a new token' do
+      described_class.processed('some text').should be_kind_of(described_class)
+    end
+    it 'generates a token' do
+      described_class.processed('some text').class.should == described_class
+    end
+  end
+
+  describe 'process' do
+    let(:token) { described_class.new 'any_text' }
+    it 'returns itself' do
+      token.process.should == token
+    end
     it 'should have an order' do
-      token = stub :token
-      described_class.should_receive(:new).once.and_return token
-
       token.should_receive(:qualify).once.ordered
       token.should_receive(:extract_original).once.ordered
+      token.should_receive(:downcase).once.ordered
       token.should_receive(:partialize).once.ordered
       token.should_receive(:similarize).once.ordered
       token.should_receive(:remove_illegals).once.ordered
+      token.should_receive(:symbolize).once.ordered
 
-
-    end
-    it 'should return a new token' do
-      described_class.processed('some text').should be_kind_of(described_class)
+      token.process
     end
   end
 
@@ -352,6 +370,13 @@ describe Internals::Query::Token do
     before(:each) do
       @token = described_class.processed 'text*'
     end
+    it 'should not set partial' do
+      @token.instance_variable_set :@partial, false
+
+      @token.partial = true
+
+      @token.instance_variable_get(:@partial).should be_false
+    end
     it 'should not set partial' do
       @token.partial = false
 
@@ -382,20 +407,20 @@ describe Internals::Query::Token do
     it 'should remove *' do
       token = described_class.processed 'text*'
 
-      token.text.should ==
+      token.text.should == :text
     end
     it 'should remove ~' do
      token = described_class.processed 'text~'
 
-      token.text.should ==
+      token.text.should == :text
    end
    it 'should remove "' do
      token = described_class.processed 'text"'
 
-      token.text.should ==
+      token.text.should == :text
    end
    it "should pass on a processed text" do
-      described_class.processed('text').text.should ==
+      described_class.processed('text').text.should == :text
    end
  end
 
data/spec/lib/query/tokens_spec.rb
CHANGED
@@ -7,6 +7,35 @@ describe Internals::Query::Tokens do
     Internals::Query::Qualifiers.instance.prepare
   end
 
+  describe '.processed' do
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~'),
+        Internals::Query::Token.processed('is'),
+        Internals::Query::Token.processed('a'),
+        Internals::Query::Token.processed('sp:solr'),
+        Internals::Query::Token.processed('query"')
+      ]
+
+      described_class.should_receive(:new).once.with expected
+
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~', false),
+        Internals::Query::Token.processed('is', false),
+        Internals::Query::Token.processed('a', false),
+        Internals::Query::Token.processed('sp:solr', false),
+        Internals::Query::Token.processed('query"', false)
+      ]
+
+      described_class.should_receive(:new).once.with expected
+
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+  end
+
   describe 'to_solr_query' do
     context 'many tokens' do
       before(:each) do
@@ -151,6 +180,16 @@ describe Internals::Query::Tokens do
         [:combination31, :combination32, :combination33]
       ]
     end
+    it 'should work correctly' do
+      @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
+      @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return nil
+      @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
+
+      @tokens.possible_combinations_in(:some_index).should == [
+        [:combination11, :combination12],
+        [:combination31, :combination32, :combination33]
+      ]
+    end
   end
 
   describe 'to_s' do
metadata
CHANGED
@@ -2,7 +2,7 @@
 name: picky
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 2.0.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2011-
+date: 2011-04-07 00:00:00 +10:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency