picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/query/token.rb
CHANGED
|
@@ -1,202 +1,195 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# It remembers the original form, and and a normalized form.
|
|
6
|
-
#
|
|
7
|
-
# It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
|
|
8
|
-
#
|
|
9
|
-
class Token # :nodoc:all
|
|
3
|
+
module Query
|
|
10
4
|
|
|
11
|
-
|
|
12
|
-
attr_writer :similar
|
|
13
|
-
|
|
14
|
-
delegate :blank?, :to => :text
|
|
15
|
-
|
|
16
|
-
# Normal initializer.
|
|
17
|
-
#
|
|
18
|
-
# Note: Use this if you do not want a qualified and normalized token.
|
|
19
|
-
#
|
|
20
|
-
# TODO text, qualifiers
|
|
5
|
+
# This is a query token. Together with other tokens it makes up a query.
|
|
21
6
|
#
|
|
22
|
-
|
|
23
|
-
@text = text
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# Returns a qualified and normalized token.
|
|
7
|
+
# It remembers the original form, and a normalized form.
|
|
27
8
|
#
|
|
28
|
-
#
|
|
29
|
-
# and normalized token. I.e. one prepared for a search.
|
|
9
|
+
# It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
|
|
30
10
|
#
|
|
31
|
-
|
|
32
|
-
new(text).process downcase
|
|
33
|
-
end
|
|
34
|
-
def process downcases = true
|
|
35
|
-
qualify
|
|
36
|
-
extract_original
|
|
37
|
-
downcase if downcases
|
|
38
|
-
partialize
|
|
39
|
-
similarize
|
|
40
|
-
remove_illegals
|
|
41
|
-
symbolize
|
|
42
|
-
self
|
|
43
|
-
end
|
|
11
|
+
class Token # :nodoc:all
|
|
44
12
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def user_defined_category_names
|
|
48
|
-
@qualifiers
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Extracts a qualifier for this token and pre-assigns an allocation.
|
|
52
|
-
#
|
|
53
|
-
# Note: Removes the qualifier if it is not allowed.
|
|
54
|
-
#
|
|
55
|
-
# TODO Extract this sind it is Search-based.
|
|
56
|
-
#
|
|
57
|
-
def qualify
|
|
58
|
-
@qualifiers, @text = split @text
|
|
59
|
-
@qualifiers && @qualifiers.collect! { |qualifier| Query::Qualifiers.instance.normalize qualifier }.compact!
|
|
60
|
-
@qualifiers
|
|
61
|
-
end
|
|
62
|
-
def extract_original
|
|
63
|
-
@original = @text.dup
|
|
64
|
-
end
|
|
13
|
+
attr_reader :text, :original, :qualifiers, :user_defined_categories
|
|
14
|
+
attr_writer :similar
|
|
65
15
|
|
|
66
|
-
|
|
67
|
-
#
|
|
68
|
-
def downcase
|
|
69
|
-
@text.downcase!
|
|
70
|
-
end
|
|
16
|
+
delegate :blank?, :to => :text
|
|
71
17
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def partial?
|
|
80
|
-
!@similar && @partial
|
|
81
|
-
end
|
|
18
|
+
# Normal initializer.
|
|
19
|
+
#
|
|
20
|
+
# Note: Use this if you do not want a normalized token.
|
|
21
|
+
#
|
|
22
|
+
def initialize text
|
|
23
|
+
@text = text
|
|
24
|
+
end
|
|
82
25
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
26
|
+
# Returns a qualified and normalized token.
|
|
27
|
+
#
|
|
28
|
+
# Note: Use this in the search engine if you need a qualified
|
|
29
|
+
# and normalized token. I.e. one prepared for a search.
|
|
30
|
+
#
|
|
31
|
+
def self.processed text, downcase = true
|
|
32
|
+
new(text).process downcase
|
|
33
|
+
end
|
|
34
|
+
def process downcased = true
|
|
35
|
+
qualify
|
|
36
|
+
extract_original
|
|
37
|
+
downcase if downcased
|
|
38
|
+
partialize
|
|
39
|
+
similarize
|
|
40
|
+
remove_illegals
|
|
41
|
+
symbolize
|
|
42
|
+
self
|
|
43
|
+
end
|
|
93
44
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
end
|
|
45
|
+
# Translates this token's qualifiers into actual categories.
|
|
46
|
+
#
|
|
47
|
+
# Note: If this is not done, there is no mapping.
|
|
48
|
+
#
|
|
49
|
+
def categorize mapper
|
|
50
|
+
@user_defined_categories = @qualifiers && @qualifiers.map do |qualifier|
|
|
51
|
+
mapper.map qualifier
|
|
52
|
+
end.compact
|
|
53
|
+
end
|
|
104
54
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
55
|
+
# Dups the original text.
|
|
56
|
+
#
|
|
57
|
+
def extract_original
|
|
58
|
+
@original = @text.dup
|
|
59
|
+
end
|
|
108
60
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
end
|
|
61
|
+
# Downcases the text.
|
|
62
|
+
#
|
|
63
|
+
def downcase
|
|
64
|
+
@text.downcase!
|
|
65
|
+
end
|
|
115
66
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
67
|
+
# Partial is a conditional setter.
|
|
68
|
+
#
|
|
69
|
+
# It is only settable if it hasn't been set yet.
|
|
70
|
+
#
|
|
71
|
+
def partial= partial
|
|
72
|
+
@partial = partial if @partial.nil?
|
|
73
|
+
end
|
|
74
|
+
def partial?
|
|
75
|
+
!@similar && @partial
|
|
76
|
+
end
|
|
121
77
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
78
|
+
# If the text ends with *, partialize it. If with ", don't.
|
|
79
|
+
#
|
|
80
|
+
# The latter wins. So "hello*" will not be partially searched.
|
|
81
|
+
#
|
|
82
|
+
@@no_partial = /\"\Z/
|
|
83
|
+
@@partial = /\*\Z/
|
|
84
|
+
def partialize
|
|
85
|
+
self.partial = false and return unless @text !~ @@no_partial
|
|
86
|
+
self.partial = true unless @text !~ @@partial
|
|
87
|
+
end
|
|
127
88
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
# Note: Also overrides the original.
|
|
139
|
-
#
|
|
140
|
-
def next_similar bundle
|
|
141
|
-
@text = @original = (similarity(bundle).shift || return) if similar?
|
|
142
|
-
end
|
|
143
|
-
# Lazy similar reader.
|
|
144
|
-
#
|
|
145
|
-
def similarity bundle = nil
|
|
146
|
-
@similarity || @similarity = generate_similarity_for(bundle)
|
|
147
|
-
end
|
|
148
|
-
# Returns an enumerator that traverses over the similar.
|
|
149
|
-
#
|
|
150
|
-
# Note: The dup isn't too nice – since it is needed on account of the shift, above.
|
|
151
|
-
# (We avoid a StopIteration exception. Which of both is less evil?)
|
|
152
|
-
#
|
|
153
|
-
def generate_similarity_for bundle
|
|
154
|
-
bundle.similar(@text).dup || []
|
|
155
|
-
end
|
|
89
|
+
# If the text ends with ~ similarize it. If with ", don't.
|
|
90
|
+
#
|
|
91
|
+
# The latter wins.
|
|
92
|
+
#
|
|
93
|
+
@@no_similar = /\"\Z/
|
|
94
|
+
@@similar = /\~\Z/
|
|
95
|
+
def similarize
|
|
96
|
+
self.similar = false and return if @text =~ @@no_similar
|
|
97
|
+
self.similar = true if @text =~ @@similar
|
|
98
|
+
end
|
|
156
99
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
[@original, @text]
|
|
161
|
-
end
|
|
100
|
+
def similar?
|
|
101
|
+
@similar
|
|
102
|
+
end
|
|
162
103
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
104
|
+
# Normalizes this token's text.
|
|
105
|
+
#
|
|
106
|
+
@@illegals = /["*~]/
|
|
107
|
+
def remove_illegals
|
|
108
|
+
@text.gsub! @@illegals, '' unless @text.blank?
|
|
109
|
+
end
|
|
168
110
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
111
|
+
#
|
|
112
|
+
#
|
|
113
|
+
def symbolize
|
|
114
|
+
@text = @text.to_sym
|
|
115
|
+
end
|
|
174
116
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
@@split_qualifiers = ','
|
|
181
|
-
def to_s
|
|
182
|
-
[@qualifiers && @qualifiers.join(@@split_qualifiers), @text].compact.join @@split_qualifier_text
|
|
183
|
-
end
|
|
117
|
+
# Returns an array of possible combinations.
|
|
118
|
+
#
|
|
119
|
+
def possible_combinations_in index
|
|
120
|
+
index.possible_combinations self
|
|
121
|
+
end
|
|
184
122
|
|
|
185
|
-
|
|
123
|
+
# Returns a token with the next similar text.
|
|
124
|
+
#
|
|
125
|
+
# THINK Rewrite this. It is hard to understand. Also spec performance.
|
|
126
|
+
#
|
|
127
|
+
def next_similar_token category
|
|
128
|
+
token = self.dup
|
|
129
|
+
token if token.next_similar category.bundle_for(token)
|
|
130
|
+
end
|
|
131
|
+
# Sets and returns the next similar word.
|
|
132
|
+
#
|
|
133
|
+
# Note: Also overrides the original.
|
|
134
|
+
#
|
|
135
|
+
def next_similar bundle
|
|
136
|
+
@text = @original = (similarity(bundle).shift || return) if similar?
|
|
137
|
+
end
|
|
138
|
+
# Lazy similar reader.
|
|
139
|
+
#
|
|
140
|
+
def similarity bundle = nil
|
|
141
|
+
@similarity || @similarity = generate_similarity_for(bundle)
|
|
142
|
+
end
|
|
143
|
+
# Returns an enumerator that traverses over the similar.
|
|
144
|
+
#
|
|
145
|
+
# Note: The dup isn't too nice – but it is needed on account of the shift, above.
|
|
146
|
+
# (We avoid a StopIteration exception. Which of both is less evil?)
|
|
147
|
+
#
|
|
148
|
+
def generate_similarity_for bundle
|
|
149
|
+
bundle.similar(@text).dup || []
|
|
150
|
+
end
|
|
186
151
|
|
|
187
152
|
# Splits text into a qualifier and text.
|
|
188
153
|
#
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
def
|
|
192
|
-
qualifiers, text = (
|
|
193
|
-
if text.blank?
|
|
194
|
-
[nil, (qualifiers || '')]
|
|
154
|
+
@@split_qualifier_text = ':'
|
|
155
|
+
@@split_qualifiers = ','
|
|
156
|
+
def qualify
|
|
157
|
+
@qualifiers, @text = (@text || '').split(@@split_qualifier_text, 2)
|
|
158
|
+
@qualifiers, @text = if @text.blank?
|
|
159
|
+
[nil, (@qualifiers || '')]
|
|
195
160
|
else
|
|
196
|
-
[qualifiers.split(@@split_qualifiers), text]
|
|
161
|
+
[@qualifiers.split(@@split_qualifiers), @text]
|
|
197
162
|
end
|
|
198
163
|
end
|
|
199
164
|
|
|
165
|
+
#
|
|
166
|
+
#
|
|
167
|
+
def to_result
|
|
168
|
+
[@original, @text]
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Internal identifier.
|
|
172
|
+
#
|
|
173
|
+
def identifier
|
|
174
|
+
"#{similar?? :similarity : :inverted}:#{@text}"
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# If the originals & the text are the same, they are the same.
|
|
178
|
+
#
|
|
179
|
+
def == other
|
|
180
|
+
self.original == other.original && self.text == other.text
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Displays the text and the qualifiers.
|
|
184
|
+
#
|
|
185
|
+
# e.g. name:meier
|
|
186
|
+
#
|
|
187
|
+
def to_s
|
|
188
|
+
"#{self.class}(#{[@text, (@qualifiers.inspect unless @qualifiers.blank?)].compact.join(', ')})"
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
end
|
|
192
|
+
|
|
200
193
|
end
|
|
201
194
|
|
|
202
195
|
end
|
data/lib/picky/query/tokens.rb
CHANGED
|
@@ -1,101 +1,105 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
module Query
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
#
|
|
3
|
+
# encoding: utf-8
|
|
6
4
|
#
|
|
7
|
-
|
|
5
|
+
module Query
|
|
8
6
|
|
|
9
|
-
#
|
|
7
|
+
# This class primarily handles switching through similar token constellations.
|
|
10
8
|
#
|
|
11
|
-
|
|
9
|
+
class Tokens # :nodoc:all
|
|
12
10
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@tokens = tokens
|
|
17
|
-
end
|
|
11
|
+
# Basically delegates to its internal tokens array.
|
|
12
|
+
#
|
|
13
|
+
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
|
18
14
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def self.processed words, downcase = true
|
|
25
|
-
new words.collect! { |word| Token.processed word, downcase }
|
|
26
|
-
end
|
|
15
|
+
# Create a new Tokens object with the array of tokens passed in.
|
|
16
|
+
#
|
|
17
|
+
def initialize tokens = []
|
|
18
|
+
@tokens = tokens
|
|
19
|
+
end
|
|
27
20
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
21
|
+
# Creates a new Tokens object from a number of Strings.
|
|
22
|
+
#
|
|
23
|
+
# Options:
|
|
24
|
+
# * downcase: Whether to downcase the passed strings (default is true)
|
|
25
|
+
#
|
|
26
|
+
def self.processed words, downcase = true
|
|
27
|
+
new words.collect! { |word| Token.processed word, downcase }
|
|
28
|
+
end
|
|
35
29
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
#
|
|
43
|
-
def possible_combinations_in index
|
|
44
|
-
@tokens.inject([]) do |combinations, token|
|
|
45
|
-
possible_combinations = token.possible_combinations_in index
|
|
46
|
-
|
|
47
|
-
# TODO Could move the ignore_unassigned_tokens here!
|
|
48
|
-
#
|
|
49
|
-
# Note: Optimization for ignoring tokens that allocate to nothing and
|
|
50
|
-
# can be ignored.
|
|
51
|
-
# For example in a special search, where "florian" is not
|
|
52
|
-
# mapped to any category.
|
|
53
|
-
#
|
|
54
|
-
possible_combinations ? combinations << possible_combinations : combinations
|
|
30
|
+
# Tokenizes each token.
|
|
31
|
+
#
|
|
32
|
+
# Note: Passed tokenizer needs to offer #normalize(text).
|
|
33
|
+
#
|
|
34
|
+
def tokenize_with tokenizer
|
|
35
|
+
@tokens.each { |token| token.tokenize_with(tokenizer) }
|
|
55
36
|
end
|
|
56
|
-
end
|
|
57
37
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
38
|
+
# Generates an array in the form of
|
|
39
|
+
# [
|
|
40
|
+
# [combination], # of token 1
|
|
41
|
+
# [combination, combination, combination], # of token 2
|
|
42
|
+
# [combination, combination] # of token 3
|
|
43
|
+
# ]
|
|
44
|
+
#
|
|
45
|
+
def possible_combinations_in index
|
|
46
|
+
@tokens.inject([]) do |combinations, token|
|
|
47
|
+
possible_combinations = token.possible_combinations_in index
|
|
48
|
+
|
|
49
|
+
# TODO Could move the ignore_unassigned_tokens here!
|
|
50
|
+
#
|
|
51
|
+
# Note: Optimization for ignoring tokens that allocate to nothing and
|
|
52
|
+
# can be ignored.
|
|
53
|
+
# For example in a special search, where "florian" is not
|
|
54
|
+
# mapped to any category.
|
|
55
|
+
#
|
|
56
|
+
possible_combinations ? combinations << possible_combinations : combinations
|
|
57
|
+
end
|
|
58
|
+
end
|
|
63
59
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def cap? maximum
|
|
70
|
-
@tokens.size > maximum
|
|
71
|
-
end
|
|
60
|
+
# Makes the last of the tokens partial.
|
|
61
|
+
#
|
|
62
|
+
def partialize_last
|
|
63
|
+
@tokens.last.partial = true unless empty?
|
|
64
|
+
end
|
|
72
65
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
66
|
+
# Caps the tokens to the maximum.
|
|
67
|
+
#
|
|
68
|
+
def cap maximum
|
|
69
|
+
@tokens.slice!(maximum..-1) if cap?(maximum)
|
|
70
|
+
end
|
|
71
|
+
def cap? maximum
|
|
72
|
+
@tokens.size > maximum
|
|
73
|
+
end
|
|
78
74
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
75
|
+
# Rejects blank tokens.
|
|
76
|
+
#
|
|
77
|
+
def reject
|
|
78
|
+
@tokens.reject! &:blank?
|
|
79
|
+
end
|
|
84
80
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
81
|
+
#
|
|
82
|
+
#
|
|
83
|
+
def categorize mapper
|
|
84
|
+
@tokens.each { |token| token.categorize mapper }
|
|
85
|
+
end
|
|
90
86
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
87
|
+
#
|
|
88
|
+
#
|
|
89
|
+
def originals
|
|
90
|
+
@tokens.map(&:original)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def == other
|
|
94
|
+
self.tokens == other.tokens
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Just join the token original texts.
|
|
98
|
+
#
|
|
99
|
+
def to_s
|
|
100
|
+
originals.join ' '
|
|
101
|
+
end
|
|
94
102
|
|
|
95
|
-
# Just join the token original texts.
|
|
96
|
-
#
|
|
97
|
-
def to_s
|
|
98
|
-
originals.join ' '
|
|
99
103
|
end
|
|
100
104
|
|
|
101
105
|
end
|
data/lib/picky/query/weights.rb
CHANGED
|
@@ -1,62 +1,58 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
class Weights # :nodoc:all
|
|
3
|
+
module Query
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
#
|
|
5
|
+
# Calculates weights for certain combinations.
|
|
10
6
|
#
|
|
11
|
-
|
|
12
|
-
@weights = weights
|
|
13
|
-
end
|
|
7
|
+
class Weights # :nodoc:all
|
|
14
8
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@weights[clustered] || 0
|
|
19
|
-
end
|
|
9
|
+
attr_reader :weights
|
|
10
|
+
|
|
11
|
+
delegate :empty?, :to => :weights
|
|
20
12
|
|
|
21
|
-
# Returns an energy term E for allocation. this turns into a probability
|
|
22
|
-
# by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
|
|
23
|
-
# where Z is the normalizing partition function
|
|
24
|
-
# sum_allocations exp(-1/T *E(allocation)), and T is a temperature constant.
|
|
25
|
-
# If T is high the distribution will be close to equally distributed.
|
|
26
|
-
# If T is low, the distribution will be the indicator function
|
|
27
|
-
# for min (E(allocation))…
|
|
28
|
-
#
|
|
29
|
-
# ...
|
|
30
|
-
#
|
|
31
|
-
# Just kidding. It's far more complicated than that. Ha ha ha ha ;)
|
|
32
|
-
#
|
|
33
|
-
# Note: Cache this if more complicated weighings become necessary.
|
|
34
|
-
#
|
|
35
|
-
def score combinations
|
|
36
|
-
# TODO Or hide: combinations#to_weights_key (but it's an array, so…)
|
|
37
13
|
#
|
|
38
|
-
# TODO combinations could cluster uniq as combinations are added (since combinations don't change).
|
|
39
14
|
#
|
|
40
|
-
|
|
15
|
+
def initialize weights = {}
|
|
16
|
+
@weights = weights
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Get the weight of an allocation.
|
|
41
20
|
#
|
|
42
|
-
weight_for
|
|
43
|
-
|
|
21
|
+
def weight_for clustered
|
|
22
|
+
@weights[clustered] || 0
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Returns an energy term E for allocation. This turns into a probability
|
|
26
|
+
# by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
|
|
27
|
+
# where Z is the normalizing partition function
|
|
28
|
+
# sum_allocations exp(-1/T *E(allocation)), and T is a temperature constant.
|
|
29
|
+
# If T is high the distribution will be close to equally distributed.
|
|
30
|
+
# If T is low, the distribution will be the indicator function
|
|
31
|
+
# for min (E(allocation))…
|
|
32
|
+
#
|
|
33
|
+
# ...
|
|
34
|
+
#
|
|
35
|
+
# Just kidding. It's far more complicated than that. Ha ha ha ha ;)
|
|
36
|
+
#
|
|
37
|
+
# Note: Cache this if more complicated weighings become necessary.
|
|
38
|
+
#
|
|
39
|
+
def score combinations
|
|
40
|
+
# TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
|
|
41
|
+
#
|
|
42
|
+
weight_for combinations.map(&:category_name).clustered_uniq_fast
|
|
43
|
+
end
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
@weights.empty?
|
|
49
|
-
end
|
|
45
|
+
def == other
|
|
46
|
+
@weights == other.weights
|
|
47
|
+
end
|
|
50
48
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
# Prints out a nice representation of the configured weights.
|
|
50
|
+
#
|
|
51
|
+
def to_s
|
|
52
|
+
"#{self.class}(#{@weights})"
|
|
53
|
+
end
|
|
54
54
|
|
|
55
|
-
# Prints out a nice representation of the configured weights.
|
|
56
|
-
#
|
|
57
|
-
def to_s
|
|
58
|
-
@weights.to_s
|
|
59
55
|
end
|
|
60
|
-
|
|
61
56
|
end
|
|
57
|
+
|
|
62
58
|
end
|