picky 2.7.0 → 3.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,202 +1,195 @@
1
- module Query
1
+ module Picky
2
2
 
3
- # This is a query token. Together with other tokens it makes up a query.
4
- #
5
- # It remembers the original form, and and a normalized form.
6
- #
7
- # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
8
- #
9
- class Token # :nodoc:all
3
+ module Query
10
4
 
11
- attr_reader :text, :original
12
- attr_writer :similar
13
-
14
- delegate :blank?, :to => :text
15
-
16
- # Normal initializer.
17
- #
18
- # Note: Use this if you do not want a qualified and normalized token.
19
- #
20
- # TODO text, qualifiers
5
+ # This is a query token. Together with other tokens it makes up a query.
21
6
  #
22
- def initialize text
23
- @text = text
24
- end
25
-
26
- # Returns a qualified and normalized token.
7
+ # It remembers the original form, and a normalized form.
27
8
  #
28
- # Note: Use this in the search engine if you need a qualified
29
- # and normalized token. I.e. one prepared for a search.
9
+ # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
30
10
  #
31
- def self.processed text, downcase = true
32
- new(text).process downcase
33
- end
34
- def process downcases = true
35
- qualify
36
- extract_original
37
- downcase if downcases
38
- partialize
39
- similarize
40
- remove_illegals
41
- symbolize
42
- self
43
- end
11
+ class Token # :nodoc:all
44
12
 
45
- # This returns an array of predefined category names if the user has given any.
46
- #
47
- def user_defined_category_names
48
- @qualifiers
49
- end
50
-
51
- # Extracts a qualifier for this token and pre-assigns an allocation.
52
- #
53
- # Note: Removes the qualifier if it is not allowed.
54
- #
55
- # TODO Extract this sind it is Search-based.
56
- #
57
- def qualify
58
- @qualifiers, @text = split @text
59
- @qualifiers && @qualifiers.collect! { |qualifier| Query::Qualifiers.instance.normalize qualifier }.compact!
60
- @qualifiers
61
- end
62
- def extract_original
63
- @original = @text.dup
64
- end
13
+ attr_reader :text, :original, :qualifiers, :user_defined_categories
14
+ attr_writer :similar
65
15
 
66
- # Downcases the text.
67
- #
68
- def downcase
69
- @text.downcase!
70
- end
16
+ delegate :blank?, :to => :text
71
17
 
72
- # Partial is a conditional setter.
73
- #
74
- # It is only settable if it hasn't been set yet.
75
- #
76
- def partial= partial
77
- @partial = partial if @partial.nil?
78
- end
79
- def partial?
80
- !@similar && @partial
81
- end
18
+ # Normal initializer.
19
+ #
20
+ # Note: Use this if you do not want a normalized token.
21
+ #
22
+ def initialize text
23
+ @text = text
24
+ end
82
25
 
83
- # If the text ends with *, partialize it. If with ", don't.
84
- #
85
- # The latter wins. So "hello*" will not be partially searched.
86
- #
87
- @@no_partial = /\"\Z/
88
- @@partial = /\*\Z/
89
- def partialize
90
- self.partial = false and return unless @text !~ @@no_partial
91
- self.partial = true unless @text !~ @@partial
92
- end
26
+ # Returns a qualified and normalized token.
27
+ #
28
+ # Note: Use this in the search engine if you need a qualified
29
+ # and normalized token. I.e. one prepared for a search.
30
+ #
31
+ def self.processed text, downcase = true
32
+ new(text).process downcase
33
+ end
34
+ def process downcased = true
35
+ qualify
36
+ extract_original
37
+ downcase if downcased
38
+ partialize
39
+ similarize
40
+ remove_illegals
41
+ symbolize
42
+ self
43
+ end
93
44
 
94
- # If the text ends with ~ similarize it. If with ", don't.
95
- #
96
- # The latter wins.
97
- #
98
- @@no_similar = /\"\Z/
99
- @@similar = /\~\Z/
100
- def similarize
101
- self.similar = false and return if @text =~ @@no_similar
102
- self.similar = true if @text =~ @@similar
103
- end
45
+ # Translates this token's qualifiers into actual categories.
46
+ #
47
+ # Note: If this is not done, there is no mapping.
48
+ #
49
+ def categorize mapper
50
+ @user_defined_categories = @qualifiers && @qualifiers.map do |qualifier|
51
+ mapper.map qualifier
52
+ end.compact
53
+ end
104
54
 
105
- def similar?
106
- @similar
107
- end
55
+ # Dups the original text.
56
+ #
57
+ def extract_original
58
+ @original = @text.dup
59
+ end
108
60
 
109
- # Normalizes this token's text.
110
- #
111
- @@illegals = /["*~]/
112
- def remove_illegals
113
- @text.gsub! @@illegals, '' unless @text.blank?
114
- end
61
+ # Downcases the text.
62
+ #
63
+ def downcase
64
+ @text.downcase!
65
+ end
115
66
 
116
- #
117
- #
118
- def symbolize
119
- @text = @text.to_sym
120
- end
67
+ # Partial is a conditional setter.
68
+ #
69
+ # It is only settable if it hasn't been set yet.
70
+ #
71
+ def partial= partial
72
+ @partial = partial if @partial.nil?
73
+ end
74
+ def partial?
75
+ !@similar && @partial
76
+ end
121
77
 
122
- # Returns an array of possible combinations.
123
- #
124
- def possible_combinations_in index
125
- index.possible_combinations self
126
- end
78
+ # If the text ends with *, partialize it. If with ", don't.
79
+ #
80
+ # The latter wins. So "hello*" will not be partially searched.
81
+ #
82
+ @@no_partial = /\"\Z/
83
+ @@partial = /\*\Z/
84
+ def partialize
85
+ self.partial = false and return unless @text !~ @@no_partial
86
+ self.partial = true unless @text !~ @@partial
87
+ end
127
88
 
128
- # Returns a token with the next similar text.
129
- #
130
- # TODO Rewrite this. It is hard to understand. Also spec performance.
131
- #
132
- def next_similar_token category
133
- token = self.dup
134
- token if token.next_similar category.bundle_for(token)
135
- end
136
- # Sets and returns the next similar word.
137
- #
138
- # Note: Also overrides the original.
139
- #
140
- def next_similar bundle
141
- @text = @original = (similarity(bundle).shift || return) if similar?
142
- end
143
- # Lazy similar reader.
144
- #
145
- def similarity bundle = nil
146
- @similarity || @similarity = generate_similarity_for(bundle)
147
- end
148
- # Returns an enumerator that traverses over the similar.
149
- #
150
- # Note: The dup isn't too nice – since it is needed on account of the shift, above.
151
- # (We avoid a StopIteration exception. Which of both is less evil?)
152
- #
153
- def generate_similarity_for bundle
154
- bundle.similar(@text).dup || []
155
- end
89
+ # If the text ends with ~ similarize it. If with ", don't.
90
+ #
91
+ # The latter wins.
92
+ #
93
+ @@no_similar = /\"\Z/
94
+ @@similar = /\~\Z/
95
+ def similarize
96
+ self.similar = false and return if @text =~ @@no_similar
97
+ self.similar = true if @text =~ @@similar
98
+ end
156
99
 
157
- #
158
- #
159
- def to_result
160
- [@original, @text]
161
- end
100
+ def similar?
101
+ @similar
102
+ end
162
103
 
163
- # Internal identifier.
164
- #
165
- def identifier
166
- "#{similar?? :similarity : :inverted}:#{@text}"
167
- end
104
+ # Normalizes this token's text.
105
+ #
106
+ @@illegals = /["*~]/
107
+ def remove_illegals
108
+ @text.gsub! @@illegals, '' unless @text.blank?
109
+ end
168
110
 
169
- # If the originals & the text are the same, they are the same.
170
- #
171
- def == other
172
- self.original == other.original && self.text == other.text
173
- end
111
+ #
112
+ #
113
+ def symbolize
114
+ @text = @text.to_sym
115
+ end
174
116
 
175
- # Displays the qualifier text and the text, joined.
176
- #
177
- # e.g. name:meier
178
- #
179
- @@split_qualifier_text = ':'
180
- @@split_qualifiers = ','
181
- def to_s
182
- [@qualifiers && @qualifiers.join(@@split_qualifiers), @text].compact.join @@split_qualifier_text
183
- end
117
+ # Returns an array of possible combinations.
118
+ #
119
+ def possible_combinations_in index
120
+ index.possible_combinations self
121
+ end
184
122
 
185
- private
123
+ # Returns a token with the next similar text.
124
+ #
125
+ # THINK Rewrite this. It is hard to understand. Also spec performance.
126
+ #
127
+ def next_similar_token category
128
+ token = self.dup
129
+ token if token.next_similar category.bundle_for(token)
130
+ end
131
+ # Sets and returns the next similar word.
132
+ #
133
+ # Note: Also overrides the original.
134
+ #
135
+ def next_similar bundle
136
+ @text = @original = (similarity(bundle).shift || return) if similar?
137
+ end
138
+ # Lazy similar reader.
139
+ #
140
+ def similarity bundle = nil
141
+ @similarity || @similarity = generate_similarity_for(bundle)
142
+ end
143
+ # Returns an enumerator that traverses over the similar.
144
+ #
145
+ # Note: The dup isn't too nice, but it is needed on account of the shift above.
146
+ # (We avoid a StopIteration exception. Which of both is less evil?)
147
+ #
148
+ def generate_similarity_for bundle
149
+ bundle.similar(@text).dup || []
150
+ end
186
151
 
187
152
  # Splits text into a qualifier and text.
188
153
  #
189
- # Returns [qualifier, text].
190
- #
191
- def split unqualified_text
192
- qualifiers, text = (unqualified_text || '').split(@@split_qualifier_text, 2)
193
- if text.blank?
194
- [nil, (qualifiers || '')]
154
+ @@split_qualifier_text = ':'
155
+ @@split_qualifiers = ','
156
+ def qualify
157
+ @qualifiers, @text = (@text || '').split(@@split_qualifier_text, 2)
158
+ @qualifiers, @text = if @text.blank?
159
+ [nil, (@qualifiers || '')]
195
160
  else
196
- [qualifiers.split(@@split_qualifiers), text]
161
+ [@qualifiers.split(@@split_qualifiers), @text]
197
162
  end
198
163
  end
199
164
 
165
+ #
166
+ #
167
+ def to_result
168
+ [@original, @text]
169
+ end
170
+
171
+ # Internal identifier.
172
+ #
173
+ def identifier
174
+ "#{similar?? :similarity : :inverted}:#{@text}"
175
+ end
176
+
177
+ # If the originals & the text are the same, they are the same.
178
+ #
179
+ def == other
180
+ self.original == other.original && self.text == other.text
181
+ end
182
+
183
+ # Displays the text and the qualifiers.
184
+ #
185
+ # e.g. Picky::Query::Token(meier, ["name"])
186
+ #
187
+ def to_s
188
+ "#{self.class}(#{[@text, (@qualifiers.inspect unless @qualifiers.blank?)].compact.join(', ')})"
189
+ end
190
+
191
+ end
192
+
200
193
  end
201
194
 
202
195
  end
@@ -1,101 +1,105 @@
1
- # encoding: utf-8
2
- #
3
- module Query
1
+ module Picky
4
2
 
5
- # This class primarily handles switching through similar token constellations.
3
+ # encoding: utf-8
6
4
  #
7
- class Tokens # :nodoc:all
5
+ module Query
8
6
 
9
- # Basically delegates to its internal tokens array.
7
+ # This class primarily handles switching through similar token constellations.
10
8
  #
11
- self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
9
+ class Tokens # :nodoc:all
12
10
 
13
- # Create a new Tokens object with the array of tokens passed in.
14
- #
15
- def initialize tokens = []
16
- @tokens = tokens
17
- end
11
+ # Basically delegates to its internal tokens array.
12
+ #
13
+ self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
18
14
 
19
- # Creates a new Tokens object from a number of Strings.
20
- #
21
- # Options:
22
- # * downcase: Whether to downcase the passed strings (default is true)
23
- #
24
- def self.processed words, downcase = true
25
- new words.collect! { |word| Token.processed word, downcase }
26
- end
15
+ # Create a new Tokens object with the array of tokens passed in.
16
+ #
17
+ def initialize tokens = []
18
+ @tokens = tokens
19
+ end
27
20
 
28
- # Tokenizes each token.
29
- #
30
- # Note: Passed tokenizer needs to offer #normalize(text).
31
- #
32
- def tokenize_with tokenizer
33
- @tokens.each { |token| token.tokenize_with(tokenizer) }
34
- end
21
+ # Creates a new Tokens object from a number of Strings.
22
+ #
23
+ # Options:
24
+ # * downcase: Whether to downcase the passed strings (default is true)
25
+ #
26
+ def self.processed words, downcase = true
27
+ new words.collect! { |word| Token.processed word, downcase }
28
+ end
35
29
 
36
- # Generates an array in the form of
37
- # [
38
- # [combination], # of token 1
39
- # [combination, combination, combination], # of token 2
40
- # [combination, combination] # of token 3
41
- # ]
42
- #
43
- def possible_combinations_in index
44
- @tokens.inject([]) do |combinations, token|
45
- possible_combinations = token.possible_combinations_in index
46
-
47
- # TODO Could move the ignore_unassigned_tokens here!
48
- #
49
- # Note: Optimization for ignoring tokens that allocate to nothing and
50
- # can be ignored.
51
- # For example in a special search, where "florian" is not
52
- # mapped to any category.
53
- #
54
- possible_combinations ? combinations << possible_combinations : combinations
30
+ # Tokenizes each token.
31
+ #
32
+ # Note: Passed tokenizer needs to offer #normalize(text).
33
+ #
34
+ def tokenize_with tokenizer
35
+ @tokens.each { |token| token.tokenize_with(tokenizer) }
55
36
  end
56
- end
57
37
 
58
- # Makes the last of the tokens partial.
59
- #
60
- def partialize_last
61
- @tokens.last.partial = true unless empty?
62
- end
38
+ # Generates an array in the form of
39
+ # [
40
+ # [combination], # of token 1
41
+ # [combination, combination, combination], # of token 2
42
+ # [combination, combination] # of token 3
43
+ # ]
44
+ #
45
+ def possible_combinations_in index
46
+ @tokens.inject([]) do |combinations, token|
47
+ possible_combinations = token.possible_combinations_in index
48
+
49
+ # TODO Could move the ignore_unassigned_tokens here!
50
+ #
51
+ # Note: Optimization for ignoring tokens that allocate to nothing and
52
+ # can be ignored.
53
+ # For example in a special search, where "florian" is not
54
+ # mapped to any category.
55
+ #
56
+ possible_combinations ? combinations << possible_combinations : combinations
57
+ end
58
+ end
63
59
 
64
- # Caps the tokens to the maximum.
65
- #
66
- def cap maximum
67
- @tokens.slice!(maximum..-1) if cap?(maximum)
68
- end
69
- def cap? maximum
70
- @tokens.size > maximum
71
- end
60
+ # Makes the last of the tokens partial.
61
+ #
62
+ def partialize_last
63
+ @tokens.last.partial = true unless empty?
64
+ end
72
65
 
73
- # Rejects blank tokens.
74
- #
75
- def reject
76
- @tokens.reject! &:blank?
77
- end
66
+ # Caps the tokens to the maximum.
67
+ #
68
+ def cap maximum
69
+ @tokens.slice!(maximum..-1) if cap?(maximum)
70
+ end
71
+ def cap? maximum
72
+ @tokens.size > maximum
73
+ end
78
74
 
79
- # Returns a solr query.
80
- #
81
- def to_solr_query
82
- @tokens.map(&:to_solr).join ' '
83
- end
75
+ # Rejects blank tokens.
76
+ #
77
+ def reject
78
+ @tokens.reject! &:blank?
79
+ end
84
80
 
85
- #
86
- #
87
- def originals
88
- @tokens.map(&:original)
89
- end
81
+ #
82
+ #
83
+ def categorize mapper
84
+ @tokens.each { |token| token.categorize mapper }
85
+ end
90
86
 
91
- def == other
92
- self.tokens == other.tokens
93
- end
87
+ #
88
+ #
89
+ def originals
90
+ @tokens.map(&:original)
91
+ end
92
+
93
+ def == other
94
+ self.tokens == other.tokens
95
+ end
96
+
97
+ # Just join the token original texts.
98
+ #
99
+ def to_s
100
+ originals.join ' '
101
+ end
94
102
 
95
- # Just join the token original texts.
96
- #
97
- def to_s
98
- originals.join ' '
99
103
  end
100
104
 
101
105
  end
@@ -1,62 +1,58 @@
1
- module Query
1
+ module Picky
2
2
 
3
- # Calculates weights for certain combinations.
4
- #
5
- class Weights # :nodoc:all
3
+ module Query
6
4
 
7
- attr_reader :weights
8
-
9
- #
5
+ # Calculates weights for certain combinations.
10
6
  #
11
- def initialize weights = {}
12
- @weights = weights
13
- end
7
+ class Weights # :nodoc:all
14
8
 
15
- # Get the weight of an allocation.
16
- #
17
- def weight_for clustered
18
- @weights[clustered] || 0
19
- end
9
+ attr_reader :weights
10
+
11
+ delegate :empty?, :to => :weights
20
12
 
21
- # Returns an energy term E for allocation. this turns into a probability
22
- # by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
23
- # where Z is the normalizing partition function
24
- # sum_allocations exp(-1/T *E(allocation)), and T is a temperature constant.
25
- # If T is high the distribution will be close to equally distributed.
26
- # If T is low, the distribution will be the indicator function
27
- # for min (E(allocation))…
28
- #
29
- # ...
30
- #
31
- # Just kidding. It's far more complicated than that. Ha ha ha ha ;)
32
- #
33
- # Note: Cache this if more complicated weighings become necessary.
34
- #
35
- def score combinations
36
- # TODO Or hide: combinations#to_weights_key (but it's an array, so…)
37
13
  #
38
- # TODO combinations could cluster uniq as combinations are added (since combinations don't change).
39
14
  #
40
- # TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
15
+ def initialize weights = {}
16
+ @weights = weights
17
+ end
18
+
19
+ # Get the weight of an allocation.
41
20
  #
42
- weight_for combinations.map(&:category_name).clustered_uniq_fast
43
- end
21
+ def weight_for clustered
22
+ @weights[clustered] || 0
23
+ end
24
+
25
+ # Returns an energy term E for allocation. This turns into a probability
26
+ # by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
27
+ # where Z is the normalizing partition function
28
+ # sum_allocations exp(-1/T *E(allocation)), and T is a temperature constant.
29
+ # If T is high the distribution will be close to equally distributed.
30
+ # If T is low, the distribution will be the indicator function
31
+ # for min (E(allocation))…
32
+ #
33
+ # ...
34
+ #
35
+ # Just kidding. It's far more complicated than that. Ha ha ha ha ;)
36
+ #
37
+ # Note: Cache this if more complicated weighings become necessary.
38
+ #
39
+ def score combinations
40
+ # TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
41
+ #
42
+ weight_for combinations.map(&:category_name).clustered_uniq_fast
43
+ end
44
44
 
45
- # Are there any weights defined?
46
- #
47
- def empty?
48
- @weights.empty?
49
- end
45
+ def == other
46
+ @weights == other.weights
47
+ end
50
48
 
51
- def == other
52
- @weights == other.weights
53
- end
49
+ # Prints out a nice representation of the configured weights.
50
+ #
51
+ def to_s
52
+ "#{self.class}(#{@weights})"
53
+ end
54
54
 
55
- # Prints out a nice representation of the configured weights.
56
- #
57
- def to_s
58
- @weights.to_s
59
55
  end
60
-
61
56
  end
57
+
62
58
  end
data/lib/picky/query.rb CHANGED
@@ -1,2 +1,6 @@
1
- module Query # :nodoc:all
1
+ module Picky
2
+
3
+ module Query # :nodoc:all
4
+ end
5
+
2
6
  end