picky 2.7.0 → 3.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,409 +0,0 @@
1
- # = Picky Indexes
2
- #
3
- # A Picky Index defines
4
- # * where its data comes from (a data source).
5
- # * how this data is indexed.
6
- # * a number of categories that may or may not map directly to data categories.
7
- #
8
- # == Howto
9
- #
10
- # This is a step-by-step description on how to create an index.
11
- #
12
- # Start by choosing an <tt>Index::Memory</tt> or an <tt>Index::Redis</tt>.
13
- # In the example, we will be using an in-memory index, <tt>Index::Memory</tt>.
14
- #
15
- # books = Index::Memory.new(:books)
16
- #
17
- # That in itself won't do much good, that's why we add a data source:
18
- #
19
- # books = Index::Memory.new(:books) do
20
- # source Sources::CSV.new(:title, :author, file: 'data/books.csv')
21
- # end
22
- #
23
- # In the example, we use an explicit <tt>Sources::CSV</tt> of Picky.
24
- # However, anything that responds to <tt>#each</tt>, and returns an object that
25
- # answers to <tt>#id</tt>, works.
26
- #
27
- # For example, a 3.0 ActiveRecord class:
28
- #
29
- # books = Index::Memory.new(:books) do
30
- # source Book.order('isbn ASC')
31
- # end
32
- #
33
- # Now we know where the data comes from, but not how to categorize it.
34
- #
35
- # Let's add a few categories:
36
- #
37
- # books = Index::Memory.new(:books) do
38
- # source Book.order('isbn ASC')
39
- # category :title
40
- # category :author
41
- # category :isbn
42
- # end
43
- #
44
- # Categories offer quite a few options, see <tt>Index::Base#category</tt> for details.
45
- #
46
- # After adding more options, it might look like this:
47
- #
48
- # books = Index::Memory.new(:books) do
49
- # source Book.order('isbn ASC')
50
- # category :title,
51
- # partial: Partial::Substring.new(from: 1),
52
- # similarity: Similarity::DoubleMetaphone.new(3),
53
- # qualifiers: [:t, :title, :titulo]
54
- # category :author,
55
- # similarity: Similarity::Metaphone.new(2)
56
- # category :isbn,
57
- # partial: Partial::None.new,
58
- # from: :legacy_isbn_name
59
- # end
60
- #
61
- # For this to work, a <tt>Book</tt> should support methods <tt>#title</tt>, <tt>#author</tt> and <tt>#legacy_isbn_name</tt>.
62
- #
63
- # If it uses <tt>String</tt> ids, use <tt>#key_format</tt> to define a formatting method:
64
- #
65
- # books = Index::Memory.new(:books) do
66
- # key_format :to_s
67
- # source Book.order('isbn ASC')
68
- # category :title
69
- # category :author
70
- # category :isbn
71
- # end
72
- #
73
- # Finally, use the index for a <tt>Search</tt>:
74
- #
75
- # route %r{^/media$} => Search.new(books, dvds, mp3s)
76
- #
77
- module Index
78
-
79
- # This class defines the indexing and index API that is exposed to the user
80
- # as the #index method inside the Application class.
81
- #
82
- # It provides a single front for both indexing and index options. We suggest always using the index API.
83
- #
84
- # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
85
- #
86
- class Base
87
-
88
- attr_reader :name,
89
- :categories
90
-
91
- delegate :[],
92
- :each_category,
93
- :to => :categories
94
-
95
- # Create a new index with a given source.
96
- #
97
- # === Parameters
98
- # * name: A name that will be used for the index directory and in the Picky front end.
99
- #
100
- # === Options
101
- # * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
102
- # * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
103
- # * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
104
- # * tokenizer: The tokenizer to use for this index. Optional, can be defined in the block using #indexing.
105
- # * key_format: The format the ids of this index are in. Optional, can be defined in the block using #key_format.
106
- #
107
- # Examples:
108
- # my_index = Index::Memory.new(:my_index, source: some_source) do
109
- # category :bla
110
- # end
111
- #
112
- # my_index = Index::Memory.new(:my_index) do
113
- # source Sources::CSV.new(file: 'data/index.csv')
114
- # category :bla
115
- # end
116
- #
117
- #
118
- def initialize name, options = {}
119
- check_name name
120
- @name = name.to_sym
121
-
122
- check_options options
123
-
124
- @source = options[:source]
125
-
126
- @after_indexing = options[:after_indexing]
127
- @indexing_bundle_class = options[:indexing_bundle_class] # TODO This should probably be a fixed parameter.
128
- @tokenizer = options[:tokenizer]
129
- @key_format = options[:key_format]
130
-
131
- # Indexed.
132
- #
133
- @result_identifier = options[:result_identifier] || name
134
- @indexed_bundle_class = options[:indexed_bundle_class] # TODO This should probably be a fixed parameter.
135
-
136
- # TODO Move ignore_unassigned_tokens to query, somehow.
137
- #
138
- @categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
139
-
140
- # Centralized registry.
141
- #
142
- Indexes.register self
143
-
144
- #
145
- #
146
- instance_eval(&Proc.new) if block_given?
147
-
148
- # Check if any source has been given in the block or the options.
149
- #
150
- check_source @source
151
- end
152
-
153
- # Default bundles.
154
- #
155
- def indexing_bundle_class
156
- Indexing::Bundle::Memory
157
- end
158
- def indexed_bundle_class
159
- Indexed::Bundle::Memory
160
- end
161
-
162
- # Defines a searchable category on the index.
163
- #
164
- # === Parameters
165
- # * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
166
- #
167
- # === Options
168
- # * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
169
- # * similarity: Similarity::None.new or Similarity::DoubleMetaphone.new(similar_words_searched). Default is Similarity::None.new.
170
- # * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
171
- # * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier => :title. Default is the name of the category.
172
- # * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
173
- # * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
174
- #
175
- def category category_name, options = {}
176
- options = default_category_options.merge options
177
-
178
- new_category = Category.new category_name.to_sym, self, options
179
- categories << new_category
180
-
181
- new_category = yield new_category if block_given?
182
-
183
- new_category
184
- end
185
- alias define_category category
186
-
187
- # By default, the category uses
188
- # * the index's bundle type.
189
- #
190
- def default_category_options
191
- {
192
- :indexed_bundle_class => @indexed_bundle_class
193
- }
194
- end
195
-
196
- # Make this category range searchable with a fixed range. If you need other
197
- # ranges, define another category with a different range value.
198
- #
199
- # Example:
200
- # You have data values inside 1..100, and you want to have Picky return
201
- # not only the results for 47 if you search for 47, but also results for
202
- # 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
203
- #
204
- # Then you use:
205
- # ranged_category :values_inside_1_100, 2
206
- #
207
- # Optionally, you give it a precision value to reduce the error margin
208
- # around 47 (Picky is a bit liberal).
209
- # Index::Memory.new :range do
210
- # ranged_category :values_inside_1_100, 2, precision: 5
211
- # end
212
- #
213
- # This will limit Picky's maximum error to 5% of the given range value
214
- # (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
215
- #
216
- # We suggest not to use much more than 5 as a higher precision is more
217
- # performance intensive for less and less precision gain.
218
- #
219
- # == Protip 1
220
- #
221
- # Create two ranged categories to make an area search:
222
- # Index::Memory.new :area do
223
- # ranged_category :x, 1
224
- # ranged_category :y, 1
225
- # end
226
- #
227
- # Search for it using for example:
228
- # x:133, y:120
229
- #
230
- # This will search this square area (* = 133, 120: The "search" point entered):
231
- #
232
- # 132 134
233
- # | |
234
- # --|---------|-- 121
235
- # | |
236
- # | * |
237
- # | |
238
- # --|---------|-- 119
239
- # | |
240
- #
241
- # Note: The area does not need to be square, but can be rectangular.
242
- #
243
- # == Protip 2
244
- #
245
- # Create three ranged categories to make a volume search.
246
- #
247
- # Or go crazy and use 4 ranged categories for a space/time search! ;)
248
- #
249
- # === Parameters
250
- # * category_name: The category_name as used in #define_category.
251
- # * range: The range (in the units of your data values) around the query point where we search for results.
252
- #
253
- # -----|<- range ->*------------|-----
254
- #
255
- # === Options
256
- # * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
257
- # * ... all options of #define_category.
258
- #
259
- def ranged_category category_name, range, options = {}
260
- precision = options[:precision] || 1
261
-
262
- options = { partial: Partial::None.new }.merge options
263
-
264
- define_category category_name, options do |category|
265
- Indexing::Wrappers::Category::Location.install_on category, range, precision
266
- Indexed::Wrappers::Category::Location.install_on category, range, precision
267
- end
268
- end
269
- alias define_ranged_category ranged_category
270
-
271
- # HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
272
- #
273
- # Also a range search (see #ranged_category), but on the earth's surface.
274
- #
275
- # Parameters:
276
- # * lat_name: The latitude's name as used in #define_category.
277
- # * lng_name: The longitude's name as used in #define_category.
278
- # * radius: The distance (in km) around the query point which we search for results.
279
- #
280
- # Note: Picky uses a square, not a circle. That should be ok for most usages.
281
- #
282
- # -----------------------------
283
- # | |
284
- # | |
285
- # | |
286
- # | |
287
- # | |
288
- # | *<- radius ->|
289
- # | |
290
- # | |
291
- # | |
292
- # | |
293
- # | |
294
- # -----------------------------
295
- #
296
- # Options
297
- # * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
298
- # * lat_from: The data category to take the data for the latitude from.
299
- # * lng_from: The data category to take the data for the longitude from.
300
- #
301
- # TODO Will have to write a wrapper that combines two categories that are
302
- # indexed simultaneously, since lat/lng are correlated.
303
- #
304
- def geo_categories lat_name, lng_name, radius, options = {} # :nodoc:
305
-
306
- # Extract lat/lng specific options.
307
- #
308
- lat_from = options.delete :lat_from
309
- lng_from = options.delete :lng_from
310
-
311
- # One can be a normal ranged_category.
312
- #
313
- ranged_category lat_name, radius*0.00898312, options.merge(from: lat_from)
314
-
315
- # The other needs to adapt the radius depending on the one.
316
- #
317
- # Depending on the latitude, the radius of the longitude
318
- # needs to enlarge, the closer we get to the pole.
319
- #
320
- # In our simplified case, the radius is given as if all the
321
- # locations were on the 45 degree line.
322
- #
323
- # This calculates km -> longitude (degrees).
324
- #
325
- # A degree on the 45 degree line is equal to ~222.6398 km.
326
- # So a km on the 45 degree line is equal to 0.01796624 degrees.
327
- #
328
- ranged_category lng_name, radius*0.01796624, options.merge(from: lng_from)
329
-
330
- end
331
- alias define_geo_categories geo_categories
332
-
333
- #
334
- # Since this is an API, we fail hard quickly.
335
- #
336
- def check_name name # :nodoc:
337
- raise ArgumentError.new(<<-NAME
338
-
339
-
340
- The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
341
- Examples:
342
- Index::Memory.new(:my_cool_index) # Recommended
343
- Index::Redis.new("a-redis-index")
344
- NAME
345
-
346
-
347
- ) unless name.respond_to?(:to_sym)
348
- end
349
- def check_options options # :nodoc:
350
- raise ArgumentError.new(<<-OPTIONS
351
-
352
-
353
- Sources are not passed in as second parameter for #{self.class.name} anymore, but either
354
- * as :source option:
355
- #{self.class.name}.new(#{name.inspect}, source: #{options})
356
- or
357
- * given to the #source method inside the config block:
358
- #{self.class.name}.new(#{name.inspect}) do
359
- source #{options}
360
- end
361
-
362
- Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
363
-
364
- All the best
365
- -- Picky
366
-
367
-
368
- OPTIONS
369
- ) unless options.respond_to?(:[])
370
- end
371
- def check_source source # :nodoc:
372
- raise ArgumentError.new(<<-SOURCE
373
-
374
-
375
- The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
376
- Or it could use one of the built-in sources:
377
- Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
378
- Sources::')}
379
-
380
-
381
- SOURCE
382
- ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
383
- end
384
-
385
- def to_stats # :nodoc:
386
- stats = <<-INDEX
387
- #{name} (#{self.class}):
388
- #{"source: #{source}".indented_to_s}
389
- #{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
390
- INDEX
391
- stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
392
- stats
393
- end
394
-
395
- # Identifier used for technical output.
396
- #
397
- def identifier
398
- "#{PICKY_ENVIRONMENT}:#{name}"
399
- end
400
-
401
- #
402
- #
403
- def to_s
404
- "#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
405
- end
406
-
407
- end
408
-
409
- end
@@ -1,29 +0,0 @@
1
- module Index
2
-
3
- #
4
- #
5
- class Base
6
-
7
- attr_reader :result_identifier,
8
- :combinator
9
-
10
- delegate :load_from_cache,
11
- :analyze,
12
- :reindex,
13
- :to => :categories
14
-
15
- alias reload load_from_cache
16
-
17
- # Return the possible combinations for this token.
18
- #
19
- # A combination is a tuple <token, index_bundle>.
20
- #
21
- # TODO Rename and delegate.
22
- #
23
- def possible_combinations token
24
- categories.possible_combinations_for token
25
- end
26
-
27
- end
28
-
29
- end
@@ -1,127 +0,0 @@
1
- module Index
2
-
3
- #
4
- #
5
- class Base
6
-
7
- attr_reader :after_indexing,
8
- :bundle_class
9
-
10
- # Delegators for indexing.
11
- #
12
- delegate :cache,
13
- :check,
14
- :clear,
15
- :backup,
16
- :restore,
17
- :to => :categories
18
-
19
- # Calling index on an index will call index
20
- # on every category.
21
- #
22
- # Decides whether to use a parallel indexer or whether to
23
- # delegate to each category to index themselves.
24
- #
25
- def index
26
- if source.respond_to?(:each)
27
- check_source_empty
28
- index_in_parallel
29
- else
30
- with_data_snapshot do
31
- categories.each &:index
32
- end
33
- end
34
- end
35
-
36
- # Check if the given enumerable source is empty.
37
- #
38
- # Note: Checking as early as possible to tell the
39
- # user as early as possible.
40
- #
41
- def check_source_empty
42
- warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
43
- end
44
-
45
- # Note: Duplicated in category_indexing.rb.
46
- #
47
- # Take a data snapshot if the source offers it.
48
- #
49
- def with_data_snapshot
50
- if source.respond_to? :with_snapshot
51
- source.with_snapshot(self) do
52
- yield
53
- end
54
- else
55
- yield
56
- end
57
- end
58
-
59
- # Indexes the categories in parallel.
60
- #
61
- # Only use where the category does have a #each source defined.
62
- #
63
- def index_in_parallel
64
- indexer = Indexers::Parallel.new self
65
- indexer.index categories
66
- categories.each &:cache
67
- end
68
-
69
- # Define an index tokenizer on the index.
70
- #
71
- # Parameters are the exact same as for indexing.
72
- #
73
- def indexing options = {}
74
- @tokenizer = Tokenizers::Index.new options
75
- end
76
- alias define_indexing indexing
77
-
78
- # Returns the installed tokenizer or the default.
79
- #
80
- # TODO Spec.
81
- #
82
- def tokenizer
83
- @tokenizer || Indexes.tokenizer
84
- end
85
-
86
- # Define a source on the index.
87
- #
88
- # Parameter is a source, either one of the standard sources or
89
- # anything responding to #each and returning objects that
90
- # respond to id and the category names (or the category from option).
91
- #
92
- def source some_source = nil
93
- some_source ? define_source(some_source) : (@source || raise_no_source)
94
- end
95
- def define_source source
96
- @source = source
97
- end
98
- def raise_no_source
99
- raise NoSourceSpecifiedException.new(<<-NO_SOURCE
100
-
101
-
102
- No source given for index #{name}. An index needs a source.
103
- Example:
104
- Index::Memory.new(:with_source) do
105
- source Sources::CSV.new(:title, file: 'data/books.csv')
106
- category :title
107
- category :author
108
- end
109
-
110
- NO_SOURCE
111
- )
112
- end
113
-
114
- # Define a key_format on the index.
115
- #
116
- # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
117
- #
118
- def key_format format = nil
119
- format ? define_key_format(format) : (@key_format || :to_i)
120
- end
121
- def define_key_format key_format
122
- @key_format = key_format
123
- end
124
-
125
- end
126
-
127
- end
@@ -1,16 +0,0 @@
1
- module Index
2
-
3
- # An index that is persisted in files, loaded at startup and kept in memory at runtime.
4
- #
5
- class Memory < Base
6
-
7
- def indexing_bundle_class
8
- Indexing::Bundle::Memory
9
- end
10
- def indexed_bundle_class
11
- Indexed::Bundle::Memory
12
- end
13
-
14
- end
15
-
16
- end
@@ -1,16 +0,0 @@
1
- module Index
2
-
3
- # An index that is persisted in Redis.
4
- #
5
- class Redis < Base
6
-
7
- def indexing_bundle_class
8
- Indexing::Bundle::Redis
9
- end
10
- def indexed_bundle_class
11
- Indexed::Bundle::Redis
12
- end
13
-
14
- end
15
-
16
- end
@@ -1,76 +0,0 @@
1
- # coding: utf-8
2
- #
3
- module Query
4
-
5
- # A single qualifier.
6
- #
7
- class Qualifier # :nodoc:all
8
-
9
- attr_reader :normalized_qualifier, :codes
10
-
11
- #
12
- #
13
- # codes is an array.
14
- #
15
- def initialize normalized_qualifier, codes
16
- @normalized_qualifier = normalized_qualifier
17
- @codes = codes.map &:to_sym
18
- end
19
-
20
- # Will overwrite if the key is present in the hash.
21
- #
22
- def inject_into hash
23
- codes.each do |code|
24
- hash[code] = normalized_qualifier
25
- end
26
- end
27
-
28
- end
29
-
30
- # Collection class for qualifiers.
31
- #
32
- class Qualifiers # :nodoc:all
33
-
34
- attr_reader :qualifiers, :normalization_mapping
35
-
36
- delegate :<<, :to => :qualifiers
37
-
38
- #
39
- #
40
- def initialize
41
- @qualifiers = []
42
- @normalization_mapping = {}
43
- end
44
- def self.instance
45
- @instanec ||= new
46
- end
47
-
48
- # TODO Spec.
49
- #
50
- def self.add name, qualifiers
51
- instance << Qualifier.new(name, qualifiers)
52
- end
53
-
54
- # Uses the qualifiers to prepare (optimize) the qualifier handling.
55
- #
56
- def prepare
57
- qualifiers.each do |qualifier|
58
- qualifier.inject_into normalization_mapping
59
- end
60
- end
61
-
62
- # Normalizes the given qualifier.
63
- #
64
- # Returns nil if it is not allowed, the normalized qualifier if it is.
65
- #
66
- # Note: Normalizes.
67
- #
68
- def normalize qualifier
69
- return nil if qualifier.blank?
70
-
71
- normalization_mapping[qualifier.to_sym]
72
- end
73
-
74
- end
75
-
76
- end