picky 2.7.0 → 3.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -0,0 +1,400 @@
1
+ module Picky
2
+
3
+ # = Picky Indexes
4
+ #
5
+ # A Picky Index defines
6
+ # * where its data comes from (a data source).
7
+ # * how this data is indexed.
8
+ # * a number of categories that may or may not map directly to data categories.
9
+ #
10
+ # == Howto
11
+ #
12
+ # This is a step-by-step description on how to create an index.
13
+ #
14
+ # Start by choosing an <tt>Indexes::Memory</tt> or an <tt>Indexes::Redis</tt>.
15
+ # In the example, we will be using an in-memory index, <tt>Indexes::Memory</tt>.
16
+ #
17
+ # books = Indexes::Memory.new(:books)
18
+ #
19
+ # That in itself won't do much good, that's why we add a data source:
20
+ #
21
+ # books = Indexes::Memory.new(:books) do
22
+ # source Sources::CSV.new(:title, :author, file: 'data/books.csv')
23
+ # end
24
+ #
25
+ # In the example, we use an explicit <tt>Sources::CSV</tt> of Picky.
26
+ # However, anything that responds to <tt>#each</tt>, and returns an object that
27
+ # answers to <tt>#id</tt>, works.
28
+ #
29
+ # For example, a 3.0 ActiveRecord class:
30
+ #
31
+ # books = Indexes::Memory.new(:books) do
32
+ # source Book.order('isbn ASC')
33
+ # end
34
+ #
35
+ # Now we know where the data comes from, but not how to categorize it.
36
+ #
37
+ # Let's add a few categories:
38
+ #
39
+ # books = Indexes::Memory.new(:books) do
40
+ # source Book.order('isbn ASC')
41
+ # category :title
42
+ # category :author
43
+ # category :isbn
44
+ # end
45
+ #
46
+ # Categories offer quite a few options, see <tt>Indexes::Index#category</tt> for details.
47
+ #
48
+ # After adding more options, it might look like this:
49
+ #
50
+ # books = Indexes::Memory.new(:books) do
51
+ # source Book.order('isbn ASC')
52
+ # category :title,
53
+ # partial: Partial::Substring.new(from: 1),
54
+ # similarity: Similarity::DoubleMetaphone.new(3),
55
+ # qualifiers: [:t, :title, :titulo]
56
+ # category :author,
57
+ # similarity: Similarity::Metaphone.new(2)
58
+ # category :isbn,
59
+ # partial: Partial::None.new,
60
+ # from: :legacy_isbn_name
61
+ # end
62
+ #
63
+ # For this to work, a <tt>Book</tt> should support methods <tt>#title</tt>, <tt>#author</tt> and <tt>#legacy_isbn_name</tt>.
64
+ #
65
+ # If it uses <tt>String</tt> ids, use <tt>#key_format</tt> to define a formatting method:
66
+ #
67
+ # books = Indexes::Memory.new(:books) do
68
+ # key_format :to_s
69
+ # source Book.order('isbn ASC')
70
+ # category :title
71
+ # category :author
72
+ # category :isbn
73
+ # end
74
+ #
75
+ # Finally, use the index for a <tt>Search</tt>:
76
+ #
77
+ # route %r{^/media$} => Search.new(books, dvds, mp3s)
78
+ #
79
+ class Indexes
80
+
81
+ # This class defines the indexing and index API that is exposed to the user
82
+ # as the #index method inside the Application class.
83
+ #
84
+ # It provides a single front for both indexing and index options. We suggest to always use the index API.
85
+ #
86
+ # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
87
+ #
88
+ class Index
89
+
90
+ attr_reader :name,
91
+ :categories
92
+
93
+ delegate :[],
94
+ :each_category,
95
+ :to => :categories
96
+
97
+ # Create a new index with a given source.
98
+ #
99
+ # === Parameters
100
+ # * name: A name that will be used for the index directory and in the Picky front end.
101
+ #
102
+ # === Options
103
+ # * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
104
+ # * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
105
+ # * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
106
+ # * tokenizer: The tokenizer to use for this index. Optional, can be defined in the block using #indexing.
107
+ # * key_format: The format the ids of this index are in. Optional, can be defined in the block using #key_format.
108
+ #
109
+ # Examples:
110
+ # my_index = Indexes::Memory.new(:my_index, source: some_source) do
111
+ # category :bla
112
+ # end
113
+ #
114
+ # my_index = Indexes::Memory.new(:my_index) do
115
+ # source Sources::CSV.new(file: 'data/index.csv')
116
+ # category :bla
117
+ # end
118
+ #
119
+ #
120
+ def initialize name, options = {}
121
+ check_name name
122
+ @name = name.to_sym
123
+
124
+ check_options options
125
+
126
+ @source = options[:source]
127
+
128
+ @after_indexing = options[:after_indexing]
129
+ @tokenizer = options[:tokenizer]
130
+ @key_format = options[:key_format]
131
+
132
+ # Indexed.
133
+ #
134
+ @result_identifier = options[:result_identifier] || name
135
+
136
+ # TODO Move ignore_unassigned_tokens to query, somehow.
137
+ #
138
+ @categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
139
+
140
+ # Centralized registry.
141
+ #
142
+ Indexes.register self
143
+
144
+ #
145
+ #
146
+ instance_eval(&Proc.new) if block_given?
147
+
148
+ # Check if any source has been given in the block or the options.
149
+ #
150
+ check_source @source
151
+ end
152
+
153
+ # Default bundles.
154
+ #
155
+ def indexing_bundle_class
156
+ Indexing::Bundle::Memory
157
+ end
158
+ def indexed_bundle_class
159
+ Indexed::Bundle::Memory
160
+ end
161
+
162
+ # Defines a searchable category on the index.
163
+ #
164
+ # === Parameters
165
+ # * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
166
+ #
167
+ # === Options
168
+ # * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
169
+ # * similarity: Similarity::None.new or Similarity::DoubleMetaphone.new(similar_words_searched). Default is Similarity::None.new.
170
+ # * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
171
+ # * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
172
+ # * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
173
+ # * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
174
+ #
175
+ def category category_name, options = {}
176
+ new_category = Category.new category_name.to_sym, self, options
177
+ categories << new_category
178
+
179
+ new_category = yield new_category if block_given?
180
+
181
+ new_category
182
+ end
183
+ alias define_category category
184
+
185
+ # Make this category range searchable with a fixed range. If you need other
186
+ # ranges, define another category with a different range value.
187
+ #
188
+ # Example:
189
+ # You have data values inside 1..100, and you want to have Picky return
190
+ # not only the results for 47 if you search for 47, but also results for
191
+ # 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
192
+ #
193
+ # Then you use:
194
+ # ranged_category :values_inside_1_100, 2
195
+ #
196
+ # Optionally, you give it a precision value to reduce the error margin
197
+ # around 47 (Picky is a bit liberal).
198
+ # Indexes::Memory.new :range do
199
+ # ranged_category :values_inside_1_100, 2, precision: 5
200
+ # end
201
+ #
202
+ # This will force Picky to be wrong by at most 5% of the given range value
203
+ # (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
204
+ #
205
+ # We suggest not to use much more than 5 as a higher precision is more
206
+ # performance intensive for less and less precision gain.
207
+ #
208
+ # == Protip 1
209
+ #
210
+ # Create two ranged categories to make an area search:
211
+ # Indexes::Memory.new :area do
212
+ # ranged_category :x, 1
213
+ # ranged_category :y, 1
214
+ # end
215
+ #
216
+ # Search for it using for example:
217
+ # x:133, y:120
218
+ #
219
+ # This will search this square area (* = 133, 120: The "search" point entered):
220
+ #
221
+ # 132 134
222
+ # | |
223
+ # --|---------|-- 121
224
+ # | |
225
+ # | * |
226
+ # | |
227
+ # --|---------|-- 119
228
+ # | |
229
+ #
230
+ # Note: The area does not need to be square, but can be rectangular.
231
+ #
232
+ # == Protip 2
233
+ #
234
+ # Create three ranged categories to make a volume search.
235
+ #
236
+ # Or go crazy and use 4 ranged categories for a space/time search! ;)
237
+ #
238
+ # === Parameters
239
+ # * category_name: The category_name as used in #define_category.
240
+ # * range: The range (in the units of your data values) around the query point where we search for results.
241
+ #
242
+ # -----|<- range ->*------------|-----
243
+ #
244
+ # === Options
245
+ # * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
246
+ # * ... all options of #define_category.
247
+ #
248
+ def ranged_category category_name, range, options = {}
249
+ precision = options[:precision] || 1
250
+
251
+ options = { partial: Partial::None.new }.merge options
252
+
253
+ define_category category_name, options do |category|
254
+ Indexing::Wrappers::Category::Location.install_on category, range, precision
255
+ Indexed::Wrappers::Category::Location.install_on category, range, precision
256
+ end
257
+ end
258
+ alias define_ranged_category ranged_category
259
+
260
+ # HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
261
+ #
262
+ # Also a range search (see #ranged_category), but on the earth's surface.
263
+ #
264
+ # Parameters:
265
+ # * lat_name: The latitude's name as used in #define_category.
266
+ # * lng_name: The longitude's name as used in #define_category.
267
+ # * radius: The distance (in km) around the query point which we search for results.
268
+ #
269
+ # Note: Picky uses a square, not a circle. That should be ok for most usages.
270
+ #
271
+ # -----------------------------
272
+ # | |
273
+ # | |
274
+ # | |
275
+ # | |
276
+ # | |
277
+ # | *<- radius ->|
278
+ # | |
279
+ # | |
280
+ # | |
281
+ # | |
282
+ # | |
283
+ # -----------------------------
284
+ #
285
+ # Options
286
+ # * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
287
+ # * lat_from: The data category to take the data for the latitude from.
288
+ # * lng_from: The data category to take the data for the longitude from.
289
+ #
290
+ # TODO Will have to write a wrapper that combines two categories that are
291
+ # indexed simultaneously, since lat/lng are correlated.
292
+ #
293
+ def geo_categories lat_name, lng_name, radius, options = {} # :nodoc:
294
+
295
+ # Extract lat/lng specific options.
296
+ #
297
+ lat_from = options.delete :lat_from
298
+ lng_from = options.delete :lng_from
299
+
300
+ # One can be a normal ranged_category.
301
+ #
302
+ ranged_category lat_name, radius*0.00898312, options.merge(from: lat_from)
303
+
304
+ # The other needs to adapt the radius depending on the one.
305
+ #
306
+ # Depending on the latitude, the radius of the longitude
307
+ # needs to enlarge, the closer we get to the pole.
308
+ #
309
+ # In our simplified case, the radius is given as if all the
310
+ # locations were on the 45 degree line.
311
+ #
312
+ # This calculates km -> longitude (degrees).
313
+ #
314
+ # A degree is taken to be ~55.66 km here,
315
+ # so a km is equal to 0.01796624 degrees (1 / 55.66).
316
+ #
317
+ ranged_category lng_name, radius*0.01796624, options.merge(from: lng_from)
318
+
319
+ end
320
+ alias define_geo_categories geo_categories
321
+
322
+ #
323
+ # Since this is an API, we fail hard quickly.
324
+ #
325
+ def check_name name # :nodoc:
326
+ raise ArgumentError.new(<<-NAME
327
+
328
+
329
+ The index identifier (you gave "#{name}") for Indexes::Memory/Indexes::Redis should be a Symbol/String,
330
+ Examples:
331
+ Indexes::Memory.new(:my_cool_index) # Recommended
332
+ Indexes::Redis.new("a-redis-index")
333
+ NAME
334
+
335
+
336
+ ) unless name.respond_to?(:to_sym)
337
+ end
338
+ def check_options options # :nodoc:
339
+ raise ArgumentError.new(<<-OPTIONS
340
+
341
+
342
+ Sources are not passed in as second parameter for #{self.class.name} anymore, but either
343
+ * as :source option:
344
+ #{self.class.name}.new(#{name.inspect}, source: #{options})
345
+ or
346
+ * given to the #source method inside the config block:
347
+ #{self.class.name}.new(#{name.inspect}) do
348
+ source #{options}
349
+ end
350
+
351
+ Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
352
+
353
+ All the best
354
+ -- Picky
355
+
356
+
357
+ OPTIONS
358
+ ) unless options.respond_to?(:[])
359
+ end
360
+ def check_source source # :nodoc:
361
+ raise ArgumentError.new(<<-SOURCE
362
+
363
+
364
+ The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text), OR it can be a lambda/block, returning such a source.
365
+ Or it could use one of the built-in sources:
366
+ Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
367
+ Sources::')}
368
+
369
+
370
+ SOURCE
371
+ ) unless source.respond_to?(:each) || source.respond_to?(:harvest) || source.respond_to?(:call)
372
+ end
373
+
374
+ def to_stats # :nodoc:
375
+ stats = <<-INDEX
376
+ #{name} (#{self.class}):
377
+ #{"source: #{source}".indented_to_s}
378
+ #{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
379
+ INDEX
380
+ stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
381
+ stats
382
+ end
383
+
384
+ # Identifier used for technical output.
385
+ #
386
+ def identifier
387
+ "#{PICKY_ENVIRONMENT}:#{name}"
388
+ end
389
+
390
+ #
391
+ #
392
+ def to_s
393
+ "#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
394
+ end
395
+
396
+ end
397
+
398
+ end
399
+
400
+ end
@@ -0,0 +1,24 @@
1
+ module Picky
2
+
3
+ class Indexes
4
+
5
+ #
6
+ #
7
+ class Index
8
+
9
+ attr_reader :result_identifier,
10
+ :combinator
11
+
12
+ delegate :load_from_cache,
13
+ :analyze,
14
+ :reindex,
15
+ :possible_combinations,
16
+ :to => :categories
17
+
18
+ alias reload load_from_cache
19
+
20
+ end
21
+
22
+ end
23
+
24
+ end
@@ -0,0 +1,138 @@
1
+ module Picky
2
+
3
+ class Indexes
4
+
5
+ #
6
+ #
7
+ class Index
8
+
9
+ attr_reader :after_indexing,
10
+ :bundle_class
11
+
12
+ # Delegators for indexing.
13
+ #
14
+ delegate :cache,
15
+ :check,
16
+ :clear,
17
+ :backup,
18
+ :restore,
19
+ :to => :categories
20
+
21
+ # Calling index on an index will call index
22
+ # on every category.
23
+ #
24
+ # Decides whether to use a parallel indexer or whether to
25
+ # delegate to each category to index themselves.
26
+ #
27
+ def index
28
+ if source.respond_to?(:each)
29
+ check_source_empty
30
+ index_in_parallel
31
+ else
32
+ with_data_snapshot do
33
+ categories.each &:index
34
+ end
35
+ end
36
+ end
37
+
38
+ # Check if the given enumerable source is empty.
39
+ #
40
+ # Note: Checking as early as possible to tell the
41
+ # user as early as possible.
42
+ #
43
+ def check_source_empty
44
+ warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
45
+ end
46
+
47
+ # Note: Duplicated in category_indexing.rb.
48
+ #
49
+ # Take a data snapshot if the source offers it.
50
+ #
51
+ def with_data_snapshot
52
+ if source.respond_to? :with_snapshot
53
+ source.with_snapshot(self) do
54
+ yield
55
+ end
56
+ else
57
+ yield
58
+ end
59
+ end
60
+
61
+ # Indexes the categories in parallel.
62
+ #
63
+ # Only use where the category does have a #each source defined.
64
+ #
65
+ def index_in_parallel
66
+ indexer = Indexers::Parallel.new self
67
+ indexer.index categories
68
+ categories.each &:cache
69
+ end
70
+
71
+ # Define an index tokenizer on the index.
72
+ #
73
+ # Parameters are the exact same as for indexing.
74
+ #
75
+ def indexing options = {}
76
+ @tokenizer = Tokenizers::Index.new options
77
+ end
78
+ alias define_indexing indexing
79
+
80
+ # Returns the installed tokenizer or the default.
81
+ #
82
+ def tokenizer
83
+ @tokenizer || Indexes.tokenizer
84
+ end
85
+
86
+ # Define a source on the index.
87
+ #
88
+ # Parameter is a source, either one of the standard sources or
89
+ # anything responding to #each and returning objects that
90
+ # respond to id and the category names (or the category from option).
91
+ #
92
+ def source some_source = nil, &block
93
+ some_source ||= block
94
+ some_source ? define_source(some_source) : (@source && extract_source || raise_no_source)
95
+ end
96
+ # Extract the actual source if it is wrapped in a time
97
+ # capsule, i.e. a block/lambda.
98
+ #
99
+ # TODO Extract into module.
100
+ #
101
+ def extract_source
102
+ @source = @source.respond_to?(:call) ? @source.call : @source
103
+ end
104
+ def define_source source
105
+ @source = source
106
+ end
107
+ def raise_no_source
108
+ raise NoSourceSpecifiedException.new(<<-NO_SOURCE
109
+
110
+
111
+ No source given for index #{name}. An index needs a source.
112
+ Example:
113
+ Indexes::Memory.new(:with_source) do
114
+ source Sources::CSV.new(:title, file: 'data/books.csv')
115
+ category :title
116
+ category :author
117
+ end
118
+
119
+ NO_SOURCE
120
+ )
121
+ end
122
+
123
+ # Define a key_format on the index.
124
+ #
125
+ # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
126
+ #
127
+ def key_format format = nil
128
+ format ? define_key_format(format) : @key_format
129
+ end
130
+ def define_key_format key_format
131
+ @key_format = key_format
132
+ end
133
+
134
+ end
135
+
136
+ end
137
+
138
+ end
@@ -0,0 +1,20 @@
1
+ module Picky
2
+
3
+ class Indexes
4
+
5
+ # An index that is persisted in files, loaded at startup and kept in memory at runtime.
6
+ #
7
+ class Memory < Index
8
+
9
+ def indexing_bundle_class
10
+ Indexing::Bundle::Memory
11
+ end
12
+ def indexed_bundle_class
13
+ Indexed::Bundle::Memory
14
+ end
15
+
16
+ end
17
+
18
+ end
19
+
20
+ end
@@ -0,0 +1,20 @@
1
+ module Picky
2
+
3
+ class Indexes
4
+
5
+ # An index that is persisted in Redis.
6
+ #
7
+ class Redis < Index
8
+
9
+ def indexing_bundle_class
10
+ Indexing::Bundle::Redis
11
+ end
12
+ def indexed_bundle_class
13
+ Indexed::Bundle::Redis
14
+ end
15
+
16
+ end
17
+
18
+ end
19
+
20
+ end