picky 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -1,165 +0,0 @@
1
- module Query
2
-
3
- # Weighs the given tokens, generates Allocations -> Allocation -> Combinations.
4
- #
5
- class Weigher # :nodoc:all
6
-
7
- attr_reader :indexes
8
-
9
- # A weigher has a number of typed indexes, for which it generates allocations.
10
- #
11
- def initialize types
12
- @indexes = types
13
- end
14
-
15
- #
16
- #
17
- def allocations_for tokens
18
- Allocations.new(indexes.inject([]) do |previous_allocations, index|
19
- # Expand the combinations.
20
- #
21
- possible_combinations = tokens.possible_combinations_in index
22
-
23
- # Optimization for ignoring tokens that allocate to nothing and
24
- # can be ignored.
25
- # For example in a special search, where "florian" is not
26
- # mapped to city, zip, or category.
27
- #
28
- possible_combinations.compact!
29
- expanded_combinations = expand_combinations_from possible_combinations
30
-
31
- #
32
- #
33
- next previous_allocations if expanded_combinations.empty?
34
-
35
- # The recombination part, where
36
- # [
37
- # [a,a,b,b,c,c]
38
- # [d,e,d,e,d,e]
39
- # ]
40
- # becomes
41
- # [
42
- # [a,d],
43
- # [a,e],
44
- # [b,d],
45
- # [b,e],
46
- # [c,d],
47
- # [c,e]
48
- # ]
49
- #
50
- # TODO Use transpose?
51
- #
52
- expanded_combinations = expanded_combinations.shift.zip *expanded_combinations
53
-
54
- # Wrap into a real combination.
55
- #
56
- # expanded_combinations.map! { |expanded_combination| Combinations.new(expanded_combination).pack_into_allocation(index.result_identifier) }
57
-
58
- # Add the possible allocations to the ones we already have.
59
- #
60
- # previous_allocations + expanded_combinations.map(&:pack_into_allocation)
61
-
62
-
63
- # Add the wrapped possible allocations to the ones we already have.
64
- #
65
- previous_allocations + expanded_combinations.map! do |expanded_combination|
66
- Combinations.new(expanded_combination).pack_into_allocation(index.result_identifier) # TODO Do not extract result_identifier. Remove pack_into_allocation.
67
- end
68
- end)
69
- end
70
-
71
- # This is the core of the search engine.
72
- #
73
- # Gets an array of
74
- # [
75
- # [<combinations for token1>],
76
- # [<combinations for token2>],
77
- # [<combinations for token3>]
78
- # ]
79
- #
80
- # Generates all possible allocations of combinations.
81
- # [
82
- # [first combination of token1, first c of t2, first c of t3],
83
- # [first combination of token1, first c of t2, second c of t3]
84
- # ...
85
- # ]
86
- #
87
- # Generates all possible combinations of array elements:
88
- # [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
89
- # Note: Also calculates the weights and sorts them accordingly.
90
- #
91
- # Note: This is a heavily optimized ruby version.
92
- #
93
- # Works like this:
94
- # [1,2,3], [a,b,c], [k,l,m] are expanded to
95
- # group mult: 1
96
- # <- single mult ->
97
- # [1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3] = 27 elements
98
- # group mult: 3
99
- # <- -> s/m
100
- # [a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c] = 27 elements
101
- # group mult: 9
102
- # <> s/m
103
- # [k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m] = 27 elements
104
- # The array elements are then combined by index (i.e. vertically) to get all combinations.
105
- #
106
- # Note: Of course I could split this method up into smaller
107
- # ones, but I guess I am a bit sentimental.
108
- #
109
- def expand_combinations_from possible_combinations
110
- # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
111
- #
112
- # TODO How does this work if an element has size 0? Since below we account for size 0.
113
- # Should we even continue if an element has size 0?
114
- # This means one of the tokens cannot be allocated.
115
- #
116
- single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }
117
-
118
- # Initialize a group multiplicator.
119
- #
120
- group_mult = 1
121
-
122
- possible_combinations.reject!(&:empty?)
123
-
124
- # The expanding part to line up the combinations
125
- # for later combination in allocations.
126
- #
127
- possible_combinations.collect! do |combinations|
128
-
129
- # Get the size of the combinations of the first token.
130
- #
131
- combinations_size = combinations.size
132
-
133
- # Special case: If there is no combination for one of the tokens.
134
- # In that case, we just use the same single mult for
135
- # the next iteration.
136
- # If there are combinations, we divide the single mult
137
- # by the number of combinations.
138
- #
139
- single_mult /= combinations_size unless combinations_size.zero?
140
-
141
- # Expand each combination by the single mult:
142
- # [a,b,c]
143
- # [a,a,a, b,b,b, c,c,c]
144
- # Then, expand the result by the group mult:
145
- # [a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c]
146
- #
147
- combinations = combinations.inject([]) do |total, combination|
148
- total + [combination]*single_mult
149
- end * group_mult
150
-
151
- # Multiply the group mult by the combinations size,
152
- # since the next combinations' single mult is smaller
153
- # and we need to adjust for that.
154
- #
155
- group_mult = group_mult * combinations_size
156
-
157
- # Return the combinations.
158
- #
159
- combinations
160
- end
161
- end
162
-
163
- end
164
-
165
- end
@@ -1,102 +0,0 @@
1
- module Results # :nodoc:all
2
-
3
- # This is the internal results object. Usually, to_marshal, or to_json
4
- # is called on it to get a string for the answer.
5
- #
6
- class Base
7
-
8
- # Duration is set externally by the query.
9
- #
10
- attr_writer :duration
11
- attr_reader :allocations, :offset
12
-
13
- # Takes instances of Query::Allocations as param.
14
- #
15
- def initialize offset = 0, allocations = Query::Allocations.new
16
- @offset = offset
17
- @allocations = allocations # || Query::Allocations.new
18
- end
19
- # Create new results and calculate the ids.
20
- #
21
- def self.from offset, allocations
22
- results = new offset, allocations
23
- results.prepare!
24
- results
25
- end
26
-
27
- #
28
- #
29
- def serialize
30
- { allocations: allocations.to_result,
31
- offset: offset,
32
- duration: duration,
33
- total: total }
34
- end
35
- # The default format is json.
36
- #
37
- def to_response options = {}
38
- to_json options
39
- end
40
- # Convert to json format.
41
- #
42
- def to_json options = {}
43
- serialize.to_json options
44
- end
45
-
46
- # This starts the actual processing.
47
- #
48
- # Without this, the allocations are not processed,
49
- # and no ids are calculated.
50
- #
51
- def prepare!
52
- allocations.process! self.max_results, self.offset
53
- end
54
-
55
- # Duration default is 0.
56
- #
57
- def duration
58
- @duration || 0
59
- end
60
- # The total results. Delegates to the allocations.
61
- #
62
- # Caches.
63
- #
64
- def total
65
- @total || @total = allocations.total || 0
66
- end
67
-
68
- # How many results are returned.
69
- #
70
- # Set in config using
71
- # Results::Full.max_results = 20
72
- #
73
- class_inheritable_accessor :max_results
74
- def max_results
75
- self.class.max_results
76
- end
77
-
78
- # Convenience methods.
79
- #
80
-
81
- # Delegates to allocations.
82
- #
83
- def ids amount = 20
84
- allocations.ids amount
85
- end
86
- # Gets an amout of random ids from the allocations.
87
- #
88
- # Note: Basically delegates to the allocations.
89
- #
90
- def random_ids amount = 1
91
- allocations.random_ids amount
92
- end
93
-
94
- # Human readable log.
95
- #
96
- def to_log query
97
- "|#{Time.now.to_s(:db)}|#{'%8f' % duration}|#{'%-50s' % query}|#{'%8d' % total}|#{'%4d' % offset}|#{'%2d' % allocations.size}|"
98
- end
99
-
100
- end
101
-
102
- end
@@ -1,13 +0,0 @@
1
- module Results
2
- # Full results are limited to maximally 20 results (by default).
3
- #
4
- class Full < Base
5
-
6
- self.max_results = 20
7
-
8
- def to_log *args
9
- ?> + super
10
- end
11
-
12
- end
13
- end
@@ -1,13 +0,0 @@
1
- module Results
2
- # Live results are not returning any results.
3
- #
4
- class Live < Base
5
-
6
- self.max_results = 0
7
-
8
- def to_log *args
9
- ?. + super
10
- end
11
-
12
- end
13
- end
@@ -1,161 +0,0 @@
1
- module Tokenizers # :nodoc:all
2
-
3
- # Defines tokenizing processes used both in indexing and querying.
4
- #
5
- class Base
6
-
7
- # TODO Move EMPTY_STRING top level.
8
- #
9
- EMPTY_STRING = ''.freeze
10
-
11
- # Stopwords.
12
- #
13
- def stopwords regexp
14
- @remove_stopwords_regexp = regexp
15
- end
16
- def remove_stopwords text
17
- text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
18
- text
19
- end
20
- @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
21
- def remove_non_single_stopwords text
22
- return text if text.match @@non_single_stopword_regexp
23
- remove_stopwords text
24
- end
25
-
26
- # Illegals.
27
- #
28
- # TODO Should there be a legal?
29
- #
30
- def removes_characters regexp
31
- @removes_characters_regexp = regexp
32
- end
33
- def remove_illegals text
34
- text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
35
- text
36
- end
37
-
38
- # Splitting.
39
- #
40
- def splits_text_on regexp
41
- @splits_text_on_regexp = regexp
42
- end
43
- def split text
44
- text.split @splits_text_on_regexp
45
- end
46
-
47
- # Normalizing.
48
- #
49
- def normalizes_words regexp_replaces
50
- @normalizes_words_regexp_replaces = regexp_replaces
51
- end
52
- def normalize_with_patterns text
53
- return text unless @normalizes_words_regexp_replaces
54
-
55
- @normalizes_words_regexp_replaces.each do |regex, replace|
56
- # This should be sufficient
57
- #
58
- text.gsub!(regex, replace) and break
59
- end
60
- remove_after_normalizing_illegals text
61
- text
62
- end
63
-
64
- # Illegal after normalizing.
65
- #
66
- def removes_characters_after_splitting regexp
67
- @removes_characters_after_splitting_regexp = regexp
68
- end
69
- def remove_after_normalizing_illegals text
70
- text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
71
- end
72
-
73
- # Substitute Characters with this substituter.
74
- #
75
- # Default is European Character substitution.
76
- #
77
- def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
78
- # TODO Raise if it doesn't quack substitute?
79
- @substituter = substituter
80
- end
81
- def substitute_characters text
82
- substituter?? substituter.substitute(text) : text
83
- end
84
-
85
- # Reject tokens after tokenizing based on the given criteria.
86
- #
87
- # Note: Currently only for indexing. TODO Redesign and write for both!
88
- #
89
- def reject_token_if &condition
90
- @reject_condition = condition
91
- end
92
- def reject tokens
93
- tokens.reject! &@reject_condition
94
- end
95
-
96
-
97
- # Returns a number of tokens, generated from the given text.
98
- #
99
- # Note:
100
- # * preprocess, pretokenize are hooks
101
- #
102
- def tokenize text
103
- text = preprocess text # processing the text
104
- return empty_tokens if text.blank?
105
- words = pretokenize text # splitting and preparations for tokenizing
106
- return empty_tokens if words.empty?
107
- tokens = tokens_for words # creating tokens / strings
108
- process tokens # processing tokens / strings
109
- end
110
-
111
- attr_reader :substituter
112
- alias substituter? substituter
113
-
114
- def initialize options = {}
115
- removes_characters options[:removes_characters] if options[:removes_characters]
116
- contracts_expressions *options[:contracts_expressions] if options[:contracts_expressions]
117
- stopwords options[:stopwords] if options[:stopwords]
118
- normalizes_words options[:normalizes_words] if options[:normalizes_words]
119
- removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
120
- substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
121
-
122
- # Defaults.
123
- #
124
- splits_text_on options[:splits_text_on] || /\s/
125
- reject_token_if &(options[:reject_token_if] || :blank?)
126
- end
127
-
128
- # Hooks.
129
- #
130
-
131
- # Preprocessing.
132
- #
133
- def preprocess text; end
134
- # Pretokenizing.
135
- #
136
- def pretokenize text; end
137
- # Postprocessing.
138
- #
139
- def process tokens
140
- reject tokens # Reject any tokens that don't meet criteria
141
- tokens
142
- end
143
-
144
- # Converts words into real tokens.
145
- #
146
- def tokens_for words
147
- ::Query::Tokens.new words.collect! { |word| token_for word }
148
- end
149
- # Turns non-blank text into symbols.
150
- #
151
- def symbolize text
152
- text.blank? ? nil : text.to_sym
153
- end
154
- # Returns a tokens object.
155
- #
156
- def empty_tokens
157
- ::Query::Tokens.new
158
- end
159
-
160
- end
161
- end
@@ -1,58 +0,0 @@
1
- module Tokenizers
2
-
3
- # The base indexing tokenizer.
4
- #
5
- # Override in indexing subclasses and define in configuration.
6
- #
7
- class Index < Base
8
-
9
- def self.default= new_default
10
- @default = new_default
11
- end
12
- def self.default
13
- @default ||= new
14
- end
15
-
16
- # Default indexing preprocessing hook.
17
- #
18
- # Does:
19
- # 1. Character substitution.
20
- # 2. Downcasing.
21
- # 3. Remove illegal expressions.
22
- # 4. Remove non-single stopwords. (Stopwords that occur with other words)
23
- #
24
- def preprocess text
25
- text = substitute_characters text
26
- text.downcase!
27
- remove_illegals text
28
- # we do not remove single stopwords for an entirely different
29
- # reason than in the query tokenizer.
30
- # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
31
- #
32
- remove_non_single_stopwords text
33
- text
34
- end
35
-
36
- # Default indexing pretokenizing hook.
37
- #
38
- # Does:
39
- # 1. Split the text into words.
40
- # 2. Normalize each word.
41
- #
42
- def pretokenize text
43
- words = split text
44
- words.collect! do |word|
45
- normalize_with_patterns word
46
- word
47
- end
48
- end
49
-
50
- # Does not actually return a token, but a
51
- # symbol "token".
52
- #
53
- def token_for text
54
- symbolize text
55
- end
56
-
57
- end
58
- end
@@ -1,74 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Tokenizers
4
-
5
- # There are a few class methods that you can use to configure how a query works.
6
- #
7
- # removes_characters regexp
8
- # illegal_after_normalizing regexp
9
- # stopwords regexp
10
- # contracts_expressions regexp, to_string
11
- # splits_text_on regexp
12
- # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
13
- #
14
- class Query < Base
15
-
16
- def self.default= new_default
17
- @default = new_default
18
- end
19
- def self.default
20
- @default ||= new
21
- end
22
-
23
- attr_reader :maximum_tokens
24
-
25
- def initialize options = {}
26
- super options
27
- @maximum_tokens = options[:maximum_tokens] || 5
28
- end
29
-
30
- def preprocess text
31
- remove_illegals text # Remove illegal characters
32
- remove_non_single_stopwords text # remove stop words
33
- text
34
- end
35
-
36
- # Split the text and put some back together.
37
- #
38
- # TODO Make the same as in indexing?
39
- #
40
- def pretokenize text
41
- split text
42
- end
43
-
44
- # Let each token process itself.
45
- # Reject, limit, and partialize tokens.
46
- #
47
- def process tokens
48
- tokens.tokenize_with self
49
- tokens.reject # Reject any tokens that don't meet criteria
50
- tokens.cap maximum_tokens # Cut off superfluous tokens
51
- tokens.partialize_last # Set certain tokens as partial
52
- tokens
53
- end
54
-
55
- # Called by the token.
56
- #
57
- # TODO Perhaps move to Normalizer?
58
- #
59
- def normalize text
60
- text = substitute_characters text # Substitute special characters
61
- text.downcase! # Downcase all text
62
- normalize_with_patterns text # normalize
63
- text.to_sym # symbolize
64
- end
65
-
66
- # Returns a token for a word.
67
- # The basic query tokenizer uses new tokens.
68
- #
69
- def token_for word
70
- ::Query::Token.processed word
71
- end
72
-
73
- end
74
- end
@@ -1,15 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Cacher::Partial::Default do
4
-
5
- it "should be a subtoken" do
6
- Cacher::Partial::Default.should be_kind_of(Cacher::Partial::Substring)
7
- end
8
- it "should be a the right down to" do
9
- Cacher::Partial::Default.from.should == -3
10
- end
11
- it "should be a the right starting at" do
12
- Cacher::Partial::Default.to.should == -1
13
- end
14
-
15
- end
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Cacher::Partial::None do
4
-
5
- it "has the right superclass" do
6
- Cacher::Partial::None.should < Cacher::Partial::Strategy
7
- end
8
- it "returns an empty index" do
9
- Cacher::Partial::None.new.generate_from(:unimportant).should == {}
10
- end
11
- describe 'use_exact_for_partial?' do
12
- it 'returns true' do
13
- Cacher::Partial::None.new.use_exact_for_partial?.should == true
14
- end
15
- end
16
-
17
- end
@@ -1,21 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Cacher::WeightsGenerator do
4
-
5
- context 'integration' do
6
- it 'should generate the correct values' do
7
- generator = Cacher::WeightsGenerator.new :a => Array.new(0),
8
- :b => Array.new(1),
9
- :c => Array.new(10),
10
- :d => Array.new(100),
11
- :e => Array.new(1000)
12
-
13
- result = generator.generate
14
-
15
- result[:c].should be_close 2.3, 0.011
16
- result[:d].should be_close 4.6, 0.011
17
- result[:e].should be_close 6.9, 0.011
18
- end
19
- end
20
-
21
- end