picky 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -1,130 +0,0 @@
1
- module Query
2
- # Container class for allocations.
3
- #
4
- class Allocations # :nodoc:all
5
-
6
- # TODO Remove size
7
- #
8
- delegate :each, :inject, :empty?, :size, :to => :@allocations
9
- attr_reader :total
10
-
11
- def initialize allocations = []
12
- @allocations = allocations
13
- end
14
-
15
- # Score each allocation.
16
- #
17
- def calculate_score weights
18
- @allocations.each do |allocation|
19
- allocation.calculate_score weights
20
- end
21
- end
22
- # Sort the allocations.
23
- #
24
- def sort
25
- @allocations.sort!
26
- end
27
-
28
- # Reduces the amount of allocations to x.
29
- #
30
- def reduce_to amount
31
- @allocations = @allocations.shift amount
32
- end
33
-
34
- # Keeps combinations.
35
- #
36
- # Only those passed in remain.
37
- #
38
- def keep identifiers = []
39
- @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
40
- end
41
- # Removes combinations.
42
- #
43
- # Only those passed in are removed.
44
- #
45
- # TODO Rewrite
46
- #
47
- def remove identifiers = []
48
- @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
49
- end
50
-
51
- # Returns the top amount ids.
52
- #
53
- def ids amount = 20
54
- @allocations.inject([]) do |total, allocation|
55
- total.size >= amount ? (return total.shift(amount)) : total + allocation.ids
56
- end
57
- end
58
-
59
- # Returns a random id from the allocations.
60
- #
61
- # Note: This is an ok algorithm for small id sets.
62
- #
63
- # But still TODO try for a faster one.
64
- #
65
- def random_ids amount = 1
66
- return [] if @allocations.empty?
67
- ids = @allocations.first.ids
68
- indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
69
- indexes.first(amount).map { |id| ids[id] }
70
- end
71
-
72
- # This is the main method of this class that will replace ids and count.
73
- #
74
- # What it does is calculate the ids and counts of its allocations
75
- # for use in the results. It also calculates the total.
76
- #
77
- # Parameters:
78
- # * amount: the amount of ids to calculate
79
- # * offset: the offset from where in the result set to take the ids
80
- #
81
- # Note: With an amount of 0, an offset > 0 doesn't make much
82
- # sense, as seen in the live search.
83
- #
84
- # Note: Each allocation caches its count, but not its ids (thrown away).
85
- # The ids are cached in this class.
86
- #
87
- # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
88
- #
89
- def process! amount, offset = 0
90
- @total = 0
91
- current_offset = 0
92
- @allocations.each do |allocation|
93
- ids = allocation.process! amount, offset
94
- @total = @total + allocation.count # the total mixed in
95
- if ids.empty?
96
- offset = offset - allocation.count unless offset.zero?
97
- else
98
- amount = amount - ids.size # we need less results from the following allocation
99
- offset = 0 # we have already passed the offset
100
- end
101
- end
102
- end
103
-
104
- def uniq
105
- @allocations.uniq!
106
- end
107
-
108
- def to_a
109
- @allocations
110
- end
111
-
112
- # Simply inspects the internal allocations.
113
- #
114
- def to_s
115
- @allocations.inspect
116
- end
117
-
118
- # Allocations for results are in the form:
119
- # [
120
- # allocation1.to_result,
121
- # allocation2.to_result
122
- # ...
123
- # ]
124
- #
125
- def to_result
126
- @allocations.map(&:to_result).compact
127
- end
128
-
129
- end
130
- end
@@ -1,74 +0,0 @@
1
- module Query
2
-
3
- # Describes the combination of a token (the text) and
4
- # the index (the bundle).
5
- #
6
- # A combination is a single part of an allocation.
7
- #
8
- # An allocation consists of a number of combinations.
9
- #
10
- class Combination # :nodoc:all
11
-
12
- attr_reader :token, :bundle, :category_name
13
-
14
- def initialize token, category
15
- @token = token
16
- @category_name = category.name
17
- @bundle = category.bundle_for token
18
- @text = @token.text # don't want to use reset_similar already
19
- end
20
-
21
- # Note: Required for uniq!
22
- #
23
- def hash
24
- [@token.to_s, @bundle].hash
25
- end
26
-
27
- # Returns the weight of this combination.
28
- #
29
- # Note: Caching is useful most of the time.
30
- #
31
- def weight
32
- @weight ||= @bundle.weight(@text)
33
- end
34
-
35
- # Returns an array of ids for the given text.
36
- #
37
- # Note: Caching is useful most of the time.
38
- #
39
- def ids
40
- @ids ||= @bundle.ids(@text)
41
- end
42
-
43
- # The identifier for this combination.
44
- #
45
- def identifier
46
- @category_name
47
- end
48
-
49
- # Is the identifier in the given identifiers?
50
- #
51
- def in? identifiers
52
- identifiers.include? identifier
53
- end
54
-
55
- # Combines the category names with the original names.
56
- # [
57
- # [:title, 'Flarbl', :flarbl],
58
- # [:category, 'Gnorf', :gnorf]
59
- # ]
60
- #
61
- def to_result
62
- [identifier, *@token.to_result]
63
- end
64
-
65
- # Example:
66
- # "exact title:Peter*:peter"
67
- #
68
- def to_s
69
- "#{bundle.identifier} #{to_result.join(':')}"
70
- end
71
-
72
- end
73
-
74
- end
@@ -1,105 +0,0 @@
1
- module Query
2
-
3
- # Combinations are a number of Combination-s.
4
- #
5
- # They are the core of an allocation.
6
- #
7
- class Combinations # :nodoc:all
8
-
9
- attr_reader :combinations
10
-
11
- delegate :empty?, :to => :@combinations
12
-
13
- def initialize combinations = []
14
- @combinations = combinations
15
- end
16
-
17
- def hash
18
- @combinations.hash
19
- end
20
-
21
- # Uses user specific weights to calculate a score for the combinations.
22
- #
23
- def calculate_score weights
24
- total_score + weighted_score(weights)
25
- end
26
- def total_score
27
- @combinations.sum &:weight
28
- end
29
- def weighted_score weights
30
- weights.score @combinations
31
- end
32
-
33
- # Gets all ids for the allocations.
34
- #
35
- # Sorts the ids by size and & through them in the following order (sizes):
36
- # 0. [100_000, 400, 30, 2]
37
- # 1. [2, 30, 400, 100_000]
38
- # 2. (100_000 & (400 & (30 & 2))) # => result
39
- #
40
- # Returns the ids. Also sets the count.
41
- #
42
- # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
43
- #
44
- def ids
45
- return [] if @combinations.empty?
46
-
47
- # Get the ids for each combination.
48
- #
49
- id_arrays = @combinations.inject([]) do |total, combination|
50
- total << combination.ids
51
- end
52
-
53
- # Order by smallest size first such that the intersect can be performed faster.
54
- #
55
- # TODO Move into the memory_efficient_intersect such that
56
- # this precondition for a fast algorithm is always given.
57
- #
58
- id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
59
-
60
- # Call the optimized C algorithm.
61
- #
62
- Performant::Array.memory_efficient_intersect id_arrays
63
- end
64
-
65
- # Wrap the combinations into an allocation with the result_identifier.
66
- #
67
- def pack_into_allocation result_identifier
68
- Allocation.new self, result_identifier
69
- end
70
-
71
- # Filters the tokens and identifiers such that only identifiers
72
- # that are passed in, remain, including their tokens.
73
- #
74
- # Note: This method is not totally independent of the calculate_ids one.
75
- # Since identifiers are only nullified, we need to not include the
76
- # ids that have an associated identifier that is nil.
77
- #
78
- def keep identifiers = []
79
- # TODO Rewrite to use the category!!!
80
- #
81
- @combinations.reject! { |combination| !combination.in?(identifiers) }
82
- end
83
-
84
- # Filters the tokens and identifiers such that identifiers
85
- # that are passed in, are removed, including their tokens.
86
- #
87
- # Note: This method is not totally independent of the calculate_ids one.
88
- # Since identifiers are only nullified, we need to not include the
89
- # ids that have an associated identifier that is nil.
90
- #
91
- def remove identifiers = []
92
- # TODO Rewrite to use the category!!!
93
- #
94
- @combinations.reject! { |combination| combination.in?(identifiers) }
95
- end
96
-
97
- #
98
- #
99
- def to_result
100
- @combinations.map &:to_result
101
- end
102
-
103
- end
104
-
105
- end
@@ -1,77 +0,0 @@
1
- # coding: utf-8
2
- #
3
-
4
- #
5
- #
6
- module Query
7
-
8
- # A single qualifier.
9
- #
10
- class Qualifier # :nodoc:all
11
-
12
- attr_reader :normalized_qualifier, :codes
13
-
14
- #
15
- #
16
- # codes is an array.
17
- #
18
- def initialize normalized_qualifier, codes
19
- @normalized_qualifier = normalized_qualifier
20
- @codes = codes.map &:to_sym
21
- end
22
-
23
- # Will overwrite if the key is present in the hash.
24
- #
25
- def inject_into hash
26
- codes.each do |code|
27
- hash[code] = normalized_qualifier
28
- end
29
- end
30
-
31
- end
32
-
33
- # Collection class for qualifiers.
34
- #
35
- class Qualifiers # :nodoc:all
36
-
37
- include Singleton
38
-
39
- attr_reader :qualifiers, :normalization_mapping
40
-
41
- delegate :<<, :to => :qualifiers
42
-
43
- #
44
- #
45
- def initialize
46
- @qualifiers = []
47
- @normalization_mapping = {}
48
- end
49
-
50
- # TODO Spec.
51
- #
52
- def self.add name, qualifiers
53
- instance << Qualifier.new(name, qualifiers)
54
- end
55
-
56
- # Uses the qualifiers to prepare (optimize) the qualifier handling.
57
- #
58
- def prepare
59
- qualifiers.each do |qualifier|
60
- qualifier.inject_into normalization_mapping
61
- end
62
- end
63
-
64
- # Normalizes the given qualifier.
65
- #
66
- # Returns nil if it is not allowed, the normalized qualifier if it is.
67
- #
68
- # Note: Normalizes.
69
- #
70
- def normalize qualifier
71
- return nil if qualifier.blank?
72
-
73
- normalization_mapping[qualifier.to_sym]
74
- end
75
-
76
- end
77
- end
@@ -1,202 +0,0 @@
1
- module Query
2
-
3
- # This is a query token. Together with other tokens it makes up a query.
4
- #
5
- # It remembers the original form, and a normalized form.
6
- #
7
- # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
8
- #
9
- # TODO Make partial / similarity char configurable.
10
- #
11
- class Token # :nodoc:all
12
-
13
- attr_reader :text, :original
14
- attr_writer :similar
15
-
16
- delegate :blank?, :to => :text
17
-
18
- # Normal initializer.
19
- #
20
- # Note: Use this if you do not want a qualified and normalized token.
21
- #
22
- def initialize text
23
- @text = text
24
- end
25
-
26
- # Returns a qualified and normalized token.
27
- #
28
- # Note: Use this in the search engine if you need a qualified
29
- # and normalized token. I.e. one prepared for a search.
30
- #
31
- def self.processed text
32
- token = new text
33
- token.qualify
34
- token.extract_original
35
- token.partialize
36
- token.similarize
37
- token.remove_illegals
38
- token
39
- end
40
-
41
- # This returns a predefined category name if the user has given one.
42
- #
43
- def user_defined_category_name
44
- @qualifier
45
- end
46
-
47
- # Extracts a qualifier for this token and pre-assigns an allocation.
48
- #
49
- # Note: Removes the qualifier if it is not allowed.
50
- #
51
- def qualify
52
- @qualifier, @text = split @text
53
- @qualifier = Query::Qualifiers.instance.normalize @qualifier
54
- end
55
- def extract_original
56
- @original = @text.dup
57
- end
58
-
59
- # Partial is a conditional setter.
60
- #
61
- # It is only settable if it hasn't been set yet.
62
- #
63
- def partial= partial
64
- @partial = partial if @partial.nil?
65
- end
66
- def partial?
67
- !@similar && @partial
68
- end
69
-
70
- # If the text ends with *, partialize it. If with ", don't.
71
- #
72
- @@no_partial = /\"\Z/
73
- @@partial = /\*\Z/
74
- def partialize
75
- self.partial = false and return if @text =~ @@no_partial
76
- self.partial = true if @text =~ @@partial
77
- end
78
-
79
- # If the text ends with ~ similarize it. If with ", don't.
80
- #
81
- @@no_similar = /\"\Z/
82
- @@similar = /\~\Z/
83
- def similarize
84
- self.similar = false and return if @text =~ @@no_similar
85
- self.similar = true if @text =~ @@similar
86
- end
87
-
88
- def similar?
89
- @similar
90
- end
91
-
92
- # Normalizes this token's text.
93
- #
94
- @@illegals = /["*~]/
95
- def remove_illegals
96
- @text.gsub! @@illegals, '' unless @text.blank?
97
- end
98
-
99
- # Visitor for tokenizer.
100
- #
101
- # TODO Rewrite!!!
102
- #
103
- def tokenize_with tokenizer
104
- @text = tokenizer.normalize @text
105
- end
106
- # TODO spec!
107
- #
108
- # TODO Rewrite!!
109
- #
110
- def tokenized tokenizer
111
- tokenizer.tokenize(@text.to_s).each do |text|
112
- yield text
113
- end
114
- end
115
-
116
- # Returns an array of possible combinations.
117
- #
118
- def possible_combinations_in type
119
- type.possible_combinations self
120
- end
121
-
122
- # Returns a token with the next similar text.
123
- #
124
- # TODO Rewrite this. It is hard to understand. Also spec performance.
125
- #
126
- def next_similar_token category
127
- token = self.dup
128
- token if token.next_similar category.bundle_for(token)
129
- end
130
- # Sets and returns the next similar word.
131
- #
132
- # Note: Also overrides the original.
133
- #
134
- def next_similar bundle
135
- @text = @original = (similarity(bundle).shift || return) if similar?
136
- end
137
- # Lazy similar reader.
138
- #
139
- def similarity bundle = nil
140
- @similarity || @similarity = generate_similarity_for(bundle)
141
- end
142
- # Returns an enumerator that traverses over the similar.
143
- #
144
- # Note: The dup isn't too nice, but it is needed on account of the shift, above.
145
- # (We avoid a StopIteration exception. Which of both is less evil?)
146
- #
147
- def generate_similarity_for bundle
148
- bundle.similar(@text).dup || []
149
- end
150
-
151
- # Generates a solr term from this token.
152
- #
153
- # E.g. "name:heroes~0.75"
154
- #
155
- @@solr_fuzzy_mapping = {
156
- 1 => :'',
157
- 2 => :'',
158
- 3 => :'',
159
- 4 => :'~0.74',
160
- 5 => :'~0.78',
161
- 6 => :'~0.81',
162
- 7 => :'~0.83',
163
- 8 => :'~0.85',
164
- 9 => :'~0.87',
165
- 10 => :'~0.89'
166
- }
167
- @@solr_fuzzy_mapping.default = :'~0.9'
168
- def to_solr
169
- blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
170
- end
171
-
172
- #
173
- #
174
- def to_result
175
- [@original, @text]
176
- end
177
-
178
- # Displays the qualifier text and the text, joined.
179
- #
180
- # e.g. name:meier
181
- #
182
- def to_s
183
- [@qualifier, @text].compact.join ':'
184
- end
185
-
186
- private
187
-
188
- # Splits text into a qualifier and text.
189
- #
190
- # Returns [qualifier, text].
191
- #
192
- def split unqualified_text
193
- qualifier, text = (unqualified_text || '').split(':', 2)
194
- if text.blank?
195
- [nil, (qualifier || '')]
196
- else
197
- [qualifier, text]
198
- end
199
- end
200
-
201
- end
202
- end
@@ -1,86 +0,0 @@
1
- # encoding: utf-8
2
- #
3
-
4
- #
5
- #
6
- module Query
7
-
8
- # This class primarily handles switching through similar token constellations.
9
- #
10
- class Tokens # :nodoc:all
11
-
12
- # Basically delegates to its internal tokens array.
13
- #
14
- self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
15
-
16
- #
17
- #
18
- def initialize tokens = []
19
- @tokens = tokens
20
- end
21
-
22
- #
23
- #
24
- def tokenize_with tokenizer
25
- @tokens.each { |token| token.tokenize_with(tokenizer) }
26
- end
27
-
28
- # Generates an array in the form of
29
- # [
30
- # [combination], # of token 1
31
- # [combination, combination, combination], # of token 2
32
- # [combination, combination] # of token 3
33
- # ]
34
- #
35
- # TODO If we want token behaviour defined per Query, we can
36
- # compact! here
37
- #
38
- def possible_combinations_in type
39
- @tokens.inject([]) do |combinations, token|
40
- combinations << token.possible_combinations_in(type)
41
- end
42
- # TODO compact! if ignore_unassigned_tokens
43
- end
44
-
45
- # Makes the last of the tokens partial.
46
- #
47
- def partialize_last
48
- @tokens.last.partial = true unless empty?
49
- end
50
-
51
- # Caps the tokens to the maximum.
52
- #
53
- def cap maximum
54
- @tokens.slice!(maximum..-1) if cap?(maximum)
55
- end
56
- def cap? maximum
57
- @tokens.size > maximum
58
- end
59
-
60
- # Rejects blank tokens.
61
- #
62
- def reject
63
- @tokens.reject! &:blank?
64
- end
65
-
66
- # Returns a solr query.
67
- #
68
- def to_solr_query
69
- @tokens.map(&:to_solr).join ' '
70
- end
71
-
72
- #
73
- #
74
- def originals
75
- @tokens.map(&:original)
76
- end
77
-
78
- # Just join the token original texts.
79
- #
80
- def to_s
81
- originals.join ' '
82
- end
83
-
84
- end
85
-
86
- end