picky 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -1,130 +0,0 @@
1
- module Query
2
- # Container class for allocations.
3
- #
4
- class Allocations # :nodoc:all
5
-
6
- # TODO Remove size
7
- #
8
- delegate :each, :inject, :empty?, :size, :to => :@allocations
9
- attr_reader :total
10
-
11
- def initialize allocations = []
12
- @allocations = allocations
13
- end
14
-
15
- # Score each allocation.
16
- #
17
- def calculate_score weights
18
- @allocations.each do |allocation|
19
- allocation.calculate_score weights
20
- end
21
- end
22
- # Sort the allocations.
23
- #
24
- def sort
25
- @allocations.sort!
26
- end
27
-
28
- # Reduces the amount of allocations to x.
29
- #
30
- def reduce_to amount
31
- @allocations = @allocations.shift amount
32
- end
33
-
34
- # Keeps combinations.
35
- #
36
- # Only those passed in remain.
37
- #
38
- def keep identifiers = []
39
- @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
40
- end
41
- # Removes combinations.
42
- #
43
- # Only those passed in are removed.
44
- #
45
- # TODO Rewrite
46
- #
47
- def remove identifiers = []
48
- @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
49
- end
50
-
51
- # Returns the top amount ids.
52
- #
53
- def ids amount = 20
54
- @allocations.inject([]) do |total, allocation|
55
- total.size >= amount ? (return total.shift(amount)) : total + allocation.ids
56
- end
57
- end
58
-
59
- # Returns a random id from the allocations.
60
- #
61
- # Note: This is an ok algorithm for small id sets.
62
- #
63
- # But still TODO try for a faster one.
64
- #
65
- def random_ids amount = 1
66
- return [] if @allocations.empty?
67
- ids = @allocations.first.ids
68
- indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
69
- indexes.first(amount).map { |id| ids[id] }
70
- end
71
-
72
- # This is the main method of this class that will replace ids and count.
73
- #
74
- # What it does is calculate the ids and counts of its allocations
75
- # for use in the results. It also calculates the total count.
76
- #
77
- # Parameters:
78
- # * amount: the amount of ids to calculate
79
- # * offset: the offset from where in the result set to take the ids
80
- #
81
- # Note: With an amount of 0, an offset > 0 doesn't make much
82
- # sense, as seen in the live search.
83
- #
84
- # Note: Each allocation caches its count, but not its ids (thrown away).
85
- # The ids are cached in this class.
86
- #
87
- # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
88
- #
89
- def process! amount, offset = 0
90
- @total = 0
91
- current_offset = 0
92
- @allocations.each do |allocation|
93
- ids = allocation.process! amount, offset
94
- @total = @total + allocation.count # the total mixed in
95
- if ids.empty?
96
- offset = offset - allocation.count unless offset.zero?
97
- else
98
- amount = amount - ids.size # we need less results from the following allocation
99
- offset = 0 # we have already passed the offset
100
- end
101
- end
102
- end
103
-
104
- def uniq
105
- @allocations.uniq!
106
- end
107
-
108
- def to_a
109
- @allocations
110
- end
111
-
112
- # Simply inspects the internal allocations.
113
- #
114
- def to_s
115
- @allocations.inspect
116
- end
117
-
118
- # Allocations for results are in the form:
119
- # [
120
- # allocation1.to_result,
121
- # allocation2.to_result
122
- # ...
123
- # ]
124
- #
125
- def to_result
126
- @allocations.map(&:to_result).compact
127
- end
128
-
129
- end
130
- end
@@ -1,74 +0,0 @@
1
- module Query
2
-
3
- # Describes the combination of a token (the text) and
4
- # the index (the bundle).
5
- #
6
- # A combination is a single part of an allocation.
7
- #
8
- # An allocation consists of a number of combinations.
9
- #
10
- class Combination # :nodoc:all
11
-
12
- attr_reader :token, :bundle, :category_name
13
-
14
- def initialize token, category
15
- @token = token
16
- @category_name = category.name
17
- @bundle = category.bundle_for token
18
- @text = @token.text # don't want to use reset_similar already
19
- end
20
-
21
- # Note: Required for uniq!
22
- #
23
- def hash
24
- [@token.to_s, @bundle].hash
25
- end
26
-
27
- # Returns the weight of this combination.
28
- #
29
- # Note: Caching is useful most of the time.
30
- #
31
- def weight
32
- @weight ||= @bundle.weight(@text)
33
- end
34
-
35
- # Returns an array of ids for the given text.
36
- #
37
- # Note: Caching is useful most of the time.
38
- #
39
- def ids
40
- @ids ||= @bundle.ids(@text)
41
- end
42
-
43
- # The identifier for this combination.
44
- #
45
- def identifier
46
- @category_name
47
- end
48
-
49
- # Is the identifier in the given identifiers?
50
- #
51
- def in? identifiers
52
- identifiers.include? identifier
53
- end
54
-
55
- # Combines the category names with the original names.
56
- # [
57
- # [:title, 'Flarbl', :flarbl],
58
- # [:category, 'Gnorf', :gnorf]
59
- # ]
60
- #
61
- def to_result
62
- [identifier, *@token.to_result]
63
- end
64
-
65
- # Example:
66
- # "exact title:Peter*:peter"
67
- #
68
- def to_s
69
- "#{bundle.identifier} #{to_result.join(':')}"
70
- end
71
-
72
- end
73
-
74
- end
@@ -1,105 +0,0 @@
1
- module Query
2
-
3
- # Combinations are a number of Combination-s.
4
- #
5
- # They are the core of an allocation.
6
- #
7
- class Combinations # :nodoc:all
8
-
9
- attr_reader :combinations
10
-
11
- delegate :empty?, :to => :@combinations
12
-
13
- def initialize combinations = []
14
- @combinations = combinations
15
- end
16
-
17
- def hash
18
- @combinations.hash
19
- end
20
-
21
- # Uses user specific weights to calculate a score for the combinations.
22
- #
23
- def calculate_score weights
24
- total_score + weighted_score(weights)
25
- end
26
- def total_score
27
- @combinations.sum &:weight
28
- end
29
- def weighted_score weights
30
- weights.score @combinations
31
- end
32
-
33
- # Gets all ids for the allocations.
34
- #
35
- # Sorts the ids by size and & through them in the following order (sizes):
36
- # 0. [100_000, 400, 30, 2]
37
- # 1. [2, 30, 400, 100_000]
38
- # 2. (100_000 & (400 & (30 & 2))) # => result
39
- #
40
- # Returns the ids. Also sets the count.
41
- #
42
- # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
43
- #
44
- def ids
45
- return [] if @combinations.empty?
46
-
47
- # Get the ids for each combination.
48
- #
49
- id_arrays = @combinations.inject([]) do |total, combination|
50
- total << combination.ids
51
- end
52
-
53
- # Order by smallest size first such that the intersect can be performed faster.
54
- #
55
- # TODO Move into the memory_efficient_intersect such that
56
- # this precondition for a fast algorithm is always given.
57
- #
58
- id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
59
-
60
- # Call the optimized C algorithm.
61
- #
62
- Performant::Array.memory_efficient_intersect id_arrays
63
- end
64
-
65
- # Wrap the combinations into an allocation with the result_identifier.
66
- #
67
- def pack_into_allocation result_identifier
68
- Allocation.new self, result_identifier
69
- end
70
-
71
- # Filters the tokens and identifiers such that only identifiers
72
- # that are passed in, remain, including their tokens.
73
- #
74
- # Note: This method is not totally independent of the calculate_ids one.
75
- # Since identifiers are only nullified, we need to not include the
76
- # ids that have an associated identifier that is nil.
77
- #
78
- def keep identifiers = []
79
- # TODO Rewrite to use the category!!!
80
- #
81
- @combinations.reject! { |combination| !combination.in?(identifiers) }
82
- end
83
-
84
- # Filters the tokens and identifiers such that identifiers
85
- # that are passed in, are removed, including their tokens.
86
- #
87
- # Note: This method is not totally independent of the calculate_ids one.
88
- # Since identifiers are only nullified, we need to not include the
89
- # ids that have an associated identifier that is nil.
90
- #
91
- def remove identifiers = []
92
- # TODO Rewrite to use the category!!!
93
- #
94
- @combinations.reject! { |combination| combination.in?(identifiers) }
95
- end
96
-
97
- #
98
- #
99
- def to_result
100
- @combinations.map &:to_result
101
- end
102
-
103
- end
104
-
105
- end
@@ -1,77 +0,0 @@
1
- # coding: utf-8
2
- #
3
-
4
- #
5
- #
6
- module Query
7
-
8
- # A single qualifier.
9
- #
10
- class Qualifier # :nodoc:all
11
-
12
- attr_reader :normalized_qualifier, :codes
13
-
14
- #
15
- #
16
- # codes is an array.
17
- #
18
- def initialize normalized_qualifier, codes
19
- @normalized_qualifier = normalized_qualifier
20
- @codes = codes.map &:to_sym
21
- end
22
-
23
- # Will overwrite if the key is present in the hash.
24
- #
25
- def inject_into hash
26
- codes.each do |code|
27
- hash[code] = normalized_qualifier
28
- end
29
- end
30
-
31
- end
32
-
33
- # Collection class for qualifiers.
34
- #
35
- class Qualifiers # :nodoc:all
36
-
37
- include Singleton
38
-
39
- attr_reader :qualifiers, :normalization_mapping
40
-
41
- delegate :<<, :to => :qualifiers
42
-
43
- #
44
- #
45
- def initialize
46
- @qualifiers = []
47
- @normalization_mapping = {}
48
- end
49
-
50
- # TODO Spec.
51
- #
52
- def self.add name, qualifiers
53
- instance << Qualifier.new(name, qualifiers)
54
- end
55
-
56
- # Uses the qualifiers to prepare (optimize) the qualifier handling.
57
- #
58
- def prepare
59
- qualifiers.each do |qualifier|
60
- qualifier.inject_into normalization_mapping
61
- end
62
- end
63
-
64
- # Normalizes the given qualifier.
65
- #
66
- # Returns nil if it is not allowed, the normalized qualifier if it is.
67
- #
68
- # Note: Normalizes.
69
- #
70
- def normalize qualifier
71
- return nil if qualifier.blank?
72
-
73
- normalization_mapping[qualifier.to_sym]
74
- end
75
-
76
- end
77
- end
@@ -1,202 +0,0 @@
1
- module Query
2
-
3
- # This is a query token. Together with other tokens it makes up a query.
4
- #
5
- # It remembers the original form, and a normalized form.
6
- #
7
- # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
8
- #
9
- # TODO Make partial / similarity char configurable.
10
- #
11
- class Token # :nodoc:all
12
-
13
- attr_reader :text, :original
14
- attr_writer :similar
15
-
16
- delegate :blank?, :to => :text
17
-
18
- # Normal initializer.
19
- #
20
- # Note: Use this if you do not want a qualified and normalized token.
21
- #
22
- def initialize text
23
- @text = text
24
- end
25
-
26
- # Returns a qualified and normalized token.
27
- #
28
- # Note: Use this in the search engine if you need a qualified
29
- # and normalized token. I.e. one prepared for a search.
30
- #
31
- def self.processed text
32
- token = new text
33
- token.qualify
34
- token.extract_original
35
- token.partialize
36
- token.similarize
37
- token.remove_illegals
38
- token
39
- end
40
-
41
- # This returns a predefined category name if the user has given one.
42
- #
43
- def user_defined_category_name
44
- @qualifier
45
- end
46
-
47
- # Extracts a qualifier for this token and pre-assigns an allocation.
48
- #
49
- # Note: Removes the qualifier if it is not allowed.
50
- #
51
- def qualify
52
- @qualifier, @text = split @text
53
- @qualifier = Query::Qualifiers.instance.normalize @qualifier
54
- end
55
- def extract_original
56
- @original = @text.dup
57
- end
58
-
59
- # Partial is a conditional setter.
60
- #
61
- # It is only settable if it hasn't been set yet.
62
- #
63
- def partial= partial
64
- @partial = partial if @partial.nil?
65
- end
66
- def partial?
67
- !@similar && @partial
68
- end
69
-
70
- # If the text ends with *, partialize it. If with ", don't.
71
- #
72
- @@no_partial = /\"\Z/
73
- @@partial = /\*\Z/
74
- def partialize
75
- self.partial = false and return if @text =~ @@no_partial
76
- self.partial = true if @text =~ @@partial
77
- end
78
-
79
- # If the text ends with ~ similarize it. If with ", don't.
80
- #
81
- @@no_similar = /\"\Z/
82
- @@similar = /\~\Z/
83
- def similarize
84
- self.similar = false and return if @text =~ @@no_similar
85
- self.similar = true if @text =~ @@similar
86
- end
87
-
88
- def similar?
89
- @similar
90
- end
91
-
92
- # Normalizes this token's text.
93
- #
94
- @@illegals = /["*~]/
95
- def remove_illegals
96
- @text.gsub! @@illegals, '' unless @text.blank?
97
- end
98
-
99
- # Visitor for tokenizer.
100
- #
101
- # TODO Rewrite!!!
102
- #
103
- def tokenize_with tokenizer
104
- @text = tokenizer.normalize @text
105
- end
106
- # TODO spec!
107
- #
108
- # TODO Rewrite!!
109
- #
110
- def tokenized tokenizer
111
- tokenizer.tokenize(@text.to_s).each do |text|
112
- yield text
113
- end
114
- end
115
-
116
- # Returns an array of possible combinations.
117
- #
118
- def possible_combinations_in type
119
- type.possible_combinations self
120
- end
121
-
122
- # Returns a token with the next similar text.
123
- #
124
- # TODO Rewrite this. It is hard to understand. Also spec performance.
125
- #
126
- def next_similar_token category
127
- token = self.dup
128
- token if token.next_similar category.bundle_for(token)
129
- end
130
- # Sets and returns the next similar word.
131
- #
132
- # Note: Also overrides the original.
133
- #
134
- def next_similar bundle
135
- @text = @original = (similarity(bundle).shift || return) if similar?
136
- end
137
- # Lazy similar reader.
138
- #
139
- def similarity bundle = nil
140
- @similarity || @similarity = generate_similarity_for(bundle)
141
- end
142
- # Returns an enumerator that traverses over the similar.
143
- #
144
- # Note: The dup isn't too nice – since it is needed on account of the shift, above.
145
- # (We avoid a StopIteration exception. Which of both is less evil?)
146
- #
147
- def generate_similarity_for bundle
148
- bundle.similar(@text).dup || []
149
- end
150
-
151
- # Generates a solr term from this token.
152
- #
153
- # E.g. "name:heroes~0.75"
154
- #
155
- @@solr_fuzzy_mapping = {
156
- 1 => :'',
157
- 2 => :'',
158
- 3 => :'',
159
- 4 => :'~0.74',
160
- 5 => :'~0.78',
161
- 6 => :'~0.81',
162
- 7 => :'~0.83',
163
- 8 => :'~0.85',
164
- 9 => :'~0.87',
165
- 10 => :'~0.89'
166
- }
167
- @@solr_fuzzy_mapping.default = :'~0.9'
168
- def to_solr
169
- blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
170
- end
171
-
172
- #
173
- #
174
- def to_result
175
- [@original, @text]
176
- end
177
-
178
- # Displays the qualifier text and the text, joined.
179
- #
180
- # e.g. name:meier
181
- #
182
- def to_s
183
- [@qualifier, @text].compact.join ':'
184
- end
185
-
186
- private
187
-
188
- # Splits text into a qualifier and text.
189
- #
190
- # Returns [qualifier, text].
191
- #
192
- def split unqualified_text
193
- qualifier, text = (unqualified_text || '').split(':', 2)
194
- if text.blank?
195
- [nil, (qualifier || '')]
196
- else
197
- [qualifier, text]
198
- end
199
- end
200
-
201
- end
202
- end
@@ -1,86 +0,0 @@
1
- # encoding: utf-8
2
- #
3
-
4
- #
5
- #
6
- module Query
7
-
8
- # This class primarily handles switching through similar token constellations.
9
- #
10
- class Tokens # :nodoc:all
11
-
12
- # Basically delegates to its internal tokens array.
13
- #
14
- self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
15
-
16
- #
17
- #
18
- def initialize tokens = []
19
- @tokens = tokens
20
- end
21
-
22
- #
23
- #
24
- def tokenize_with tokenizer
25
- @tokens.each { |token| token.tokenize_with(tokenizer) }
26
- end
27
-
28
- # Generates an array in the form of
29
- # [
30
- # [combination], # of token 1
31
- # [combination, combination, combination], # of token 2
32
- # [combination, combination] # of token 3
33
- # ]
34
- #
35
- # TODO If we want token behaviour defined per Query, we can
36
- # compact! here
37
- #
38
- def possible_combinations_in type
39
- @tokens.inject([]) do |combinations, token|
40
- combinations << token.possible_combinations_in(type)
41
- end
42
- # TODO compact! if ignore_unassigned_tokens
43
- end
44
-
45
- # Makes the last of the tokens partial.
46
- #
47
- def partialize_last
48
- @tokens.last.partial = true unless empty?
49
- end
50
-
51
- # Caps the tokens to the maximum.
52
- #
53
- def cap maximum
54
- @tokens.slice!(maximum..-1) if cap?(maximum)
55
- end
56
- def cap? maximum
57
- @tokens.size > maximum
58
- end
59
-
60
- # Rejects blank tokens.
61
- #
62
- def reject
63
- @tokens.reject! &:blank?
64
- end
65
-
66
- # Returns a solr query.
67
- #
68
- def to_solr_query
69
- @tokens.map(&:to_solr).join ' '
70
- end
71
-
72
- #
73
- #
74
- def originals
75
- @tokens.map(&:original)
76
- end
77
-
78
- # Just join the token original texts.
79
- #
80
- def to_s
81
- originals.join ' '
82
- end
83
-
84
- end
85
-
86
- end