picky 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,103 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Query
4
+
5
+ # This class primarily handles switching through similar token constellations.
6
+ #
7
+ class Tokens # :nodoc:all
8
+
9
+ # Basically delegates to its internal tokens array.
10
+ #
11
+ self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
12
+
13
+ # Create a new Tokens object with the array of tokens passed in.
14
+ #
15
+ def initialize tokens = []
16
+ @tokens = tokens
17
+ end
18
+
19
+ # Creates a new Tokens object from a number of Strings.
20
+ #
21
+ # Options:
22
+ # * downcase: Whether to downcase the passed strings (default is true)
23
+ #
24
+ def self.processed words, downcase = true
25
+ new words.collect! { |word| Token.processed word, downcase }
26
+ end
27
+
28
+ # Tokenizes each token.
29
+ #
30
+ # Note: Passed tokenizer needs to offer #normalize(text).
31
+ #
32
+ def tokenize_with tokenizer
33
+ @tokens.each { |token| token.tokenize_with(tokenizer) }
34
+ end
35
+
36
+ # Generates an array in the form of
37
+ # [
38
+ # [combination], # of token 1
39
+ # [combination, combination, combination], # of token 2
40
+ # [combination, combination] # of token 3
41
+ # ]
42
+ #
43
+ def possible_combinations_in index
44
+ @tokens.inject([]) do |combinations, token|
45
+ possible_combinations = token.possible_combinations_in index
46
+
47
+ # TODO Could move the ignore_unassigned_tokens here!
48
+ #
49
+ # Note: Optimization for ignoring tokens that allocate to nothing and
50
+ # can be ignored.
51
+ # For example in a special search, where "florian" is not
52
+ # mapped to any category.
53
+ #
54
+ possible_combinations ? combinations << possible_combinations : combinations
55
+ end
56
+ end
57
+
58
+ # Makes the last of the tokens partial.
59
+ #
60
+ def partialize_last
61
+ @tokens.last.partial = true unless empty?
62
+ end
63
+
64
+ # Caps the tokens to the maximum.
65
+ #
66
+ def cap maximum
67
+ @tokens.slice!(maximum..-1) if cap?(maximum)
68
+ end
69
+ def cap? maximum
70
+ @tokens.size > maximum
71
+ end
72
+
73
+ # Rejects blank tokens.
74
+ #
75
+ def reject
76
+ @tokens.reject! &:blank?
77
+ end
78
+
79
+ # Returns a solr query.
80
+ #
81
+ def to_solr_query
82
+ @tokens.map(&:to_solr).join ' '
83
+ end
84
+
85
+ #
86
+ #
87
+ def originals
88
+ @tokens.map(&:original)
89
+ end
90
+
91
+ def == other
92
+ self.tokens == other.tokens
93
+ end
94
+
95
+ # Just join the token original texts.
96
+ #
97
+ def to_s
98
+ originals.join ' '
99
+ end
100
+
101
+ end
102
+
103
+ end
File without changes
data/lib/picky/results.rb CHANGED
@@ -10,7 +10,7 @@ class Results
10
10
 
11
11
  # Takes instances of Query::Allocations as param.
12
12
  #
13
- def initialize amount = 0, offset = 0, allocations = Internals::Query::Allocations.new
13
+ def initialize amount = 0, offset = 0, allocations = Query::Allocations.new
14
14
  @offset = offset
15
15
  @amount = amount
16
16
  @allocations = allocations
data/lib/picky/search.rb CHANGED
@@ -35,7 +35,7 @@ class Search
35
35
  def initialize *index_definitions
36
36
  options = Hash === index_definitions.last ? index_definitions.pop : {}
37
37
 
38
- @indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
38
+ @indexes = Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
39
39
  searching options[:tokenizer]
40
40
  boost options[:weights]
41
41
 
@@ -54,11 +54,11 @@ class Search
54
54
  @tokenizer = if options.respond_to?(:tokenize)
55
55
  options
56
56
  else
57
- options && Internals::Tokenizers::Query.new(options)
57
+ options && Tokenizers::Query.new(options)
58
58
  end
59
59
  end
60
60
  def tokenizer
61
- @tokenizer || Internals::Tokenizers::Query.default
61
+ @tokenizer || Tokenizers::Query.default
62
62
  end
63
63
  # TODO Doc. Spec.
64
64
  #
@@ -82,14 +82,14 @@ class Search
82
82
  # Picky will raise a Query::Indexes::DifferentTypesError.
83
83
  #
84
84
  @@mapping = {
85
- Index::Memory => Internals::Query::Combinations::Memory,
86
- Index::Redis => Internals::Query::Combinations::Redis
85
+ Index::Memory => Query::Combinations::Memory,
86
+ Index::Redis => Query::Combinations::Redis
87
87
  }
88
88
  def combinations_type_for index_definitions_ary
89
89
  index_types = index_definitions_ary.map(&:class)
90
90
  index_types.uniq!
91
91
  raise_different(index_types) if index_types.size > 1
92
- !index_types.empty? && @@mapping[*index_types] || Internals::Query::Combinations::Memory
92
+ !index_types.empty? && @@mapping[*index_types] || Query::Combinations::Memory
93
93
  end
94
94
  # Currently it isn't possible using Memory and Redis etc.
95
95
  # indexes in the same query index group.
@@ -87,7 +87,7 @@ module Sources
87
87
  def take_snapshot index
88
88
  connect_backend
89
89
 
90
- origin = snapshot_table_name index
90
+ origin = snapshot_table_name index.name
91
91
  on_database = database.connection
92
92
 
93
93
  # Drop the table if it exists.
@@ -109,16 +109,16 @@ module Sources
109
109
 
110
110
  # Counts all the entries that are used for the index.
111
111
  #
112
- def count index
112
+ def count index_name
113
113
  connect_backend
114
114
 
115
- database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
115
+ database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
116
116
  end
117
117
 
118
118
  # The name of the snapshot table created by Picky.
119
119
  #
120
- def snapshot_table_name index
121
- "picky_#{index.name}_index"
120
+ def snapshot_table_name index_name
121
+ "picky_#{index_name}_index"
122
122
  end
123
123
 
124
124
  # Harvests the data to index in chunks.
@@ -126,7 +126,7 @@ module Sources
126
126
  def harvest category, &block
127
127
  connect_backend
128
128
 
129
- (0..count(category.index)).step(chunksize) do |offset|
129
+ (0..count(category.index_name)).step(chunksize) do |offset|
130
130
  get_data category, offset, &block
131
131
  end
132
132
  end
@@ -166,7 +166,7 @@ module Sources
166
166
  # The harvest statement used to pull data from the snapshot table.
167
167
  #
168
168
  def harvest_statement category
169
- "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index)} st"
169
+ "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
170
170
  end
171
171
 
172
172
  # The amount of records that are loaded each chunk.
@@ -10,7 +10,7 @@ module Sources
10
10
 
11
11
  def initialize source, grid, precision = 1
12
12
  super source
13
- @calculation = Internals::Calculations::Location.new grid, precision
13
+ @calculation = Calculations::Location.new grid, precision
14
14
  end
15
15
 
16
16
  # Yield the data (id, text for id) for the given category.
@@ -42,7 +42,7 @@ module Sources
42
42
 
43
43
  # TODO Move to the right place.
44
44
  #
45
- category.exact[:location_minimum] = minimum
45
+ category.indexing_exact[:location_minimum] = minimum
46
46
  end
47
47
 
48
48
  end
@@ -0,0 +1,224 @@
1
+ module Tokenizers # :nodoc:all
2
+
3
+ # Defines tokenizing processes used both in indexing and querying.
4
+ #
5
+ class Base
6
+
7
+ # TODO Move EMPTY_STRING top level.
8
+ #
9
+ EMPTY_STRING = ''.freeze
10
+
11
+ def to_s
12
+ reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
13
+ <<-TOKENIZER
14
+ Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
15
+ Stopwords: #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
16
+ Splits text on: #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
17
+ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@removes_characters_after_splitting_regexp.source}/" : '-'}
18
+ Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
19
+ Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
20
+ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
21
+ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
22
+ TOKENIZER
23
+ end
24
+
25
+ # Stopwords.
26
+ #
27
+ # We only allow regexps (even if string would be okay
28
+ # too for gsub! - it's too hard to understand)
29
+ #
30
+ def stopwords regexp
31
+ check_argument_in __method__, Regexp, regexp
32
+ @remove_stopwords_regexp = regexp
33
+ end
34
+ def remove_stopwords text
35
+ text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
36
+ text
37
+ end
38
+ @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
39
+ def remove_non_single_stopwords text
40
+ return text if text.match @@non_single_stopword_regexp
41
+ remove_stopwords text
42
+ end
43
+
44
+ # Illegals.
45
+ #
46
+ # We only allow regexps (even if string would be okay
47
+ # too for gsub! - it's too hard to understand)
48
+ #
49
+ def removes_characters regexp
50
+ check_argument_in __method__, Regexp, regexp
51
+ @removes_characters_regexp = regexp
52
+ end
53
+ def remove_illegals text
54
+ text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
55
+ text
56
+ end
57
+
58
+ # Splitting.
59
+ #
60
+ # We allow Strings and Regexps.
61
+ # Note: We do not test against to_str since symbols do not work with String#split.
62
+ #
63
+ def splits_text_on regexp_or_string
64
+ raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
65
+ @splits_text_on = regexp_or_string
66
+ end
67
+ def split text
68
+ text.split @splits_text_on
69
+ end
70
+
71
+ # Normalizing.
72
+ #
73
+ # We only allow arrays.
74
+ #
75
+ def normalizes_words regexp_replaces
76
+ raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
77
+ @normalizes_words_regexp_replaces = regexp_replaces
78
+ end
79
+ def normalize_with_patterns text
80
+ return text unless @normalizes_words_regexp_replaces
81
+
82
+ @normalizes_words_regexp_replaces.each do |regex, replace|
83
+ # This should be sufficient
84
+ #
85
+ text.gsub!(regex, replace) and break
86
+ end
87
+ remove_after_normalizing_illegals text
88
+ text
89
+ end
90
+
91
+ # Illegal after normalizing.
92
+ #
93
+ # We only allow regexps (even if string would be okay
94
+ # too for gsub! - it's too hard to understand)
95
+ #
96
+ def removes_characters_after_splitting regexp
97
+ check_argument_in __method__, Regexp, regexp
98
+ @removes_characters_after_splitting_regexp = regexp
99
+ end
100
+ def remove_after_normalizing_illegals text
101
+ text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
102
+ end
103
+
104
+ # Substitute Characters with this substituter.
105
+ #
106
+ # Default is European Character substitution.
107
+ #
108
+ def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
109
+ raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
110
+ @substituter = substituter
111
+ end
112
+ def substitute_characters text
113
+ substituter?? substituter.substitute(text) : text
114
+ end
115
+
116
+ # Reject tokens after tokenizing based on the given criteria.
117
+ #
118
+ # Note: Currently only for indexing.
119
+ #
120
+ def reject_token_if &condition
121
+ @reject_condition = condition
122
+ end
123
+ def reject tokens
124
+ tokens.reject! &@reject_condition
125
+ end
126
+
127
+ def case_sensitive case_sensitive
128
+ @case_sensitive = case_sensitive
129
+ end
130
+ def downcase?
131
+ !@case_sensitive
132
+ end
133
+
134
+ # Checks if the right argument type has been given.
135
+ #
136
+ def check_argument_in method, type, argument, &condition
137
+ raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
138
+ end
139
+
140
+
141
+ # Returns a number of tokens, generated from the given text.
142
+ #
143
+ # Note:
144
+ # * preprocess, pretokenize are hooks
145
+ #
146
+ def tokenize text
147
+ text = preprocess text # processing the text
148
+ return empty_tokens if text.blank?
149
+ words = pretokenize text # splitting and preparations for tokenizing
150
+ return empty_tokens if words.empty?
151
+ tokens = tokens_for words # creating tokens / strings
152
+ process tokens # processing tokens / strings
153
+ end
154
+
155
+ attr_reader :substituter
156
+ alias substituter? substituter
157
+
158
+ def initialize options = {}
159
+ removes_characters options[:removes_characters] if options[:removes_characters]
160
+ contracts_expressions *options[:contracts_expressions] if options[:contracts_expressions]
161
+ stopwords options[:stopwords] if options[:stopwords]
162
+ normalizes_words options[:normalizes_words] if options[:normalizes_words]
163
+ removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
164
+ substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
165
+ case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
166
+
167
+ # Defaults.
168
+ #
169
+ splits_text_on options[:splits_text_on] || /\s/
170
+ reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
171
+ end
172
+
173
+ # Default preprocessing hook.
174
+ #
175
+ # Does:
176
+ # 1. Character substitution.
177
+ # 2. Remove illegal expressions.
178
+ # 3. Remove non-single stopwords. (Stopwords that occur with other words)
179
+ #
180
+ def preprocess text
181
+ text = substitute_characters text
182
+ remove_illegals text
183
+ # We do not remove single stopwords e.g. in the indexer for
184
+ # an entirely different reason than in the query tokenizer.
185
+ # An indexed thing with just name "UND" (a possible stopword)
186
+ # should not lose its name.
187
+ #
188
+ remove_non_single_stopwords text
189
+ text
190
+ end
191
+ # Pretokenizing.
192
+ #
193
+ # Does:
194
+ # 1. Split the text into words.
195
+ # 2. Normalize each word.
196
+ #
197
+ def pretokenize text
198
+ words = split text
199
+ words.collect! do |word|
200
+ normalize_with_patterns word
201
+ word
202
+ end
203
+ end
204
+ # Basic postprocessing (overridden in both query/index tokenizers).
205
+ #
206
+ def process tokens
207
+ reject tokens # Reject any tokens that don't meet criteria
208
+ tokens
209
+ end
210
+
211
+ # # Converts words into real tokens.
212
+ # #
213
+ # def tokens_for words
214
+ # Query::Tokens.new words.collect! { |word| token_for word }
215
+ # end
216
+ # Turns non-blank text into symbols.
217
+ #
218
+ def symbolize text
219
+ text.blank? ? nil : text.to_sym
220
+ end
221
+
222
+ end
223
+
224
+ end
@@ -0,0 +1,30 @@
1
module Tokenizers

  # The base indexing tokenizer.
  #
  # Override in indexing subclasses and define in configuration.
  #
  class Index < Base

    class << self
      # Sets the tokenizer instance handed out by .default.
      #
      attr_writer :default

      # The default tokenizer. A new instance is created (and
      # remembered) on first use if none has been set.
      #
      def default
        @default ||= new
      end
    end

    # Does not actually return tokens, but symbol "tokens".
    #
    # The given words array is changed in place: each word is
    # downcased (in place) if downcase? says so, then replaced
    # by its symbol.
    #
    def tokens_for words
      words.map! do |word|
        word.downcase! if downcase?
        word.to_sym
      end
    end

    # Returns empty tokens: for indexing, a plain empty array.
    #
    def empty_tokens
      []
    end

  end

end
@@ -0,0 +1,49 @@
1
module Tokenizers

  # Tokenizer for location values. Work in progress, see #tokenize.
  #
  class Location < Base

    attr_reader :calculation

    # Options (in addition to what is passed on to Base):
    #   * grid:      Handed to Calculations::Location.
    #   * precision: Handed to Calculations::Location, 1 if not given.
    #
    def initialize options = {}
      super options

      grid      = options[:grid]
      precision = options[:precision] || 1

      @calculation = Calculations::Location.new grid, precision

      # Start at infinity so that any harvested location is smaller.
      #
      @minimum   = 1.0 / 0
      @locations = []
    end

    # TODO Work on this!
    #
    # Harvests all locations, tells the calculation about the smallest
    # one, then yields the indexed id together with each recalculated
    # location (as a string).
    #
    # Note: The text parameter is not used.
    #
    # NOTE(review): source and category are not defined in this class;
    # presumably they are meant to come from Base or the caller - confirm.
    #
    def tokenize text

      # Gather the minimum and remember every location.
      #
      # The instance variables from #initialize are used on purpose:
      # a bare `minimum = location if location < minimum` creates a
      # new block-local variable that is still nil when it is compared,
      # and there are no minimum/locations reader methods.
      #
      source.harvest category do |indexed_id, location|
        location = location.to_f
        @minimum = location if location < @minimum
        @locations << [indexed_id, location]
      end

      calculation.minimum = @minimum

      # Recalculate locations.
      #
      @locations.each do |indexed_id, location|
        calculation.recalculated_range(location).each do |new_location|
          yield indexed_id, new_location.to_s
        end
      end

      # TODO Move to the right place.
      #
      category.indexing_exact[:location_minimum] = @minimum
    end

  end

end
@@ -0,0 +1,55 @@
1
+ # encoding: utf-8
2
+ #
3
module Tokenizers

  # The query tokenizer.
  #
  # There are a few class methods that you can use to configure how a query works:
  #
  #   removes_characters regexp
  #   illegal_after_normalizing regexp
  #   stopwords regexp
  #   contracts_expressions regexp, to_string
  #   splits_text_on regexp
  #   normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
  #
  class Query < Base

    class << self
      # Sets the tokenizer instance handed out by .default.
      #
      attr_writer :default

      # The default tokenizer. A new instance is created (and
      # remembered) on first use if none has been set.
      #
      def default
        @default ||= new
      end
    end

    attr_reader :maximum_tokens

    # Options (in addition to what is passed on to Base):
    #   * maximum_tokens: Handed to tokens.cap in #process, 5 if not given.
    #
    def initialize options = {}
      super options
      @maximum_tokens = options[:maximum_tokens] || 5
    end

    # Lets the tokens process themselves: reject, limit, and partialize.
    #
    # In querying we work with real tokens (in indexing it's just symbols).
    #
    # Returns the given tokens object.
    #
    def process tokens
      tokens.tap do |given|
        given.reject                 # Reject any tokens that don't meet criteria.
        given.cap maximum_tokens     # Cut off superfluous tokens.
        given.partialize_last        # Set certain tokens as partial.
      end
    end

    # Converts words into real tokens.
    #
    def tokens_for words
      ::Query::Tokens.processed words, downcase?
    end

    # Returns a (new) tokens object.
    #
    def empty_tokens
      ::Query::Tokens.new
    end

  end

end
data/lib/tasks/index.rake CHANGED
@@ -23,9 +23,10 @@ namespace :index do
23
23
  desc "Generates a specific index from index snapshots (category optional)."
24
24
  task :specific, [:index, :category] => :application do |_, options|
25
25
  index, category = options.index, options.category
26
- specific_index = Indexes.find index.to_sym, (category && category.to_sym)
27
- specific_index.index!
28
- specific_index.cache!
26
+
27
+ specific = Indexes[index]
28
+ specific = specific[category] if category
29
+ specific.index
29
30
  end
30
31
 
31
32
  end
data/lib/tasks/try.rake CHANGED
@@ -6,7 +6,7 @@ namespace :try do
6
6
  task :index, [:text, :index, :category] => :application do |_, options|
7
7
  text, index, category = options.text, options.index, options.category
8
8
 
9
- tokenizer = category ? Indexes.find(index, category).tokenizer : Internals::Tokenizers::Index.default
9
+ tokenizer = category ? Indexes.find(index, category).tokenizer : Tokenizers::Index.default
10
10
 
11
11
  puts "\"#{text}\" is saved in the index as #{tokenizer.tokenize(text.dup).to_a}"
12
12
  end
@@ -15,7 +15,7 @@ namespace :try do
15
15
  task :query, [:text] => :application do |_, options|
16
16
  text = options.text
17
17
 
18
- puts "\"#{text}\" as a search will be preprocessed into #{Internals::Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
18
+ puts "\"#{text}\" as a search will be preprocessed into #{Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
19
19
  puts
20
20
  puts "(category qualifiers, e.g. title: are removed if they do not exist as a qualifier, so 'toitle:bla' -> 'bla')"
21
21
  end
@@ -2,7 +2,7 @@
2
2
  #
3
3
  require 'spec_helper'
4
4
 
5
- describe Internals::Adapters::Rack::Base do
5
+ describe Adapters::Rack::Base do
6
6
 
7
7
  before(:each) do
8
8
  @adapter = described_class.new
@@ -2,7 +2,7 @@
2
2
  #
3
3
  require 'spec_helper'
4
4
 
5
- describe Internals::Adapters::Rack::LiveParameters do
5
+ describe Adapters::Rack::LiveParameters do
6
6
 
7
7
  let(:live_parameters) { stub :live_parameters }
8
8
  let(:adapter) { described_class.new live_parameters }
@@ -2,7 +2,7 @@
2
2
  #
3
3
  require 'spec_helper'
4
4
 
5
- describe Internals::Adapters::Rack::Query do
5
+ describe Adapters::Rack::Query do
6
6
 
7
7
  before(:each) do
8
8
  @query = stub :query
@@ -15,8 +15,8 @@ describe Application do
15
15
 
16
16
  route %r{^/books} => Search.new(books)
17
17
  end
18
- Internals::Tokenizers::Index.default.tokenize 'some text'
19
- Internals::Tokenizers::Query.default.tokenize 'some text'
18
+ Tokenizers::Index.default.tokenize 'some text'
19
+ Tokenizers::Query.default.tokenize 'some text'
20
20
  }.should_not raise_error
21
21
  end
22
22
  it "should run ok" do
@@ -105,7 +105,7 @@ describe Application do
105
105
  lambda { Application.rack_adapter }.should_not raise_error
106
106
  end
107
107
  it "should return a new FrontendAdapters::Rack instance" do
108
- Application.rack_adapter.should be_kind_of(Internals::FrontendAdapters::Rack)
108
+ Application.rack_adapter.should be_kind_of(FrontendAdapters::Rack)
109
109
  end
110
110
  it "should cache the instance" do
111
111
  Application.rack_adapter.should == Application.rack_adapter
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Internals::Index::File::Basic do
3
+ describe Backend::File::Basic do
4
4
 
5
5
  let(:file) { described_class.new 'some/cache/path/to/file' }
6
6
 
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Internals::Index::File::JSON do
3
+ describe Backend::File::JSON do
4
4
 
5
5
  before(:each) do
6
6
  @file = described_class.new "some_cache_path"