picky 2.5.2 → 2.6.0

Files changed (255)
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
data/lib/picky/query/tokens.rb ADDED
@@ -0,0 +1,103 @@
+ # encoding: utf-8
+ #
+ module Query
+
+   # This class primarily handles switching through similar token constellations.
+   #
+   class Tokens # :nodoc:all
+
+     # Basically delegates to its internal tokens array.
+     #
+     self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
+
+     # Create a new Tokens object with the array of tokens passed in.
+     #
+     def initialize tokens = []
+       @tokens = tokens
+     end
+
+     # Creates a new Tokens object from a number of Strings.
+     #
+     # Options:
+     #  * downcase: Whether to downcase the passed strings (default is true)
+     #
+     def self.processed words, downcase = true
+       new words.collect! { |word| Token.processed word, downcase }
+     end
+
+     # Tokenizes each token.
+     #
+     # Note: Passed tokenizer needs to offer #normalize(text).
+     #
+     def tokenize_with tokenizer
+       @tokens.each { |token| token.tokenize_with(tokenizer) }
+     end
+
+     # Generates an array in the form of
+     # [
+     #   [combination],                           # of token 1
+     #   [combination, combination, combination], # of token 2
+     #   [combination, combination]               # of token 3
+     # ]
+     #
+     def possible_combinations_in index
+       @tokens.inject([]) do |combinations, token|
+         possible_combinations = token.possible_combinations_in index
+
+         # TODO Could move the ignore_unassigned_tokens here!
+         #
+         # Note: Optimization for ignoring tokens that allocate to nothing and
+         #       can be ignored.
+         #       For example in a special search, where "florian" is not
+         #       mapped to any category.
+         #
+         possible_combinations ? combinations << possible_combinations : combinations
+       end
+     end
+
+     # Makes the last of the tokens partial.
+     #
+     def partialize_last
+       @tokens.last.partial = true unless empty?
+     end
+
+     # Caps the tokens to the maximum.
+     #
+     def cap maximum
+       @tokens.slice!(maximum..-1) if cap?(maximum)
+     end
+     def cap? maximum
+       @tokens.size > maximum
+     end
+
+     # Rejects blank tokens.
+     #
+     def reject
+       @tokens.reject! &:blank?
+     end
+
+     # Returns a solr query.
+     #
+     def to_solr_query
+       @tokens.map(&:to_solr).join ' '
+     end
+
+     #
+     #
+     def originals
+       @tokens.map(&:original)
+     end
+
+     def == other
+       self.tokens == other.tokens
+     end
+
+     # Just join the token original texts.
+     #
+     def to_s
+       originals.join ' '
+     end
+
+   end
+
+ end
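
For orientation, here is a rough usage sketch of the new Tokens container. The input words are invented, and the behaviour is inferred from the methods in the hunk above together with the companion Query::Token class that ships in this same release:

    require 'picky'

    # Build processed tokens from plain strings (downcased by default).
    tokens = Query::Tokens.processed %w(alan turing)

    tokens.cap 5            # keeps at most 5 tokens -- a no-op with only 2
    tokens.partialize_last  # marks the last token ("turing") as partial
    tokens.to_s             # joins the original texts, roughly "alan turing"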
data/lib/picky/{internals/query → query}/weights.rb
File without changes
data/lib/picky/results.rb CHANGED
@@ -10,7 +10,7 @@ class Results
 
    # Takes instances of Query::Allocations as param.
    #
-   def initialize amount = 0, offset = 0, allocations = Internals::Query::Allocations.new
+   def initialize amount = 0, offset = 0, allocations = Query::Allocations.new
      @offset = offset
      @amount = amount
      @allocations = allocations
data/lib/picky/search.rb CHANGED
@@ -35,7 +35,7 @@ class Search
    def initialize *index_definitions
      options = Hash === index_definitions.last ? index_definitions.pop : {}
 
-     @indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
+     @indexes = Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
      searching options[:tokenizer]
      boost options[:weights]
 
@@ -54,11 +54,11 @@ class Search
      @tokenizer = if options.respond_to?(:tokenize)
        options
      else
-       options && Internals::Tokenizers::Query.new(options)
+       options && Tokenizers::Query.new(options)
      end
    end
    def tokenizer
-     @tokenizer || Internals::Tokenizers::Query.default
+     @tokenizer || Tokenizers::Query.default
    end
    # TODO Doc. Spec.
    #
@@ -82,14 +82,14 @@ class Search
    # Picky will raise a Query::Indexes::DifferentTypesError.
    #
    @@mapping = {
-     Index::Memory => Internals::Query::Combinations::Memory,
-     Index::Redis  => Internals::Query::Combinations::Redis
+     Index::Memory => Query::Combinations::Memory,
+     Index::Redis  => Query::Combinations::Redis
    }
    def combinations_type_for index_definitions_ary
      index_types = index_definitions_ary.map(&:class)
      index_types.uniq!
      raise_different(index_types) if index_types.size > 1
-     !index_types.empty? && @@mapping[*index_types] || Internals::Query::Combinations::Memory
+     !index_types.empty? && @@mapping[*index_types] || Query::Combinations::Memory
    end
    # Currently it isn't possible using Memory and Redis etc.
    # indexes in the same query index group.
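
The practical upshot of these hunks is that the Internals:: prefix disappears from the constants a Picky application touches. A hedged before/after sketch, where books_index stands in for an Index::Memory you have defined elsewhere in your app:

    require 'picky'

    # picky 2.5.x
    search = Search.new books_index,
                        :tokenizer => Internals::Tokenizers::Query.new(:maximum_tokens => 3)

    # picky 2.6.0 -- same call, shorter constant
    search = Search.new books_index,
                        :tokenizer => Tokenizers::Query.new(:maximum_tokens => 3)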
data/lib/picky/sources/db.rb CHANGED
@@ -87,7 +87,7 @@ module Sources
    def take_snapshot index
      connect_backend
 
-     origin = snapshot_table_name index
+     origin = snapshot_table_name index.name
      on_database = database.connection
 
      # Drop the table if it exists.
@@ -109,16 +109,16 @@ module Sources
 
    # Counts all the entries that are used for the index.
    #
-   def count index
+   def count index_name
      connect_backend
 
-     database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
+     database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
    end
 
    # The name of the snapshot table created by Picky.
    #
-   def snapshot_table_name index
-     "picky_#{index.name}_index"
+   def snapshot_table_name index_name
+     "picky_#{index_name}_index"
    end
 
    # Harvests the data to index in chunks.
@@ -126,7 +126,7 @@ module Sources
    def harvest category, &block
      connect_backend
 
-     (0..count(category.index)).step(chunksize) do |offset|
+     (0..count(category.index_name)).step(chunksize) do |offset|
        get_data category, offset, &block
      end
    end
@@ -166,7 +166,7 @@ module Sources
 
    # The harvest statement used to pull data from the snapshot table.
    def harvest_statement category
-     "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index)} st"
+     "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
    end
 
    # The amount of records that are loaded each chunk.
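
In short, the DB source now keys its snapshot helpers on an index name rather than an index object. An illustrative sketch only; the SELECT statement and :file path are placeholders, not taken from this diff:

    require 'picky'

    source = Sources::DB.new 'SELECT id, title FROM books', :file => 'app/db.yml'

    source.snapshot_table_name :books   # => "picky_books_index"
    source.count :books                 # row count of picky_books_index (needs a live DB connection)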
data/lib/picky/sources/wrappers/location.rb CHANGED
@@ -10,7 +10,7 @@ module Sources
 
    def initialize source, grid, precision = 1
      super source
-     @calculation = Internals::Calculations::Location.new grid, precision
+     @calculation = Calculations::Location.new grid, precision
    end
 
    # Yield the data (id, text for id) for the given category.
@@ -42,7 +42,7 @@ module Sources
 
      # TODO Move to the right place.
      #
-     category.exact[:location_minimum] = minimum
+     category.indexing_exact[:location_minimum] = minimum
    end
 
  end
data/lib/picky/tokenizers/base.rb ADDED
@@ -0,0 +1,224 @@
+ module Tokenizers # :nodoc:all
+
+   # Defines tokenizing processes used both in indexing and querying.
+   #
+   class Base
+
+     # TODO Move EMPTY_STRING top level.
+     #
+     EMPTY_STRING = ''.freeze
+
+     def to_s
+       reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
+       <<-TOKENIZER
+ Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
+ Stopwords: #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
+ Splits text on: #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
+ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@removes_characters_after_splitting_regexp.source}/" : '-'}
+ Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
+ Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
+ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
+ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
+       TOKENIZER
+     end
+
+     # Stopwords.
+     #
+     # We only allow regexps (even if string would be okay
+     # too for gsub! - it's too hard to understand)
+     #
+     def stopwords regexp
+       check_argument_in __method__, Regexp, regexp
+       @remove_stopwords_regexp = regexp
+     end
+     def remove_stopwords text
+       text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
+       text
+     end
+     @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
+     def remove_non_single_stopwords text
+       return text if text.match @@non_single_stopword_regexp
+       remove_stopwords text
+     end
+
+     # Illegals.
+     #
+     # We only allow regexps (even if string would be okay
+     # too for gsub! - it's too hard to understand)
+     #
+     def removes_characters regexp
+       check_argument_in __method__, Regexp, regexp
+       @removes_characters_regexp = regexp
+     end
+     def remove_illegals text
+       text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
+       text
+     end
+
+     # Splitting.
+     #
+     # We allow Strings and Regexps.
+     # Note: We do not test against to_str since symbols do not work with String#split.
+     #
+     def splits_text_on regexp_or_string
+       raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
+       @splits_text_on = regexp_or_string
+     end
+     def split text
+       text.split @splits_text_on
+     end
+
+     # Normalizing.
+     #
+     # We only allow arrays.
+     #
+     def normalizes_words regexp_replaces
+       raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
+       @normalizes_words_regexp_replaces = regexp_replaces
+     end
+     def normalize_with_patterns text
+       return text unless @normalizes_words_regexp_replaces
+
+       @normalizes_words_regexp_replaces.each do |regex, replace|
+         # This should be sufficient
+         #
+         text.gsub!(regex, replace) and break
+       end
+       remove_after_normalizing_illegals text
+       text
+     end
+
+     # Illegal after normalizing.
+     #
+     # We only allow regexps (even if string would be okay
+     # too for gsub! - it's too hard to understand)
+     #
+     def removes_characters_after_splitting regexp
+       check_argument_in __method__, Regexp, regexp
+       @removes_characters_after_splitting_regexp = regexp
+     end
+     def remove_after_normalizing_illegals text
+       text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
+     end
+
+     # Substitute Characters with this substituter.
+     #
+     # Default is European Character substitution.
+     #
+     def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
+       raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
+       @substituter = substituter
+     end
+     def substitute_characters text
+       substituter?? substituter.substitute(text) : text
+     end
+
+     # Reject tokens after tokenizing based on the given criteria.
+     #
+     # Note: Currently only for indexing.
+     #
+     def reject_token_if &condition
+       @reject_condition = condition
+     end
+     def reject tokens
+       tokens.reject! &@reject_condition
+     end
+
+     def case_sensitive case_sensitive
+       @case_sensitive = case_sensitive
+     end
+     def downcase?
+       !@case_sensitive
+     end
+
+     # Checks if the right argument type has been given.
+     #
+     def check_argument_in method, type, argument, &condition
+       raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
+     end
+
+
+     # Returns a number of tokens, generated from the given text.
+     #
+     # Note:
+     #  * preprocess, pretokenize are hooks
+     #
+     def tokenize text
+       text = preprocess text # processing the text
+       return empty_tokens if text.blank?
+       words = pretokenize text # splitting and preparations for tokenizing
+       return empty_tokens if words.empty?
+       tokens = tokens_for words # creating tokens / strings
+       process tokens # processing tokens / strings
+     end
+
+     attr_reader :substituter
+     alias substituter? substituter
+
+     def initialize options = {}
+       removes_characters options[:removes_characters] if options[:removes_characters]
+       contracts_expressions *options[:contracts_expressions] if options[:contracts_expressions]
+       stopwords options[:stopwords] if options[:stopwords]
+       normalizes_words options[:normalizes_words] if options[:normalizes_words]
+       removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
+       substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
+       case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
+
+       # Defaults.
+       #
+       splits_text_on options[:splits_text_on] || /\s/
+       reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
+     end
+
+     # Default preprocessing hook.
+     #
+     # Does:
+     #  1. Character substitution.
+     #  2. Remove illegal expressions.
+     #  3. Remove non-single stopwords. (Stopwords that occur with other words)
+     #
+     def preprocess text
+       text = substitute_characters text
+       remove_illegals text
+       # We do not remove single stopwords e.g. in the indexer for
+       # an entirely different reason than in the query tokenizer.
+       # An indexed thing with just name "UND" (a possible stopword)
+       # should not lose its name.
+       #
+       remove_non_single_stopwords text
+       text
+     end
+     # Pretokenizing.
+     #
+     # Does:
+     #  1. Split the text into words.
+     #  2. Normalize each word.
+     #
+     def pretokenize text
+       words = split text
+       words.collect! do |word|
+         normalize_with_patterns word
+         word
+       end
+     end
+     # Basic postprocessing (overridden in both query/index tokenizers).
+     #
+     def process tokens
+       reject tokens # Reject any tokens that don't meet criteria
+       tokens
+     end
+
+     # # Converts words into real tokens.
+     # #
+     # def tokens_for words
+     #   Query::Tokens.new words.collect! { |word| token_for word }
+     # end
+     # Turns non-blank text into symbols.
+     #
+     def symbolize text
+       text.blank? ? nil : text.to_sym
+     end
+
+   end
+
+ end
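
The options handled in the initialize above correspond to the tokenizer DSL. A hedged configuration sketch, using only options that appear in that constructor; the concrete regexps are made up, and Tokenizers::Index (shown further down) supplies the tokens_for/empty_tokens hooks that Base leaves open:

    require 'picky'

    tokenizer = Tokenizers::Index.new(
      :removes_characters => /[",\(\)]/,              # forwarded to #removes_characters
      :stopwords          => /\b(and|the|of|it)\b/i,  # forwarded to #stopwords
      :splits_text_on     => /[\s\/]/,                # default would be /\s/
      :case_sensitive     => false                    # so downcase? stays true
    )

    # tokenize mutates its argument, hence the dup
    tokenizer.tokenize 'The Count of Monte Cristo'.dup  # => roughly [:count, :monte, :cristo]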
data/lib/picky/tokenizers/index.rb ADDED
@@ -0,0 +1,30 @@
+ module Tokenizers
+
+   # The base indexing tokenizer.
+   #
+   # Override in indexing subclasses and define in configuration.
+   #
+   class Index < Base
+
+     def self.default= new_default
+       @default = new_default
+     end
+     def self.default
+       @default ||= new
+     end
+
+     # Does not actually return a token, but a
+     # symbol "token".
+     #
+     def tokens_for words
+       words.collect! { |word| word.downcase! if downcase?; word.to_sym }
+     end
+     # Returns empty tokens.
+     #
+     def empty_tokens
+       []
+     end
+
+   end
+
+ end
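
A quick, hedged sanity check of what the indexing tokenizer produces (this mirrors what the rake try:index task prints further down; tokenize mutates its argument, hence the dup):

    require 'picky'

    Tokenizers::Index.default.tokenize 'Alan Turing'.dup   # => roughly [:alan, :turing]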
data/lib/picky/tokenizers/location.rb ADDED
@@ -0,0 +1,49 @@
+ module Tokenizers
+
+   class Location < Base
+
+     attr_reader :calculation
+
+     def initialize options = {}
+       super options
+
+       grid = options[:grid]
+       precision = options[:precision] || 1
+
+       @calculation = Calculations::Location.new grid, precision
+
+       @minimum = 1.0 / 0
+
+       @locations = []
+     end
+
+     # TODO Work on this!
+     #
+     def tokenize text
+
+       # Gather min/max.
+       #
+       source.harvest category do |indexed_id, location|
+         location = location.to_f
+         minimum = location if location < minimum
+         locations << [indexed_id, location]
+       end
+
+       calculation.minimum = minimum
+
+       # Recalculate locations.
+       #
+       locations.each do |indexed_id, location|
+         calculation.recalculated_range(location).each do |new_location|
+           yield indexed_id, new_location.to_s
+         end
+       end
+
+       # TODO Move to the right place.
+       #
+       category.indexing_exact[:location_minimum] = minimum
+     end
+
+   end
+
+ end
data/lib/picky/tokenizers/query.rb ADDED
@@ -0,0 +1,55 @@
+ # encoding: utf-8
+ #
+ module Tokenizers
+
+   # There are a few class methods that you can use to configure how a query works.
+   #
+   # removes_characters regexp
+   # illegal_after_normalizing regexp
+   # stopwords regexp
+   # contracts_expressions regexp, to_string
+   # splits_text_on regexp
+   # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
+   #
+   class Query < Base
+
+     def self.default= new_default
+       @default = new_default
+     end
+     def self.default
+       @default ||= new
+     end
+
+     attr_reader :maximum_tokens
+
+     def initialize options = {}
+       super options
+       @maximum_tokens = options[:maximum_tokens] || 5
+     end
+
+     # Let each token process itself.
+     # Reject, limit, and partialize tokens.
+     #
+     # In querying we work with real tokens (in indexing it's just symbols).
+     #
+     def process tokens
+       tokens.reject             # Reject any tokens that don't meet criteria.
+       tokens.cap maximum_tokens # Cut off superfluous tokens.
+       tokens.partialize_last    # Set certain tokens as partial.
+       tokens
+     end
+
+     # Converts words into real tokens.
+     #
+     def tokens_for words
+       ::Query::Tokens.processed words, downcase?
+     end
+     # Returns a tokens object.
+     #
+     def empty_tokens
+       ::Query::Tokens.new
+     end
+
+   end
+
+ end
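
And the query-side counterpart, capped by maximum_tokens. The values below are invented; the default cap is 5, as set in the constructor above:

    require 'picky'

    tokenizer = Tokenizers::Query.new :maximum_tokens => 3
    tokens    = tokenizer.tokenize 'alan mathison turing bletchley'.dup

    tokens.size   # => 3, the fourth word is cut off by #cap
    tokens.to_s   # the last remaining token is marked partial by #partialize_last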
data/lib/tasks/index.rake CHANGED
@@ -23,9 +23,10 @@ namespace :index do
   desc "Generates a specific index from index snapshots (category optional)."
   task :specific, [:index, :category] => :application do |_, options|
     index, category = options.index, options.category
-     specific_index = Indexes.find index.to_sym, (category && category.to_sym)
-     specific_index.index!
-     specific_index.cache!
+
+     specific = Indexes[index]
+     specific = specific[category] if category
+     specific.index
   end
 
 end
data/lib/tasks/try.rake CHANGED
@@ -6,7 +6,7 @@ namespace :try do
   task :index, [:text, :index, :category] => :application do |_, options|
     text, index, category = options.text, options.index, options.category
 
-     tokenizer = category ? Indexes.find(index, category).tokenizer : Internals::Tokenizers::Index.default
+     tokenizer = category ? Indexes.find(index, category).tokenizer : Tokenizers::Index.default
 
     puts "\"#{text}\" is saved in the index as #{tokenizer.tokenize(text.dup).to_a}"
   end
@@ -15,7 +15,7 @@ namespace :try do
   task :query, [:text] => :application do |_, options|
     text = options.text
 
-     puts "\"#{text}\" as a search will be preprocessed into #{Internals::Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
+     puts "\"#{text}\" as a search will be preprocessed into #{Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
     puts
     puts "(category qualifiers, e.g. title: are removed if they do not exist as a qualifier, so 'toitle:bla' -> 'bla')"
   end
data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb CHANGED
@@ -2,7 +2,7 @@
 #
 require 'spec_helper'
 
- describe Internals::Adapters::Rack::Base do
+ describe Adapters::Rack::Base do
 
   before(:each) do
     @adapter = described_class.new
data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb CHANGED
@@ -2,7 +2,7 @@
 #
 require 'spec_helper'
 
- describe Internals::Adapters::Rack::LiveParameters do
+ describe Adapters::Rack::LiveParameters do
 
   let(:live_parameters) { stub :live_parameters }
   let(:adapter) { described_class.new live_parameters }
data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb CHANGED
@@ -2,7 +2,7 @@
 #
 require 'spec_helper'
 
- describe Internals::Adapters::Rack::Query do
+ describe Adapters::Rack::Query do
 
   before(:each) do
     @query = stub :query
data/spec/lib/application_spec.rb CHANGED
@@ -15,8 +15,8 @@ describe Application do
 
       route %r{^/books} => Search.new(books)
     end
-     Internals::Tokenizers::Index.default.tokenize 'some text'
-     Internals::Tokenizers::Query.default.tokenize 'some text'
+     Tokenizers::Index.default.tokenize 'some text'
+     Tokenizers::Query.default.tokenize 'some text'
   }.should_not raise_error
 end
 it "should run ok" do
@@ -105,7 +105,7 @@ describe Application do
   lambda { Application.rack_adapter }.should_not raise_error
 end
 it "should return a new FrontendAdapters::Rack instance" do
-   Application.rack_adapter.should be_kind_of(Internals::FrontendAdapters::Rack)
+   Application.rack_adapter.should be_kind_of(FrontendAdapters::Rack)
 end
 it "should cache the instance" do
   Application.rack_adapter.should == Application.rack_adapter
data/spec/lib/{internals/index → backend}/file/basic_spec.rb CHANGED
@@ -1,6 +1,6 @@
 require 'spec_helper'
 
- describe Internals::Index::File::Basic do
+ describe Backend::File::Basic do
 
   let(:file) { described_class.new 'some/cache/path/to/file' }
 
data/spec/lib/{internals/index → backend}/file/json_spec.rb CHANGED
@@ -1,6 +1,6 @@
 require 'spec_helper'
 
- describe Internals::Index::File::JSON do
+ describe Backend::File::JSON do
 
   before(:each) do
     @file = described_class.new "some_cache_path"