picky 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -1,109 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Internals
4
-
5
- #
6
- #
7
- module Query
8
-
9
- # This class primarily handles switching through similar token constellations.
10
- #
11
- class Tokens # :nodoc:all
12
-
13
- # Basically delegates to its internal tokens array.
14
- #
15
- self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
16
-
17
- # Create a new Tokens object with the array of tokens passed in.
18
- #
19
- def initialize tokens = []
20
- @tokens = tokens
21
- end
22
-
23
- # Creates a new Tokens object from a number of Strings.
24
- #
25
- # Options:
26
- # * downcase: Whether to downcase the passed strings (default is true)
27
- #
28
- def self.processed words, downcase = true
29
- new words.collect! { |word| Token.processed word, downcase }
30
- end
31
-
32
- # Tokenizes each token.
33
- #
34
- # Note: Passed tokenizer needs to offer #normalize(text).
35
- #
36
- def tokenize_with tokenizer
37
- @tokens.each { |token| token.tokenize_with(tokenizer) }
38
- end
39
-
40
- # Generates an array in the form of
41
- # [
42
- # [combination], # of token 1
43
- # [combination, combination, combination], # of token 2
44
- # [combination, combination] # of token 3
45
- # ]
46
- #
47
- def possible_combinations_in index
48
- @tokens.inject([]) do |combinations, token|
49
- possible_combinations = token.possible_combinations_in index
50
-
51
- # TODO Could move the ignore_unassigned_tokens here!
52
- #
53
- # Note: Optimization for ignoring tokens that allocate to nothing and
54
- # can be ignored.
55
- # For example in a special search, where "florian" is not
56
- # mapped to any category.
57
- #
58
- possible_combinations ? combinations << possible_combinations : combinations
59
- end
60
- end
61
-
62
- # Makes the last of the tokens partial.
63
- #
64
- def partialize_last
65
- @tokens.last.partial = true unless empty?
66
- end
67
-
68
- # Caps the tokens to the maximum.
69
- #
70
- def cap maximum
71
- @tokens.slice!(maximum..-1) if cap?(maximum)
72
- end
73
- def cap? maximum
74
- @tokens.size > maximum
75
- end
76
-
77
- # Rejects blank tokens.
78
- #
79
- def reject
80
- @tokens.reject! &:blank?
81
- end
82
-
83
- # Returns a solr query.
84
- #
85
- def to_solr_query
86
- @tokens.map(&:to_solr).join ' '
87
- end
88
-
89
- #
90
- #
91
- def originals
92
- @tokens.map(&:original)
93
- end
94
-
95
- def == other
96
- self.tokens == other.tokens
97
- end
98
-
99
- # Just join the token original texts.
100
- #
101
- def to_s
102
- originals.join ' '
103
- end
104
-
105
- end
106
-
107
- end
108
-
109
- end
@@ -1,52 +0,0 @@
1
- module Internals
2
- module Shared
3
-
4
- module Category
5
-
6
- def index_name
7
- index.name
8
- end
9
- def category_name
10
- name
11
- end
12
-
13
- # Path and partial filename of a specific index on this category.
14
- #
15
- def index_path bundle_name, type
16
- "#{index_directory}/#{name}_#{bundle_name}_#{type}"
17
- end
18
-
19
- #
20
- #
21
- def prepared_index_path
22
- @prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
23
- end
24
- def prepared_index_file &block
25
- @prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
26
- @prepared_index_file.open_for_indexing &block
27
- end
28
-
29
- # Identifier for internal use.
30
- #
31
- def identifier
32
- @identifier ||= "#{index.name}:#{name}"
33
- end
34
- def to_s
35
- "#{index.name} #{name}"
36
- end
37
-
38
- # The index directory for this category.
39
- #
40
- def index_directory
41
- @index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{index.name}"
42
- end
43
- # Creates the index directory including all necessary paths above it.
44
- #
45
- def prepare_index_directory
46
- FileUtils.mkdir_p index_directory
47
- end
48
-
49
- end
50
-
51
- end
52
- end
@@ -1,228 +0,0 @@
1
- module Internals
2
-
3
- module Tokenizers # :nodoc:all
4
-
5
- # Defines tokenizing processes used both in indexing and querying.
6
- #
7
- class Base
8
-
9
- # TODO Move EMPTY_STRING top level.
10
- #
11
- EMPTY_STRING = ''.freeze
12
-
13
- def to_s
14
- reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
15
- <<-TOKENIZER
16
- Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
17
- Stopwords: #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
18
- Splits text on: #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
19
- Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@removes_characters_after_splitting_regexp.source}/" : '-'}
20
- Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
21
- Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
22
- Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
23
- Case sensitive? #{@case_sensitive ? "Yes." : "-"}
24
- TOKENIZER
25
- end
26
-
27
- # Stopwords.
28
- #
29
- # We only allow regexps (even if string would be okay
30
- # too for gsub! - it's too hard to understand)
31
- #
32
- def stopwords regexp
33
- check_argument_in __method__, Regexp, regexp
34
- @remove_stopwords_regexp = regexp
35
- end
36
- def remove_stopwords text
37
- text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
38
- text
39
- end
40
- @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
41
- def remove_non_single_stopwords text
42
- return text if text.match @@non_single_stopword_regexp
43
- remove_stopwords text
44
- end
45
-
46
- # Illegals.
47
- #
48
- # We only allow regexps (even if string would be okay
49
- # too for gsub! - it's too hard to understand)
50
- #
51
- def removes_characters regexp
52
- check_argument_in __method__, Regexp, regexp
53
- @removes_characters_regexp = regexp
54
- end
55
- def remove_illegals text
56
- text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
57
- text
58
- end
59
-
60
- # Splitting.
61
- #
62
- # We allow Strings and Regexps.
63
- # Note: We do not test against to_str since symbols do not work with String#split.
64
- #
65
- def splits_text_on regexp_or_string
66
- raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
67
- @splits_text_on = regexp_or_string
68
- end
69
- def split text
70
- text.split @splits_text_on
71
- end
72
-
73
- # Normalizing.
74
- #
75
- # We only allow arrays.
76
- #
77
- def normalizes_words regexp_replaces
78
- raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
79
- @normalizes_words_regexp_replaces = regexp_replaces
80
- end
81
- def normalize_with_patterns text
82
- return text unless @normalizes_words_regexp_replaces
83
-
84
- @normalizes_words_regexp_replaces.each do |regex, replace|
85
- # This should be sufficient
86
- #
87
- text.gsub!(regex, replace) and break
88
- end
89
- remove_after_normalizing_illegals text
90
- text
91
- end
92
-
93
- # Illegal after normalizing.
94
- #
95
- # We only allow regexps (even if string would be okay
96
- # too for gsub! - it's too hard to understand)
97
- #
98
- def removes_characters_after_splitting regexp
99
- check_argument_in __method__, Regexp, regexp
100
- @removes_characters_after_splitting_regexp = regexp
101
- end
102
- def remove_after_normalizing_illegals text
103
- text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
104
- end
105
-
106
- # Substitute Characters with this substituter.
107
- #
108
- # Default is European Character substitution.
109
- #
110
- def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
111
- raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
112
- @substituter = substituter
113
- end
114
- def substitute_characters text
115
- substituter?? substituter.substitute(text) : text
116
- end
117
-
118
- # Reject tokens after tokenizing based on the given criteria.
119
- #
120
- # Note: Currently only for indexing.
121
- #
122
- def reject_token_if &condition
123
- @reject_condition = condition
124
- end
125
- def reject tokens
126
- tokens.reject! &@reject_condition
127
- end
128
-
129
- def case_sensitive case_sensitive
130
- @case_sensitive = case_sensitive
131
- end
132
- def downcase?
133
- !@case_sensitive
134
- end
135
-
136
- # Checks if the right argument type has been given.
137
- #
138
- def check_argument_in method, type, argument, &condition
139
- raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
140
- end
141
-
142
-
143
- # Returns a number of tokens, generated from the given text.
144
- #
145
- # Note:
146
- # * preprocess, pretokenize are hooks
147
- #
148
- def tokenize text
149
- text = preprocess text # processing the text
150
- return empty_tokens if text.blank?
151
- words = pretokenize text # splitting and preparations for tokenizing
152
- return empty_tokens if words.empty?
153
- tokens = tokens_for words # creating tokens / strings
154
- process tokens # processing tokens / strings
155
- end
156
-
157
- attr_reader :substituter
158
- alias substituter? substituter
159
-
160
- def initialize options = {}
161
- removes_characters options[:removes_characters] if options[:removes_characters]
162
- contracts_expressions *options[:contracts_expressions] if options[:contracts_expressions]
163
- stopwords options[:stopwords] if options[:stopwords]
164
- normalizes_words options[:normalizes_words] if options[:normalizes_words]
165
- removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
166
- substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
167
- case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
168
-
169
- # Defaults.
170
- #
171
- splits_text_on options[:splits_text_on] || /\s/
172
- reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
173
- end
174
-
175
- # Default preprocessing hook.
176
- #
177
- # Does:
178
- # 1. Character substitution.
179
- # 2. Remove illegal expressions.
180
- # 3. Remove non-single stopwords. (Stopwords that occur with other words)
181
- #
182
- def preprocess text
183
- text = substitute_characters text
184
- remove_illegals text
185
- # We do not remove single stopwords e.g. in the indexer for
186
- # an entirely different reason than in the query tokenizer.
187
- # An indexed thing with just name "UND" (a possible stopword)
188
- # should not lose its name.
189
- #
190
- remove_non_single_stopwords text
191
- text
192
- end
193
- # Pretokenizing.
194
- #
195
- # Does:
196
- # 1. Split the text into words.
197
- # 2. Normalize each word.
198
- #
199
- def pretokenize text
200
- words = split text
201
- words.collect! do |word|
202
- normalize_with_patterns word
203
- word
204
- end
205
- end
206
- # Basic postprocessing (overridden in both query/index tokenizers).
207
- #
208
- def process tokens
209
- reject tokens # Reject any tokens that don't meet criteria
210
- tokens
211
- end
212
-
213
- # # Converts words into real tokens.
214
- # #
215
- # def tokens_for words
216
- # Internals::Query::Tokens.new words.collect! { |word| token_for word }
217
- # end
218
- # Turns non-blank text into symbols.
219
- #
220
- def symbolize text
221
- text.blank? ? nil : text.to_sym
222
- end
223
-
224
- end
225
-
226
- end
227
-
228
- end
@@ -1,34 +0,0 @@
1
- module Internals
2
-
3
- module Tokenizers
4
-
5
- # The base indexing tokenizer.
6
- #
7
- # Override in indexing subclasses and define in configuration.
8
- #
9
- class Index < Base
10
-
11
- def self.default= new_default
12
- @default = new_default
13
- end
14
- def self.default
15
- @default ||= new
16
- end
17
-
18
- # Does not actually return a token, but a
19
- # symbol "token".
20
- #
21
- def tokens_for words
22
- words.collect! { |word| word.downcase! if downcase?; word.to_sym }
23
- end
24
- # Returns empty tokens.
25
- #
26
- def empty_tokens
27
- []
28
- end
29
-
30
- end
31
-
32
- end
33
-
34
- end
@@ -1,54 +0,0 @@
1
- module Internals
2
-
3
- module Tokenizers
4
-
5
-
6
- class Location < Base
7
-
8
- attr_reader :calculation
9
-
10
- def initialize options = {}
11
- super options
12
-
13
- grid = options[:grid]
14
- precision = options[:precision] || 1
15
-
16
- @calculation = Internals::Calculations::Location.new grid, precision
17
-
18
- @minimum = 1.0 / 0
19
-
20
- @locations = []
21
- end
22
-
23
- # TODO Work on this!
24
- #
25
- def tokenize text
26
-
27
- # Gather min/max.
28
- #
29
- source.harvest category do |indexed_id, location|
30
- location = location.to_f
31
- minimum = location if location < minimum
32
- locations << [indexed_id, location]
33
- end
34
-
35
- calculation.minimum = minimum
36
-
37
- # Recalculate locations.
38
- #
39
- locations.each do |indexed_id, location|
40
- calculation.recalculated_range(location).each do |new_location|
41
- yield indexed_id, new_location.to_s
42
- end
43
- end
44
-
45
- # TODO Move to the right place.
46
- #
47
- category.exact[:location_minimum] = minimum
48
- end
49
-
50
- end
51
-
52
- end
53
-
54
- end
@@ -1,59 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Internals
4
-
5
- module Tokenizers
6
-
7
- # There are a few class methods that you can use to configure how a query works.
8
- #
9
- # removes_characters regexp
10
- # illegal_after_normalizing regexp
11
- # stopwords regexp
12
- # contracts_expressions regexp, to_string
13
- # splits_text_on regexp
14
- # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
15
- #
16
- class Query < Base
17
-
18
- def self.default= new_default
19
- @default = new_default
20
- end
21
- def self.default
22
- @default ||= new
23
- end
24
-
25
- attr_reader :maximum_tokens
26
-
27
- def initialize options = {}
28
- super options
29
- @maximum_tokens = options[:maximum_tokens] || 5
30
- end
31
-
32
- # Let each token process itself.
33
- # Reject, limit, and partialize tokens.
34
- #
35
- # In querying we work with real tokens (in indexing it's just symbols).
36
- #
37
- def process tokens
38
- tokens.reject # Reject any tokens that don't meet criteria.
39
- tokens.cap maximum_tokens # Cut off superfluous tokens.
40
- tokens.partialize_last # Set certain tokens as partial.
41
- tokens
42
- end
43
-
44
- # Converts words into real tokens.
45
- #
46
- def tokens_for words
47
- Internals::Query::Tokens.processed words, downcase?
48
- end
49
- # Returns a tokens object.
50
- #
51
- def empty_tokens
52
- Internals::Query::Tokens.new
53
- end
54
-
55
- end
56
-
57
- end
58
-
59
- end
@@ -1,2 +0,0 @@
1
- module Internals # :nodoc:all
2
- end
@@ -1,9 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- require 'spec_helper'
4
-
5
- describe 'aliases' do
6
- it 'exists an Indexes class that is an instance of API::Indexes' do
7
- Indexes.class.should == IndexBundle
8
- end
9
- end
@@ -1,69 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- require 'spec_helper'
4
-
5
- describe IndexBundle do
6
-
7
- let(:some_index) { stub :index, :name => :some_index, :internal_indexed => :indexed_index, :internal_indexing => :indexing_index }
8
- let(:indexes) { described_class.new }
9
- let(:indexed) { stub :indexed, :register => nil }
10
- let(:indexing) { stub :indexing, :register => nil }
11
-
12
- before(:each) do
13
- indexes.stub! :indexing => indexing
14
- indexes.stub! :indexed => indexed
15
- end
16
-
17
- def self.it_delegates method, receiver
18
- it "delegates #{method} to #{receiver}" do
19
- indexes.send(receiver).should_receive(method.to_sym).once
20
-
21
- indexes.send method
22
- end
23
- end
24
-
25
- describe 'delegation' do
26
- it_delegates :reload, :indexed
27
- it_delegates :load_from_cache, :indexed
28
-
29
- it_delegates :check_caches, :indexing
30
- it_delegates :find, :indexing
31
- it_delegates :index, :indexing
32
- it_delegates :index_for_tests, :indexing
33
- end
34
-
35
- describe '[]' do
36
- before(:each) do
37
- indexes.register some_index
38
- end
39
- it 'takes strings' do
40
- indexes['some_index'].should == some_index
41
- end
42
- it 'takes symbols' do
43
- indexes[:some_index].should == some_index
44
- end
45
- end
46
-
47
- describe 'register' do
48
- it 'registers with the indexes' do
49
- indexes.register some_index
50
-
51
- indexes.indexes.should == [some_index]
52
- end
53
- it 'registers with the index map' do
54
- indexes.register some_index
55
-
56
- indexes[some_index.name].should == some_index
57
- end
58
- it 'registers with the indexing' do
59
- indexing.should_receive(:register).once.with :indexing_index
60
-
61
- indexes.register some_index
62
- end
63
- it 'registers with the indexed' do
64
- indexed.should_receive(:register).once.with :indexed_index
65
-
66
- indexes.register some_index
67
- end
68
- end
69
- end