picky 2.5.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -1,109 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Internals
4
-
5
- #
6
- #
7
- module Query
8
-
9
- # This class primarily handles switching through similar token constellations.
10
- #
11
- class Tokens # :nodoc:all
12
-
13
- # Basically delegates to its internal tokens array.
14
- #
15
- self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
16
-
17
- # Create a new Tokens object with the array of tokens passed in.
18
- #
19
- def initialize tokens = []
20
- @tokens = tokens
21
- end
22
-
23
- # Creates a new Tokens object from a number of Strings.
24
- #
25
- # Options:
26
- # * downcase: Whether to downcase the passed strings (default is true)
27
- #
28
- def self.processed words, downcase = true
29
- new words.collect! { |word| Token.processed word, downcase }
30
- end
31
-
32
- # Tokenizes each token.
33
- #
34
- # Note: Passed tokenizer needs to offer #normalize(text).
35
- #
36
- def tokenize_with tokenizer
37
- @tokens.each { |token| token.tokenize_with(tokenizer) }
38
- end
39
-
40
- # Generates an array in the form of
41
- # [
42
- # [combination], # of token 1
43
- # [combination, combination, combination], # of token 2
44
- # [combination, combination] # of token 3
45
- # ]
46
- #
47
- def possible_combinations_in index
48
- @tokens.inject([]) do |combinations, token|
49
- possible_combinations = token.possible_combinations_in index
50
-
51
- # TODO Could move the ignore_unassigned_tokens here!
52
- #
53
- # Note: Optimization for ignoring tokens that allocate to nothing and
54
- # can be ignored.
55
- # For example in a special search, where "florian" is not
56
- # mapped to any category.
57
- #
58
- possible_combinations ? combinations << possible_combinations : combinations
59
- end
60
- end
61
-
62
- # Makes the last of the tokens partial.
63
- #
64
- def partialize_last
65
- @tokens.last.partial = true unless empty?
66
- end
67
-
68
- # Caps the tokens to the maximum.
69
- #
70
- def cap maximum
71
- @tokens.slice!(maximum..-1) if cap?(maximum)
72
- end
73
- def cap? maximum
74
- @tokens.size > maximum
75
- end
76
-
77
- # Rejects blank tokens.
78
- #
79
- def reject
80
- @tokens.reject! &:blank?
81
- end
82
-
83
- # Returns a solr query.
84
- #
85
- def to_solr_query
86
- @tokens.map(&:to_solr).join ' '
87
- end
88
-
89
- #
90
- #
91
- def originals
92
- @tokens.map(&:original)
93
- end
94
-
95
- def == other
96
- self.tokens == other.tokens
97
- end
98
-
99
- # Just join the token original texts.
100
- #
101
- def to_s
102
- originals.join ' '
103
- end
104
-
105
- end
106
-
107
- end
108
-
109
- end
@@ -1,52 +0,0 @@
1
- module Internals
2
- module Shared
3
-
4
- module Category
5
-
6
- def index_name
7
- index.name
8
- end
9
- def category_name
10
- name
11
- end
12
-
13
- # Path and partial filename of a specific index on this category.
14
- #
15
- def index_path bundle_name, type
16
- "#{index_directory}/#{name}_#{bundle_name}_#{type}"
17
- end
18
-
19
- #
20
- #
21
- def prepared_index_path
22
- @prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
23
- end
24
- def prepared_index_file &block
25
- @prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
26
- @prepared_index_file.open_for_indexing &block
27
- end
28
-
29
- # Identifier for internal use.
30
- #
31
- def identifier
32
- @identifier ||= "#{index.name}:#{name}"
33
- end
34
- def to_s
35
- "#{index.name} #{name}"
36
- end
37
-
38
- # The index directory for this category.
39
- #
40
- def index_directory
41
- @index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{index.name}"
42
- end
43
- # Creates the index directory including all necessary paths above it.
44
- #
45
- def prepare_index_directory
46
- FileUtils.mkdir_p index_directory
47
- end
48
-
49
- end
50
-
51
- end
52
- end
@@ -1,228 +0,0 @@
1
- module Internals
2
-
3
- module Tokenizers # :nodoc:all
4
-
5
- # Defines tokenizing processes used both in indexing and querying.
6
- #
7
- class Base
8
-
9
- # TODO Move EMPTY_STRING top level.
10
- #
11
- EMPTY_STRING = ''.freeze
12
-
13
- def to_s
14
- reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
15
- <<-TOKENIZER
16
- Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
17
- Stopwords: #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
18
- Splits text on: #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
19
- Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@removes_characters_after_splitting_regexp.source}/" : '-'}
20
- Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
21
- Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
22
- Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
23
- Case sensitive? #{@case_sensitive ? "Yes." : "-"}
24
- TOKENIZER
25
- end
26
-
27
- # Stopwords.
28
- #
29
- # We only allow regexps (even if string would be okay
30
- # too for gsub! - it's too hard to understand)
31
- #
32
- def stopwords regexp
33
- check_argument_in __method__, Regexp, regexp
34
- @remove_stopwords_regexp = regexp
35
- end
36
- def remove_stopwords text
37
- text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
38
- text
39
- end
40
- @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
41
- def remove_non_single_stopwords text
42
- return text if text.match @@non_single_stopword_regexp
43
- remove_stopwords text
44
- end
45
-
46
- # Illegals.
47
- #
48
- # We only allow regexps (even if string would be okay
49
- # too for gsub! - it's too hard to understand)
50
- #
51
- def removes_characters regexp
52
- check_argument_in __method__, Regexp, regexp
53
- @removes_characters_regexp = regexp
54
- end
55
- def remove_illegals text
56
- text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
57
- text
58
- end
59
-
60
- # Splitting.
61
- #
62
- # We allow Strings and Regexps.
63
- # Note: We do not test against to_str since symbols do not work with String#split.
64
- #
65
- def splits_text_on regexp_or_string
66
- raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
67
- @splits_text_on = regexp_or_string
68
- end
69
- def split text
70
- text.split @splits_text_on
71
- end
72
-
73
- # Normalizing.
74
- #
75
- # We only allow arrays.
76
- #
77
- def normalizes_words regexp_replaces
78
- raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
79
- @normalizes_words_regexp_replaces = regexp_replaces
80
- end
81
- def normalize_with_patterns text
82
- return text unless @normalizes_words_regexp_replaces
83
-
84
- @normalizes_words_regexp_replaces.each do |regex, replace|
85
- # This should be sufficient
86
- #
87
- text.gsub!(regex, replace) and break
88
- end
89
- remove_after_normalizing_illegals text
90
- text
91
- end
92
-
93
- # Illegal after normalizing.
94
- #
95
- # We only allow regexps (even if string would be okay
96
- # too for gsub! - it's too hard to understand)
97
- #
98
- def removes_characters_after_splitting regexp
99
- check_argument_in __method__, Regexp, regexp
100
- @removes_characters_after_splitting_regexp = regexp
101
- end
102
- def remove_after_normalizing_illegals text
103
- text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
104
- end
105
-
106
- # Substitute Characters with this substituter.
107
- #
108
- # Default is European Character substitution.
109
- #
110
- def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
111
- raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
112
- @substituter = substituter
113
- end
114
- def substitute_characters text
115
- substituter?? substituter.substitute(text) : text
116
- end
117
-
118
- # Reject tokens after tokenizing based on the given criteria.
119
- #
120
- # Note: Currently only for indexing.
121
- #
122
- def reject_token_if &condition
123
- @reject_condition = condition
124
- end
125
- def reject tokens
126
- tokens.reject! &@reject_condition
127
- end
128
-
129
- def case_sensitive case_sensitive
130
- @case_sensitive = case_sensitive
131
- end
132
- def downcase?
133
- !@case_sensitive
134
- end
135
-
136
- # Checks if the right argument type has been given.
137
- #
138
- def check_argument_in method, type, argument, &condition
139
- raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
140
- end
141
-
142
-
143
- # Returns a number of tokens, generated from the given text.
144
- #
145
- # Note:
146
- # * preprocess, pretokenize are hooks
147
- #
148
- def tokenize text
149
- text = preprocess text # processing the text
150
- return empty_tokens if text.blank?
151
- words = pretokenize text # splitting and preparations for tokenizing
152
- return empty_tokens if words.empty?
153
- tokens = tokens_for words # creating tokens / strings
154
- process tokens # processing tokens / strings
155
- end
156
-
157
- attr_reader :substituter
158
- alias substituter? substituter
159
-
160
- def initialize options = {}
161
- removes_characters options[:removes_characters] if options[:removes_characters]
162
- contracts_expressions *options[:contracts_expressions] if options[:contracts_expressions]
163
- stopwords options[:stopwords] if options[:stopwords]
164
- normalizes_words options[:normalizes_words] if options[:normalizes_words]
165
- removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
166
- substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
167
- case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
168
-
169
- # Defaults.
170
- #
171
- splits_text_on options[:splits_text_on] || /\s/
172
- reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
173
- end
174
-
175
- # Default preprocessing hook.
176
- #
177
- # Does:
178
- # 1. Character substitution.
179
- # 2. Remove illegal expressions.
180
- # 3. Remove non-single stopwords. (Stopwords that occur with other words)
181
- #
182
- def preprocess text
183
- text = substitute_characters text
184
- remove_illegals text
185
- # We do not remove single stopwords e.g. in the indexer for
186
- # an entirely different reason than in the query tokenizer.
187
- # An indexed thing with just name "UND" (a possible stopword)
188
- # should not lose its name.
189
- #
190
- remove_non_single_stopwords text
191
- text
192
- end
193
- # Pretokenizing.
194
- #
195
- # Does:
196
- # 1. Split the text into words.
197
- # 2. Normalize each word.
198
- #
199
- def pretokenize text
200
- words = split text
201
- words.collect! do |word|
202
- normalize_with_patterns word
203
- word
204
- end
205
- end
206
- # Basic postprocessing (overridden in both query/index tokenizers).
207
- #
208
- def process tokens
209
- reject tokens # Reject any tokens that don't meet criteria
210
- tokens
211
- end
212
-
213
- # # Converts words into real tokens.
214
- # #
215
- # def tokens_for words
216
- # Internals::Query::Tokens.new words.collect! { |word| token_for word }
217
- # end
218
- # Turns non-blank text into symbols.
219
- #
220
- def symbolize text
221
- text.blank? ? nil : text.to_sym
222
- end
223
-
224
- end
225
-
226
- end
227
-
228
- end
@@ -1,34 +0,0 @@
1
- module Internals
2
-
3
- module Tokenizers
4
-
5
- # The base indexing tokenizer.
6
- #
7
- # Override in indexing subclasses and define in configuration.
8
- #
9
- class Index < Base
10
-
11
- def self.default= new_default
12
- @default = new_default
13
- end
14
- def self.default
15
- @default ||= new
16
- end
17
-
18
- # Does not actually return a token, but a
19
- # symbol "token".
20
- #
21
- def tokens_for words
22
- words.collect! { |word| word.downcase! if downcase?; word.to_sym }
23
- end
24
- # Returns empty tokens.
25
- #
26
- def empty_tokens
27
- []
28
- end
29
-
30
- end
31
-
32
- end
33
-
34
- end
@@ -1,54 +0,0 @@
1
- module Internals
2
-
3
- module Tokenizers
4
-
5
-
6
- class Location < Base
7
-
8
- attr_reader :calculation
9
-
10
- def initialize options = {}
11
- super options
12
-
13
- grid = options[:grid]
14
- precision = options[:precision] || 1
15
-
16
- @calculation = Internals::Calculations::Location.new grid, precision
17
-
18
- @minimum = 1.0 / 0
19
-
20
- @locations = []
21
- end
22
-
23
- # TODO Work on this!
24
- #
25
- def tokenize text
26
-
27
- # Gather min/max.
28
- #
29
- source.harvest category do |indexed_id, location|
30
- location = location.to_f
31
- minimum = location if location < minimum
32
- locations << [indexed_id, location]
33
- end
34
-
35
- calculation.minimum = minimum
36
-
37
- # Recalculate locations.
38
- #
39
- locations.each do |indexed_id, location|
40
- calculation.recalculated_range(location).each do |new_location|
41
- yield indexed_id, new_location.to_s
42
- end
43
- end
44
-
45
- # TODO Move to the right place.
46
- #
47
- category.exact[:location_minimum] = minimum
48
- end
49
-
50
- end
51
-
52
- end
53
-
54
- end
@@ -1,59 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Internals
4
-
5
- module Tokenizers
6
-
7
- # There are a few class methods that you can use to configure how a query works.
8
- #
9
- # removes_characters regexp
10
- # illegal_after_normalizing regexp
11
- # stopwords regexp
12
- # contracts_expressions regexp, to_string
13
- # splits_text_on regexp
14
- # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
15
- #
16
- class Query < Base
17
-
18
- def self.default= new_default
19
- @default = new_default
20
- end
21
- def self.default
22
- @default ||= new
23
- end
24
-
25
- attr_reader :maximum_tokens
26
-
27
- def initialize options = {}
28
- super options
29
- @maximum_tokens = options[:maximum_tokens] || 5
30
- end
31
-
32
- # Let each token process itself.
33
- # Reject, limit, and partialize tokens.
34
- #
35
- # In querying we work with real tokens (in indexing it's just symbols).
36
- #
37
- def process tokens
38
- tokens.reject # Reject any tokens that don't meet criteria.
39
- tokens.cap maximum_tokens # Cut off superfluous tokens.
40
- tokens.partialize_last # Set certain tokens as partial.
41
- tokens
42
- end
43
-
44
- # Converts words into real tokens.
45
- #
46
- def tokens_for words
47
- Internals::Query::Tokens.processed words, downcase?
48
- end
49
- # Returns a tokens object.
50
- #
51
- def empty_tokens
52
- Internals::Query::Tokens.new
53
- end
54
-
55
- end
56
-
57
- end
58
-
59
- end
@@ -1,2 +0,0 @@
1
- module Internals # :nodoc:all
2
- end
@@ -1,9 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- require 'spec_helper'
4
-
5
- describe 'aliases' do
6
- it 'exists an Indexes class that is an instance of API::Indexes' do
7
- Indexes.class.should == IndexBundle
8
- end
9
- end
@@ -1,69 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- require 'spec_helper'
4
-
5
- describe IndexBundle do
6
-
7
- let(:some_index) { stub :index, :name => :some_index, :internal_indexed => :indexed_index, :internal_indexing => :indexing_index }
8
- let(:indexes) { described_class.new }
9
- let(:indexed) { stub :indexed, :register => nil }
10
- let(:indexing) { stub :indexing, :register => nil }
11
-
12
- before(:each) do
13
- indexes.stub! :indexing => indexing
14
- indexes.stub! :indexed => indexed
15
- end
16
-
17
- def self.it_delegates method, receiver
18
- it "delegates #{method} to #{receiver}" do
19
- indexes.send(receiver).should_receive(method.to_sym).once
20
-
21
- indexes.send method
22
- end
23
- end
24
-
25
- describe 'delegation' do
26
- it_delegates :reload, :indexed
27
- it_delegates :load_from_cache, :indexed
28
-
29
- it_delegates :check_caches, :indexing
30
- it_delegates :find, :indexing
31
- it_delegates :index, :indexing
32
- it_delegates :index_for_tests, :indexing
33
- end
34
-
35
- describe '[]' do
36
- before(:each) do
37
- indexes.register some_index
38
- end
39
- it 'takes strings' do
40
- indexes['some_index'].should == some_index
41
- end
42
- it 'takes symbols' do
43
- indexes[:some_index].should == some_index
44
- end
45
- end
46
-
47
- describe 'register' do
48
- it 'registers with the indexes' do
49
- indexes.register some_index
50
-
51
- indexes.indexes.should == [some_index]
52
- end
53
- it 'registers with the index map' do
54
- indexes.register some_index
55
-
56
- indexes[some_index.name].should == some_index
57
- end
58
- it 'registers with the indexing' do
59
- indexing.should_receive(:register).once.with :indexing_index
60
-
61
- indexes.register some_index
62
- end
63
- it 'registers with the indexed' do
64
- indexed.should_receive(:register).once.with :indexed_index
65
-
66
- indexes.register some_index
67
- end
68
- end
69
- end