picky 2.7.0 → 3.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,33 +1,37 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexers
3
+ module Picky
4
4
 
5
- #
6
- #
7
- class Base
5
+ module Indexers
8
6
 
9
- attr_reader :index_or_category
7
+ #
8
+ #
9
+ class Base
10
10
 
11
- delegate :source, :to => :index_or_category
11
+ attr_reader :index_or_category
12
12
 
13
- def initialize index_or_category
14
- @index_or_category = index_or_category
15
- end
13
+ delegate :source, :to => :index_or_category
16
14
 
17
- # Starts the indexing process.
18
- #
19
- def index categories
20
- start_indexing_message
21
- prepare categories
22
- process categories
23
- finish_indexing_message
24
- end
15
+ def initialize index_or_category
16
+ @index_or_category = index_or_category
17
+ end
18
+
19
+ # Starts the indexing process.
20
+ #
21
+ def index categories
22
+ start_indexing_message
23
+ prepare categories
24
+ process categories
25
+ finish_indexing_message
26
+ end
27
+
28
+ # By default, an indexer
29
+ # * prepares the index directories.
30
+ #
31
+ def prepare categories
32
+ categories.each &:prepare_index_directory
33
+ end
25
34
 
26
- # By default, an indexer
27
- # * prepares the index directories.
28
- #
29
- def prepare categories
30
- categories.each &:prepare_index_directory
31
35
  end
32
36
 
33
37
  end
@@ -1,82 +1,86 @@
1
- # encoding: utf-8
1
+ # encoding: utf-8
2
2
  #
3
- module Indexers
3
+ module Picky
4
4
 
5
- # Uses a number of categories, a source, and a tokenizer to index data.
6
- #
7
- # The tokenizer is taken from each category if specified, from the index, if not.
8
- #
9
- class Parallel < Base
5
+ module Indexers
10
6
 
11
- # Process does the actual indexing.
7
+ # Uses a number of categories, a source, and a tokenizer to index data.
12
8
  #
13
- # Parameters:
14
- # * categories: An Enumerable of Category-s.
9
+ # The tokenizer is taken from each category if specified, from the index, if not.
15
10
  #
16
- def process categories
17
- comma = ?,
18
- newline = ?\n
11
+ class Parallel < Base
19
12
 
20
- # Prepare a combined object - array.
13
+ # Process does the actual indexing.
21
14
  #
22
- combined = categories.map do |category|
23
- [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)]
24
- end
25
-
26
- # Index.
15
+ # Parameters:
16
+ # * categories: An Enumerable of Category-s.
27
17
  #
28
- # TODO Extract into flush_every(100_000) do
29
- #
30
- i = 0
18
+ def process categories
19
+ comma = ?,
20
+ newline = ?\n
31
21
 
32
- # Explicitly reset the source to avoid caching trouble.
33
- #
34
- source.reset if source.respond_to?(:reset)
22
+ # Prepare a combined object - array.
23
+ #
24
+ combined = categories.map do |category|
25
+ [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)]
26
+ end
35
27
 
36
- # Go through each object in the source.
37
- #
38
- source.each do |object|
39
- id = object.id
28
+ # Index.
29
+ #
30
+ # TODO Extract into flush_every(100_000) do
31
+ #
32
+ i = 0
40
33
 
41
- # This needs to be rewritten.
34
+ # Explicitly reset the source to avoid caching trouble.
42
35
  #
43
- # Is it a good idea that not the tokenizer has control over when he gets the next text?
36
+ source.reset if source.respond_to?(:reset)
37
+
38
+ # Go through each object in the source.
44
39
  #
45
- combined.each do |category, cache, _, tokenizer|
46
- tokenizer.tokenize(object.send(category.from).to_s).each do |token_text|
47
- next unless token_text
48
- cache << id << comma << token_text << newline
40
+ source.each do |object|
41
+ id = object.id
42
+
43
+ # This needs to be rewritten.
44
+ #
45
+ # Is it a good idea that not the tokenizer has control over when he gets the next text?
46
+ #
47
+ combined.each do |category, cache, _, tokenizer|
48
+ tokenizer.tokenize(object.send(category.from).to_s).each do |token_text|
49
+ next unless token_text
50
+ cache << id << comma << token_text << newline
51
+ end
49
52
  end
50
- end
51
53
 
52
- if i >= 100_000
53
- flush combined
54
- i = 0
54
+ if i >= 100_000
55
+ flush combined
56
+ i = 0
57
+ end
58
+ i += 1
59
+ end
60
+ flush combined
61
+ combined.each do |_, _, file, _|
62
+ timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
63
+ file.close
55
64
  end
56
- i += 1
57
65
  end
58
- flush combined
59
- combined.each do |_, _, file, _|
60
- timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
61
- file.close
66
+
67
+ # Flush the combined array into the file.
68
+ #
69
+ def flush combined # :nodoc:
70
+ combined.each do |_, cache, file, _|
71
+ file.write(cache.join) && cache.clear
72
+ end
62
73
  end
63
- end
64
74
 
65
- # Flush the combined array into the file.
66
- #
67
- def flush combined # :nodoc:
68
- combined.each do |_, cache, file, _|
69
- file.write(cache.join) && cache.clear
75
+ #
76
+ #
77
+ def start_indexing_message # :nodoc:
78
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Starting parallel data preparation.}
79
+ end
80
+ def finish_indexing_message # :nodoc:
81
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Finished parallel data preparation.}
70
82
  end
71
- end
72
83
 
73
- #
74
- #
75
- def start_indexing_message # :nodoc:
76
- timed_exclaim %Q{"#{@index_or_category.identifier}": Starting parallel data preparation.}
77
- end
78
- def finish_indexing_message # :nodoc:
79
- timed_exclaim %Q{"#{@index_or_category.identifier}": Finished parallel data preparation.}
80
84
  end
81
85
 
82
86
  end
@@ -1,55 +1,59 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexers
3
+ module Picky
4
4
 
5
- # Uses a category to index its data.
6
- #
7
- # Note: It is called serial since it indexes each category separately.
8
- #
9
- class Serial < Base
5
+ module Indexers
10
6
 
11
- # Harvest the data from the source, tokenize,
12
- # and write to an intermediate "prepared index" file.
7
+ # Uses a category to index its data.
13
8
  #
14
- # Parameters:
15
- # * categories: An enumerable of Category-s.
9
+ # Note: It is called serial since it indexes each category separately.
16
10
  #
17
- def process categories
18
- comma = ?,
19
- newline = ?\n
20
-
21
- categories.each do |category|
22
-
23
- tokenizer = category.tokenizer
11
+ class Serial < Base
12
+
13
+ # Harvest the data from the source, tokenize,
14
+ # and write to an intermediate "prepared index" file.
15
+ #
16
+ # Parameters:
17
+ # * categories: An enumerable of Category-s.
18
+ #
19
+ def process categories
20
+ comma = ?,
21
+ newline = ?\n
22
+
23
+ categories.each do |category|
24
+
25
+ tokenizer = category.tokenizer
26
+
27
+ category.prepared_index_file do |file|
28
+ result = []
29
+
30
+ source.harvest(category) do |indexed_id, text|
31
+ tokenizer.tokenize(text).each do |token_text|
32
+ next unless token_text
33
+ result << indexed_id << comma << token_text << newline
34
+ end
35
+ file.write(result.join) && result.clear if result.size > 100_000
36
+ end
24
37
 
25
- category.prepared_index_file do |file|
26
- result = []
38
+ timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
27
39
 
28
- source.harvest(category) do |indexed_id, text|
29
- tokenizer.tokenize(text).each do |token_text|
30
- next unless token_text
31
- result << indexed_id << comma << token_text << newline
32
- end
33
- file.write(result.join) && result.clear if result.size > 100_000
40
+ file.write result.join
34
41
  end
35
42
 
36
- timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
37
-
38
- file.write result.join
39
43
  end
40
44
 
41
45
  end
42
46
 
43
- end
47
+ #
48
+ #
49
+ def start_indexing_message # :nodoc:
50
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Starting serial data preparation.}
51
+ end
52
+ def finish_indexing_message # :nodoc:
53
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Finished serial data preparation.}
54
+ end
44
55
 
45
- #
46
- #
47
- def start_indexing_message # :nodoc:
48
- timed_exclaim %Q{"#{@index_or_category.identifier}": Starting serial data preparation.}
49
56
  end
50
- def finish_indexing_message # :nodoc:
51
- timed_exclaim %Q{"#{@index_or_category.identifier}": Finished serial data preparation.}
52
- end
53
-
54
57
  end
58
+
55
59
  end