picky 2.7.0 → 3.0.0.pre1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,33 +1,37 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexers
3
+ module Picky
4
4
 
5
- #
6
- #
7
- class Base
5
+ module Indexers
8
6
 
9
- attr_reader :index_or_category
7
+ #
8
+ #
9
+ class Base
10
10
 
11
- delegate :source, :to => :index_or_category
11
+ attr_reader :index_or_category
12
12
 
13
- def initialize index_or_category
14
- @index_or_category = index_or_category
15
- end
13
+ delegate :source, :to => :index_or_category
16
14
 
17
- # Starts the indexing process.
18
- #
19
- def index categories
20
- start_indexing_message
21
- prepare categories
22
- process categories
23
- finish_indexing_message
24
- end
15
+ def initialize index_or_category
16
+ @index_or_category = index_or_category
17
+ end
18
+
19
+ # Starts the indexing process.
20
+ #
21
+ def index categories
22
+ start_indexing_message
23
+ prepare categories
24
+ process categories
25
+ finish_indexing_message
26
+ end
27
+
28
+ # By default, an indexer
29
+ # * prepares the index directories.
30
+ #
31
+ def prepare categories
32
+ categories.each &:prepare_index_directory
33
+ end
25
34
 
26
- # By default, an indexer
27
- # * prepares the index directories.
28
- #
29
- def prepare categories
30
- categories.each &:prepare_index_directory
31
35
  end
32
36
 
33
37
  end
@@ -1,82 +1,86 @@
1
- # encoding: utf-8
1
+ # encoding: utf-8
2
2
  #
3
- module Indexers
3
+ module Picky
4
4
 
5
- # Uses a number of categories, a source, and a tokenizer to index data.
6
- #
7
- # The tokenizer is taken from each category if specified, from the index, if not.
8
- #
9
- class Parallel < Base
5
+ module Indexers
10
6
 
11
- # Process does the actual indexing.
7
+ # Uses a number of categories, a source, and a tokenizer to index data.
12
8
  #
13
- # Parameters:
14
- # * categories: An Enumerable of Category-s.
9
+ # The tokenizer is taken from each category if specified, from the index, if not.
15
10
  #
16
- def process categories
17
- comma = ?,
18
- newline = ?\n
11
+ class Parallel < Base
19
12
 
20
- # Prepare a combined object - array.
13
+ # Process does the actual indexing.
21
14
  #
22
- combined = categories.map do |category|
23
- [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)]
24
- end
25
-
26
- # Index.
15
+ # Parameters:
16
+ # * categories: An Enumerable of Category-s.
27
17
  #
28
- # TODO Extract into flush_every(100_000) do
29
- #
30
- i = 0
18
+ def process categories
19
+ comma = ?,
20
+ newline = ?\n
31
21
 
32
- # Explicitly reset the source to avoid caching trouble.
33
- #
34
- source.reset if source.respond_to?(:reset)
22
+ # Prepare a combined object - array.
23
+ #
24
+ combined = categories.map do |category|
25
+ [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)]
26
+ end
35
27
 
36
- # Go through each object in the source.
37
- #
38
- source.each do |object|
39
- id = object.id
28
+ # Index.
29
+ #
30
+ # TODO Extract into flush_every(100_000) do
31
+ #
32
+ i = 0
40
33
 
41
- # This needs to be rewritten.
34
+ # Explicitly reset the source to avoid caching trouble.
42
35
  #
43
- # Is it a good idea that not the tokenizer has control over when he gets the next text?
36
+ source.reset if source.respond_to?(:reset)
37
+
38
+ # Go through each object in the source.
44
39
  #
45
- combined.each do |category, cache, _, tokenizer|
46
- tokenizer.tokenize(object.send(category.from).to_s).each do |token_text|
47
- next unless token_text
48
- cache << id << comma << token_text << newline
40
+ source.each do |object|
41
+ id = object.id
42
+
43
+ # This needs to be rewritten.
44
+ #
45
+ # Is it a good idea that not the tokenizer has control over when he gets the next text?
46
+ #
47
+ combined.each do |category, cache, _, tokenizer|
48
+ tokenizer.tokenize(object.send(category.from).to_s).each do |token_text|
49
+ next unless token_text
50
+ cache << id << comma << token_text << newline
51
+ end
49
52
  end
50
- end
51
53
 
52
- if i >= 100_000
53
- flush combined
54
- i = 0
54
+ if i >= 100_000
55
+ flush combined
56
+ i = 0
57
+ end
58
+ i += 1
59
+ end
60
+ flush combined
61
+ combined.each do |_, _, file, _|
62
+ timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
63
+ file.close
55
64
  end
56
- i += 1
57
65
  end
58
- flush combined
59
- combined.each do |_, _, file, _|
60
- timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
61
- file.close
66
+
67
+ # Flush the combined array into the file.
68
+ #
69
+ def flush combined # :nodoc:
70
+ combined.each do |_, cache, file, _|
71
+ file.write(cache.join) && cache.clear
72
+ end
62
73
  end
63
- end
64
74
 
65
- # Flush the combined array into the file.
66
- #
67
- def flush combined # :nodoc:
68
- combined.each do |_, cache, file, _|
69
- file.write(cache.join) && cache.clear
75
+ #
76
+ #
77
+ def start_indexing_message # :nodoc:
78
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Starting parallel data preparation.}
79
+ end
80
+ def finish_indexing_message # :nodoc:
81
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Finished parallel data preparation.}
70
82
  end
71
- end
72
83
 
73
- #
74
- #
75
- def start_indexing_message # :nodoc:
76
- timed_exclaim %Q{"#{@index_or_category.identifier}": Starting parallel data preparation.}
77
- end
78
- def finish_indexing_message # :nodoc:
79
- timed_exclaim %Q{"#{@index_or_category.identifier}": Finished parallel data preparation.}
80
84
  end
81
85
 
82
86
  end
@@ -1,55 +1,59 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexers
3
+ module Picky
4
4
 
5
- # Uses a category to index its data.
6
- #
7
- # Note: It is called serial since it indexes each category separately.
8
- #
9
- class Serial < Base
5
+ module Indexers
10
6
 
11
- # Harvest the data from the source, tokenize,
12
- # and write to an intermediate "prepared index" file.
7
+ # Uses a category to index its data.
13
8
  #
14
- # Parameters:
15
- # * categories: An enumerable of Category-s.
9
+ # Note: It is called serial since it indexes each category separately.
16
10
  #
17
- def process categories
18
- comma = ?,
19
- newline = ?\n
20
-
21
- categories.each do |category|
22
-
23
- tokenizer = category.tokenizer
11
+ class Serial < Base
12
+
13
+ # Harvest the data from the source, tokenize,
14
+ # and write to an intermediate "prepared index" file.
15
+ #
16
+ # Parameters:
17
+ # * categories: An enumerable of Category-s.
18
+ #
19
+ def process categories
20
+ comma = ?,
21
+ newline = ?\n
22
+
23
+ categories.each do |category|
24
+
25
+ tokenizer = category.tokenizer
26
+
27
+ category.prepared_index_file do |file|
28
+ result = []
29
+
30
+ source.harvest(category) do |indexed_id, text|
31
+ tokenizer.tokenize(text).each do |token_text|
32
+ next unless token_text
33
+ result << indexed_id << comma << token_text << newline
34
+ end
35
+ file.write(result.join) && result.clear if result.size > 100_000
36
+ end
24
37
 
25
- category.prepared_index_file do |file|
26
- result = []
38
+ timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
27
39
 
28
- source.harvest(category) do |indexed_id, text|
29
- tokenizer.tokenize(text).each do |token_text|
30
- next unless token_text
31
- result << indexed_id << comma << token_text << newline
32
- end
33
- file.write(result.join) && result.clear if result.size > 100_000
40
+ file.write result.join
34
41
  end
35
42
 
36
- timed_exclaim %Q{"#{@index_or_category.identifier}": => #{file.path}.}
37
-
38
- file.write result.join
39
43
  end
40
44
 
41
45
  end
42
46
 
43
- end
47
+ #
48
+ #
49
+ def start_indexing_message # :nodoc:
50
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Starting serial data preparation.}
51
+ end
52
+ def finish_indexing_message # :nodoc:
53
+ timed_exclaim %Q{"#{@index_or_category.identifier}": Finished serial data preparation.}
54
+ end
44
55
 
45
- #
46
- #
47
- def start_indexing_message # :nodoc:
48
- timed_exclaim %Q{"#{@index_or_category.identifier}": Starting serial data preparation.}
49
56
  end
50
- def finish_indexing_message # :nodoc:
51
- timed_exclaim %Q{"#{@index_or_category.identifier}": Finished serial data preparation.}
52
- end
53
-
54
57
  end
58
+
55
59
  end