picky 2.7.0 → 3.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,63 +1,67 @@
1
- class Categories
2
-
3
- attr_reader :categories, :category_hash
4
-
5
- delegate :each,
6
- :first,
7
- :map,
8
- :to => :categories
9
-
10
- each_delegate :reindex,
11
- :each_category,
12
- :to => :categories
13
-
14
- # A list of indexed categories.
15
- #
16
- # Options:
17
- # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
18
- # The default behaviour is that if a token does not match to
19
- # any category, the query will not return anything (since a
20
- # single token cannot be matched). If you set this option to
21
- # true, any token that cannot be matched to a category will be
22
- # simply ignored.
23
- # Use this if only a few matched words are important, like for
24
- # example of the query "Jonathan Myers 86455 Las Cucarachas"
25
- # you only want to match the zipcode, to have the search engine
26
- # display advertisements on the side for the zipcode.
27
- # Nifty! :)
28
- #
29
- def initialize options = {}
30
- clear_categories
31
-
32
- @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
33
- end
1
+ module Picky
34
2
 
35
- # Clears both the array of categories and the hash of categories.
36
- #
37
- def clear_categories
38
- @categories = []
39
- @category_hash = {}
40
- end
3
+ class Categories
41
4
 
42
- # Add the given category to the list of categories.
43
- #
44
- def << category
45
- categories << category
46
- category_hash[category.name] = category
47
- end
5
+ attr_reader :categories, :category_hash
48
6
 
49
- # Find a given category in the categories.
50
- #
51
- def [] category_name
52
- category_name = category_name.to_sym
53
- category_hash[category_name] || raise_not_found(category_name)
54
- end
55
- def raise_not_found category_name
56
- raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
57
- end
7
+ delegate :each,
8
+ :first,
9
+ :map,
10
+ :to => :categories
11
+
12
+ each_delegate :reindex,
13
+ :each_category,
14
+ :to => :categories
15
+
16
+ # A list of indexed categories.
17
+ #
18
+ # Options:
19
+ # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
20
+ # The default behaviour is that if a token does not match to
21
+ # any category, the query will not return anything (since a
22
+ # single token cannot be matched). If you set this option to
23
+ # true, any token that cannot be matched to a category will be
24
+ # simply ignored.
25
+ # Use this if only a few matched words are important, like for
26
+ # example of the query "Jonathan Myers 86455 Las Cucarachas"
27
+ # you only want to match the zipcode, to have the search engine
28
+ # display advertisements on the side for the zipcode.
29
+ # Nifty! :)
30
+ #
31
+ def initialize options = {}
32
+ clear_categories
33
+
34
+ @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
35
+ end
36
+
37
+ # Clears both the array of categories and the hash of categories.
38
+ #
39
+ def clear_categories
40
+ @categories = []
41
+ @category_hash = {}
42
+ end
43
+
44
+ # Add the given category to the list of categories.
45
+ #
46
+ def << category
47
+ categories << category
48
+ category_hash[category.name] = category
49
+ end
50
+
51
+ # Find a given category in the categories.
52
+ #
53
+ def [] category_name
54
+ category_name = category_name.to_sym
55
+ category_hash[category_name] || raise_not_found(category_name)
56
+ end
57
+ def raise_not_found category_name
58
+ raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
59
+ end
60
+
61
+ def to_s
62
+ categories.join(', ')
63
+ end
58
64
 
59
- def to_s
60
- categories.join(', ')
61
65
  end
62
66
 
63
67
  end
@@ -1,98 +1,89 @@
1
- class Categories
1
+ module Picky
2
2
 
3
- attr_reader :ignore_unassigned_tokens
3
+ class Categories
4
4
 
5
- each_delegate :load_from_cache,
6
- :analyze,
7
- :to => :categories
5
+ attr_reader :ignore_unassigned_tokens
8
6
 
9
- # Return all possible combinations for the given token.
10
- #
11
- # This checks if it needs to also search through similar
12
- # tokens, if for example, the token is one with ~.
13
- # If yes, it puts together all solutions.
14
- #
15
- def possible_combinations_for token
16
- token.similar? ? similar_possible_for(token) : possible_for(token)
17
- end
7
+ each_delegate :load_from_cache,
8
+ :analyze,
9
+ :to => :categories
18
10
 
19
- # Gets all similar tokens and puts together the possible combinations
20
- # for each found similar token.
21
- #
22
- def similar_possible_for token
23
- tokens = similar_tokens_for token
24
- inject_possible_for tokens
25
- end
11
+ # Return all possible combinations for the given token.
12
+ #
13
+ # This checks if it needs to also search through similar
14
+ # tokens, if for example, the token is one with ~.
15
+ # If yes, it puts together all solutions.
16
+ #
17
+ def possible_combinations token
18
+ token.similar? ? similar_possible_for(token) : possible_for(token)
19
+ end
26
20
 
27
- # Returns all possible similar tokens for the given token.
28
- #
29
- def similar_tokens_for token
30
- text = token.text
31
- categories.inject([]) do |result, category|
32
- next_token = token
33
- # Note: We could also break off here if not all the available
34
- # similars are needed.
35
- # Wait for a concrete case that needs this before taking
36
- # action.
37
- #
38
- while next_token = next_token.next_similar_token(category)
39
- result << next_token if next_token && next_token.text != text
40
- end
41
- result
21
+ # Gets all similar tokens and puts together the possible combinations
22
+ # for each found similar token.
23
+ #
24
+ def similar_possible_for token
25
+ tokens = similar_tokens_for token
26
+ inject_possible_for tokens
42
27
  end
43
- end
44
28
 
45
- #
46
- #
47
- def inject_possible_for tokens
48
- tokens.inject([]) do |result, token|
49
- possible = possible_categories token
50
- result + possible_for(token, possible)
29
+ # Returns all possible similar tokens for the given token.
30
+ #
31
+ def similar_tokens_for token
32
+ text = token.text
33
+ categories.inject([]) do |result, category|
34
+ next_token = token
35
+ # Note: We could also break off here if not all the available
36
+ # similars are needed.
37
+ # Wait for a concrete case that needs this before taking
38
+ # action.
39
+ #
40
+ while next_token = next_token.next_similar_token(category)
41
+ result << next_token if next_token && next_token.text != text
42
+ end
43
+ result
44
+ end
51
45
  end
52
- end
53
46
 
54
- # Returns possible Combinations for the token.
55
- #
56
- # Note: The preselected_categories param is an optimization.
57
- #
58
- # Note: Returns [] if no categories matched (will produce no result).
59
- # Returns nil if this token needs to be removed from the query.
60
- # (Also none of the categories matched, but the ignore unassigned
61
- # tokens option is true)
62
- #
63
- def possible_for token, preselected_categories = nil
64
- possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
65
- combination = category.combination_for token
66
- combination ? combinations << combination : combinations
47
+ #
48
+ #
49
+ def inject_possible_for tokens
50
+ tokens.inject([]) do |result, token|
51
+ possible = possible_categories token
52
+ result + possible_for(token, possible)
53
+ end
67
54
  end
68
- # This is an optimization to mark tokens that are ignored.
55
+
56
+ # Returns possible Combinations for the token.
69
57
  #
70
- return if ignore_unassigned_tokens && possible.empty?
71
- possible
72
- end
58
+ # Note: The preselected_categories param is an optimization.
59
+ #
60
+ # Note: Returns [] if no categories matched (will produce no result).
61
+ # Returns nil if this token needs to be removed from the query.
62
+ # (Also none of the categories matched, but the ignore unassigned
63
+ # tokens option is true)
64
+ #
65
+ def possible_for token, preselected_categories = nil
66
+ possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
67
+ combination = category.combination_for token
68
+ combination ? combinations << combination : combinations
69
+ end
70
+ # This is an optimization to mark tokens that are ignored.
71
+ #
72
+ return if ignore_unassigned_tokens && possible.empty?
73
+ possible
74
+ end
73
75
 
74
- # This returns the possible categories for this token.
75
- # If the user has already preselected a category for this token,
76
- # like "artist:moby", if not just return all for the given token,
77
- # since all are possible.
78
- #
79
- # Note: Once I thought this was called too often. But it is not (18.01.2011).
80
- #
81
- def possible_categories token
82
- user_defined_categories(token) || categories
83
- end
76
+ # This returns the possible categories for this token.
77
+ # If the user has already preselected a category for this token,
78
+ # like "artist:moby", if not just return all for the given token,
79
+ # since all are possible.
80
+ #
81
+ # Note: Once I thought this was called too often. But it is not (18.01.2011).
82
+ #
83
+ def possible_categories token
84
+ token.user_defined_categories || categories
85
+ end
84
86
 
85
- # This returns the array of categories if the user has defined
86
- # an existing category.
87
- #
88
- # Note: Returns nil if the user did not define one
89
- # or [] if he/she has defined a non-existing one.
90
- #
91
- def user_defined_categories token
92
- names = token.user_defined_category_names
93
- names && names.map do |name|
94
- category_hash[name]
95
- end.compact
96
87
  end
97
88
 
98
89
  end
@@ -1,10 +1,14 @@
1
- class Categories
2
-
3
- each_delegate :cache,
4
- :check,
5
- :clear,
6
- :backup,
7
- :restore,
8
- :to => :categories
1
+ module Picky
2
+
3
+ class Categories
4
+
5
+ each_delegate :cache,
6
+ :check,
7
+ :clear,
8
+ :backup,
9
+ :restore,
10
+ :to => :categories
11
+
12
+ end
9
13
 
10
14
  end
@@ -1,139 +1,128 @@
1
- class Category
1
+ module Picky
2
2
 
3
- attr_reader :name
3
+ class Category
4
4
 
5
- # Mandatory params:
6
- # * name: Category name to use as identifier and file names.
7
- # * index: Index to which this category is attached to.
8
- #
9
- # Options:
10
- # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
11
- # * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
12
- # * from: The source category identifier to take the data from.
13
- #
14
- # Advanced Options:
15
- # * source: Use if the category should use a different source.
16
- # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
17
- # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
18
- # * key_format: What this category's keys are formatted with (default is :to_i)
19
- #
20
- def initialize name, index, options = {}
21
- @name = name
22
- @index = index
5
+ attr_reader :name
23
6
 
24
- # Indexing.
7
+ # Mandatory params:
8
+ # * name: Category name to use as identifier and file names.
9
+ # * index: Index to which this category is attached to.
25
10
  #
26
- @source = options[:source]
27
- @from = options[:from]
28
- @tokenizer = options[:tokenizer]
29
- @key_format = options[:key_format]
30
-
31
- # TODO Push into Bundle. At least the weights.
11
+ # Options:
12
+ # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
13
+ # * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
14
+ # * from: The source category identifier to take the data from.
32
15
  #
33
- weights = options[:weights] || Generators::Weights::Default
34
- partial = options[:partial] || Generators::Partial::Default
35
- similarity = options[:similarity] || Generators::Similarity::Default
36
-
37
- @indexing_exact = index.indexing_bundle_class.new(:exact, self, weights, Generators::Partial::None.new, similarity)
38
- @indexing_partial = index.indexing_bundle_class.new(:partial, self, weights, partial, Generators::Similarity::None.new)
39
-
40
- # Indexed.
16
+ # Advanced Options:
17
+ # * source: Use if the category should use a different source.
18
+ # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
19
+ # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
20
+ # * key_format: What this category's keys are formatted with (default is :to_i)
41
21
  #
42
- # TODO Push the defaults out into the index.
22
+ def initialize name, index, options = {}
23
+ @name = name
24
+ @index = index
25
+
26
+ # Indexing.
27
+ #
28
+ @source = options[:source]
29
+ @from = options[:from]
30
+ @tokenizer = options[:tokenizer]
31
+ @key_format = options[:key_format]
32
+ @qualifiers = extract_qualifiers_from options
33
+
34
+ weights = options[:weights] || Generators::Weights::Default
35
+ partial = options[:partial] || Generators::Partial::Default
36
+ similarity = options[:similarity] || Generators::Similarity::Default
37
+
38
+ @indexing_exact = index.indexing_bundle_class.new :exact, self, weights, Generators::Partial::None.new, similarity, options
39
+ @indexing_partial = index.indexing_bundle_class.new :partial, self, weights, partial, Generators::Similarity::None.new, options
40
+
41
+ # Indexed.
42
+ #
43
+ @indexed_exact = index.indexed_bundle_class.new :exact, self, similarity
44
+ if partial.use_exact_for_partial?
45
+ @indexed_partial = @indexed_exact
46
+ else
47
+ @indexed_partial = index.indexed_bundle_class.new :partial, self, similarity
48
+ end
49
+
50
+ # @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
51
+ # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
52
+ end
53
+
54
+ # Indexes and reloads the category.
43
55
  #
44
- @partial_strategy = partial # TODO Duplicate work.
45
-
46
- @indexed_exact = index.indexed_bundle_class.new :exact, self, similarity
47
- @indexed_partial = index.indexed_bundle_class.new :partial, self, similarity
48
-
49
- # @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
50
- # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
56
+ def reindex
57
+ index
58
+ reload
59
+ end
51
60
 
52
- # TODO Extract? Yes.
61
+ # Index name.
53
62
  #
54
- Query::Qualifiers.add(name, generate_qualifiers_from(options) || [name])
55
- end
56
-
57
- # TODO Move to Index.
58
- #
59
- def generate_qualifiers_from options
60
- options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
61
- end
63
+ def index_name
64
+ @index.name
65
+ end
62
66
 
63
- # Indexes and reloads the category.
64
- #
65
- def reindex
66
- index
67
- reload
68
- end
69
-
70
- # Category name.
71
- #
72
- def category_name
73
- name
74
- end
75
-
76
- # Index name.
77
- #
78
- def index_name
79
- @index.name
80
- end
67
+ # Returns the qualifiers if set or
68
+ # just the name if not.
69
+ #
70
+ def qualifiers
71
+ @qualifiers || [name]
72
+ end
73
+ # Extract qualifiers from the options.
74
+ #
75
+ def extract_qualifiers_from options
76
+ options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
77
+ end
81
78
 
82
- # The category itself just yields itself.
83
- #
84
- def each_category
85
- yield self
86
- end
79
+ # The category itself just yields itself.
80
+ #
81
+ def each_category
82
+ yield self
83
+ end
87
84
 
88
- # Path and partial filename of the prepared index on this category.
89
- #
90
- def prepared_index_path
91
- @prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
92
- end
93
- # Get an opened index file.
94
- #
95
- # Note: If you don't use it with the block, do not forget to close it.
96
- #
97
- def prepared_index_file &block
98
- @prepared_index_file ||= Backend::File::Text.new prepared_index_path
99
- @prepared_index_file.open &block
100
- end
101
- # Creates the index directory including all necessary paths above it.
102
- #
103
- # Note: Interface method called by any indexers.
104
- #
105
- def prepare_index_directory
106
- FileUtils.mkdir_p index_directory
107
- end
85
+ # Path and partial filename of the prepared index on this category.
86
+ #
87
+ def prepared_index_path
88
+ @prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
89
+ end
90
+ # Get an opened index file.
91
+ #
92
+ # Note: If you don't use it with the block, do not forget to close it.
93
+ #
94
+ def prepared_index_file &block
95
+ @prepared_index_file ||= Backend::File::Text.new prepared_index_path
96
+ @prepared_index_file.open &block
97
+ end
98
+ # Creates the index directory including all necessary paths above it.
99
+ #
100
+ # Note: Interface method called by any indexers.
101
+ #
102
+ def prepare_index_directory
103
+ FileUtils.mkdir_p index_directory
104
+ end
108
105
 
109
- # The index directory for this category.
110
- #
111
- def index_directory
112
- @index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
113
- end
106
+ # The index directory for this category.
107
+ #
108
+ # TODO Push down into files?
109
+ #
110
+ def index_directory
111
+ @index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
112
+ end
114
113
 
115
- # Path and partial filename of a specific subindex on this category.
116
- #
117
- # Subindexes are:
118
- # * inverted index
119
- # * weights index
120
- # * partial index
121
- # * similarity index
122
- #
123
- def index_path bundle_name, type
124
- "#{index_directory}/#{name}_#{bundle_name}_#{type}"
125
- end
114
+ # Identifier for technical output.
115
+ #
116
+ def identifier
117
+ "#{@index.identifier}:#{name}"
118
+ end
126
119
 
127
- # Identifier for technical output.
128
- #
129
- def identifier
130
- @identifier ||= "#{PICKY_ENVIRONMENT}:#{index_name}:#{name}"
131
- end
120
+ #
121
+ #
122
+ def to_s
123
+ "#{self.class}(#{identifier})"
124
+ end
132
125
 
133
- #
134
- #
135
- def to_s
136
- "Category(#{name})"
137
126
  end
138
127
 
139
128
  end
@@ -1,48 +1,46 @@
1
- #
2
- #
3
- class Category
1
+ module Picky
4
2
 
5
- attr_reader :indexed_exact
6
-
7
- # Loads the index from cache.
8
- #
9
- def load_from_cache
10
- timed_exclaim %Q{"#{identifier}": Loading index from cache.}
11
- indexed_exact.load
12
- indexed_partial.load
13
- end
14
- alias reload load_from_cache
15
-
16
- # Gets the weight for this token's text.
17
- #
18
- def weight token
19
- bundle_for(token).weight token.text
20
- end
21
-
22
- # Gets the ids for this token's text.
23
3
  #
24
- def ids token
25
- bundle_for(token).ids token.text
26
- end
27
-
28
- # Returns the right index bundle for this token.
29
4
  #
30
- def bundle_for token
31
- token.partial? ? indexed_partial : indexed_exact
32
- end
5
+ class Category
6
+
7
+ attr_reader :indexed_exact,
8
+ :indexed_partial
9
+
10
+ # Loads the index from cache.
11
+ #
12
+ def load_from_cache
13
+ timed_exclaim %Q{"#{identifier}": Loading index from cache.}
14
+ indexed_exact.load
15
+ indexed_partial.load
16
+ end
17
+ alias reload load_from_cache
18
+
19
+ # Gets the weight for this token's text.
20
+ #
21
+ def weight token
22
+ bundle_for(token).weight token.text
23
+ end
24
+
25
+ # Gets the ids for this token's text.
26
+ #
27
+ def ids token
28
+ bundle_for(token).ids token.text
29
+ end
30
+
31
+ # Returns the right index bundle for this token.
32
+ #
33
+ def bundle_for token
34
+ token.partial? ? indexed_partial : indexed_exact
35
+ end
36
+
37
+ # Returns a combination for the token,
38
+ # or nil, if there is none.
39
+ #
40
+ def combination_for token
41
+ weight(token) && Query::Combination.new(token, self)
42
+ end
33
43
 
34
- # The partial strategy defines whether to
35
- # really use the partial index.
36
- #
37
- def indexed_partial
38
- @partial_strategy.use_exact_for_partial? ? @indexed_exact : @indexed_partial
39
- end
40
-
41
- # Returns a combination for the token,
42
- # or nil, if there is none.
43
- #
44
- def combination_for token
45
- weight(token) && Query::Combination.new(token, self)
46
44
  end
47
45
 
48
46
  end