picky 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. data/lib/picky/application.rb +12 -12
  2. data/lib/picky/backends/backend.rb +17 -0
  3. data/lib/picky/{backend → backends}/file/basic.rb +1 -1
  4. data/lib/picky/{backend → backends}/file/json.rb +1 -1
  5. data/lib/picky/{backend → backends}/file/marshal.rb +1 -1
  6. data/lib/picky/{backend → backends}/file/text.rb +1 -1
  7. data/lib/picky/backends/memory.rb +53 -0
  8. data/lib/picky/{backend → backends}/redis/basic.rb +9 -14
  9. data/lib/picky/backends/redis/float_hash.rb +26 -0
  10. data/lib/picky/{backend → backends}/redis/list_hash.rb +7 -11
  11. data/lib/picky/{backend → backends}/redis/string_hash.rb +7 -11
  12. data/lib/picky/backends/redis.rb +87 -0
  13. data/lib/picky/bundle.rb +107 -11
  14. data/lib/picky/category.rb +5 -5
  15. data/lib/picky/index.rb +329 -0
  16. data/lib/picky/index_indexed.rb +31 -0
  17. data/lib/picky/index_indexing.rb +161 -0
  18. data/lib/picky/indexed/bundle.rb +112 -0
  19. data/lib/picky/indexed/wrappers/exact_first.rb +1 -1
  20. data/lib/picky/indexers/parallel.rb +2 -1
  21. data/lib/picky/indexers/serial.rb +2 -1
  22. data/lib/picky/indexes_indexing.rb +1 -1
  23. data/lib/picky/indexing/bundle.rb +188 -0
  24. data/lib/picky/indexing/wrappers/category/location.rb +1 -1
  25. data/lib/picky/interfaces/live_parameters.rb +8 -8
  26. data/lib/picky/loader.rb +24 -38
  27. data/lib/picky/migrations/from_30_to_31.rb +61 -0
  28. data/lib/picky/query/allocation.rb +10 -5
  29. data/lib/picky/query/combinations.rb +70 -0
  30. data/lib/picky/query/indexes.rb +8 -7
  31. data/lib/picky/query/indexes_check.rb +47 -0
  32. data/lib/picky/query/token.rb +16 -29
  33. data/lib/picky/query/tokens.rb +4 -20
  34. data/lib/picky/search.rb +51 -58
  35. data/lib/picky/tokenizer.rb +231 -0
  36. data/lib/picky/tokenizers/location.rb +1 -1
  37. data/lib/tasks/try.rake +4 -12
  38. data/lib/tasks/try.rb +37 -0
  39. data/spec/lib/application_spec.rb +5 -5
  40. data/spec/lib/{backend → backends}/file/basic_spec.rb +2 -2
  41. data/spec/lib/{backend → backends}/file/json_spec.rb +2 -2
  42. data/spec/lib/{backend → backends}/file/marshal_spec.rb +2 -2
  43. data/spec/lib/{backend → backends}/file/text_spec.rb +1 -1
  44. data/spec/lib/backends/memory_spec.rb +77 -0
  45. data/spec/lib/{backend → backends}/redis/basic_spec.rb +19 -21
  46. data/spec/lib/backends/redis/float_hash_spec.rb +38 -0
  47. data/spec/lib/backends/redis/list_hash_spec.rb +27 -0
  48. data/spec/lib/backends/redis/string_hash_spec.rb +38 -0
  49. data/spec/lib/backends/redis_spec.rb +79 -0
  50. data/spec/lib/categories_indexed_spec.rb +3 -3
  51. data/spec/lib/category_indexed_spec.rb +6 -6
  52. data/spec/lib/category_indexing_spec.rb +1 -1
  53. data/spec/lib/category_spec.rb +1 -1
  54. data/spec/lib/frontend_adapters/rack_spec.rb +2 -2
  55. data/spec/lib/{indexes/index_indexed_spec.rb → index_indexed_spec.rb} +1 -1
  56. data/spec/lib/{indexes/index_indexing_spec.rb → index_indexing_spec.rb} +1 -1
  57. data/spec/lib/{indexes/index_spec.rb → index_spec.rb} +1 -1
  58. data/spec/lib/indexed/{bundle/memory_spec.rb → memory_spec.rb} +18 -18
  59. data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
  60. data/spec/lib/indexing/{bundle/memory_partial_generation_speed_spec.rb → bundle_partial_generation_speed_spec.rb} +3 -3
  61. data/spec/lib/indexing/bundle_spec.rb +302 -0
  62. data/spec/lib/query/allocation_spec.rb +21 -11
  63. data/spec/lib/query/combination_spec.rb +2 -2
  64. data/spec/lib/query/{combinations/base_spec.rb → combinations_spec.rb} +1 -1
  65. data/spec/lib/query/indexes_check_spec.rb +25 -0
  66. data/spec/lib/query/indexes_spec.rb +5 -1
  67. data/spec/lib/query/token_spec.rb +18 -20
  68. data/spec/lib/query/tokens_spec.rb +14 -65
  69. data/spec/lib/search_spec.rb +36 -37
  70. data/spec/lib/tasks/try_spec.rb +51 -0
  71. data/spec/lib/{tokenizers/base_spec.rb → tokenizer_spec.rb} +15 -44
  72. metadata +64 -81
  73. data/lib/picky/backend/base.rb +0 -121
  74. data/lib/picky/backend/files.rb +0 -28
  75. data/lib/picky/backend/redis.rb +0 -44
  76. data/lib/picky/indexed/bundle/base.rb +0 -47
  77. data/lib/picky/indexed/bundle/memory.rb +0 -88
  78. data/lib/picky/indexed/bundle/redis.rb +0 -91
  79. data/lib/picky/indexes/index.rb +0 -328
  80. data/lib/picky/indexes/index_indexed.rb +0 -35
  81. data/lib/picky/indexes/index_indexing.rb +0 -165
  82. data/lib/picky/indexes/memory.rb +0 -20
  83. data/lib/picky/indexes/redis.rb +0 -20
  84. data/lib/picky/indexing/bundle/base.rb +0 -242
  85. data/lib/picky/indexing/bundle/memory.rb +0 -26
  86. data/lib/picky/indexing/bundle/redis.rb +0 -26
  87. data/lib/picky/query/combinations/base.rb +0 -74
  88. data/lib/picky/query/combinations/memory.rb +0 -52
  89. data/lib/picky/query/combinations/redis.rb +0 -90
  90. data/lib/picky/query.rb +0 -6
  91. data/lib/picky/tokenizers/base.rb +0 -231
  92. data/lib/picky/tokenizers/index.rb +0 -34
  93. data/lib/picky/tokenizers/query.rb +0 -61
  94. data/spec/lib/backend/files_spec.rb +0 -189
  95. data/spec/lib/backend/redis/list_hash_spec.rb +0 -40
  96. data/spec/lib/backend/redis/string_hash_spec.rb +0 -47
  97. data/spec/lib/backend/redis_spec.rb +0 -170
  98. data/spec/lib/indexed/bundle/redis_spec.rb +0 -41
  99. data/spec/lib/indexes/redis_spec.rb +0 -15
  100. data/spec/lib/indexing/bundle/base_spec.rb +0 -38
  101. data/spec/lib/indexing/bundle/memory_spec.rb +0 -287
  102. data/spec/lib/indexing/bundle/redis_spec.rb +0 -283
  103. data/spec/lib/query/combinations/memory_spec.rb +0 -158
  104. data/spec/lib/query/combinations/redis_spec.rb +0 -172
  105. data/spec/lib/tokenizers/index_spec.rb +0 -69
  106. data/spec/lib/tokenizers/query_spec.rb +0 -121
@@ -20,17 +20,16 @@ module Picky
20
20
 
21
21
  # Creates a new Tokens object from a number of Strings.
22
22
  #
23
- # Options:
24
- # * downcase: Whether to downcase the passed strings (default is true)
25
- #
26
- def self.processed words, downcase = true
27
- new words.collect! { |word| Token.processed word, downcase }
23
+ def self.processed words, originals
24
+ new words.zip(originals).collect! { |word, original| Token.processed word, original }
28
25
  end
29
26
 
30
27
  # Tokenizes each token.
31
28
  #
32
29
  # Note: Passed tokenizer needs to offer #normalize(text).
33
30
  #
31
+ # TODO Still needed?
32
+ #
34
33
  def tokenize_with tokenizer
35
34
  @tokens.each { |token| token.tokenize_with(tokenizer) }
36
35
  end
@@ -63,21 +62,6 @@ module Picky
63
62
  @tokens.last.partial = true unless empty?
64
63
  end
65
64
 
66
- # Caps the tokens to the maximum.
67
- #
68
- def cap maximum
69
- @tokens.slice!(maximum..-1) if cap?(maximum)
70
- end
71
- def cap? maximum
72
- @tokens.size > maximum
73
- end
74
-
75
- # Rejects blank tokens.
76
- #
77
- def reject
78
- @tokens.reject! &:blank?
79
- end
80
-
81
65
  #
82
66
  #
83
67
  def categorize mapper
data/lib/picky/search.rb CHANGED
@@ -18,7 +18,8 @@ module Picky
18
18
  include Helpers::Measuring
19
19
 
20
20
  attr_reader :indexes
21
- attr_accessor :tokenizer, :weights
21
+ attr_accessor :tokenizer,
22
+ :weights
22
23
 
23
24
  # Takes:
24
25
  # * A number of indexes
@@ -29,16 +30,17 @@ module Picky
29
30
  # It is also possible to define the tokenizer and weights like so.
30
31
  # Example:
31
32
  # search = Search.new(index1, index2, index3) do
32
- # searching removes_characters: /[^a-z]/, etc.
33
- # weights [:author, :title] => +3, [:title, :isbn] => +1
33
+ # searching removes_characters: /[^a-z]/ # etc.
34
+ # weights [:author, :title] => +3,
35
+ # [:title, :isbn] => +1
34
36
  # end
35
37
  #
36
38
  def initialize *index_definitions
37
- @indexes = Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
39
+ @indexes = Query::Indexes.new *index_definitions
38
40
 
39
41
  instance_eval(&Proc.new) if block_given?
40
42
 
41
- @tokenizer ||= Tokenizers::Query.default
43
+ @tokenizer ||= Tokenizer.query_default # THINK Not dynamic. Ok?
42
44
  @weights ||= Query::Weights.new
43
45
 
44
46
  self
@@ -58,28 +60,50 @@ module Picky
58
60
  @tokenizer = if options.respond_to?(:tokenize)
59
61
  options
60
62
  else
61
- options && Tokenizers::Query.new(options)
63
+ options && Tokenizer.new(options)
62
64
  end
63
65
  end
64
66
 
65
- # Example:
67
+ # Examples:
66
68
  # search = Search.new(books_index, dvd_index, mp3_index) do
67
69
  # boost [:author, :title] => +3,
68
70
  # [:title, :isbn] => +1
69
71
  # end
70
72
  #
73
+ # or
74
+ #
75
+ # # Explicitly add a random number (0...1) to the weights.
76
+ # #
77
+ # my_weights = Class.new do
78
+ # # Instance only needs to implement
79
+ # # score_for combinations
80
+ # # and return a number that is
81
+ # # added to the weight.
82
+ # #
83
+ # def score_for combinations
84
+ # rand
85
+ # end
86
+ # end.new
87
+ #
88
+ # search = Search.new(books_index, dvd_index, mp3_index) do
89
+ # boost my_weights
90
+ # end
91
+ #
71
92
  def boost weights
72
- weights ||= Query::Weights.new
73
- @weights = Hash === weights ? Query::Weights.new(weights) : weights
93
+ @weights = if weights.respond_to?(:score_for)
94
+ weights
95
+ else
96
+ Query::Weights.new weights
97
+ end
74
98
  end
75
99
 
76
100
  # This is the main entry point for a query.
77
101
  # Use this in specs and also for running queries.
78
102
  #
79
103
  # Parameters:
80
- # * text: The search text.
81
- # * ids = 20: _optional_ The amount of ids to calculate (with offset).
82
- # * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
104
+ # * text: The search text.
105
+ # * ids = 20: The amount of ids to calculate (with offset).
106
+ # * offset = 0: The offset from which position to return the ids. Useful for pagination.
83
107
  #
84
108
  # Note: The Rack adapter calls this method after unravelling the HTTP request.
85
109
  #
@@ -89,7 +113,7 @@ module Picky
89
113
 
90
114
  # Runs the actual search using Query::Tokens.
91
115
  #
92
- # Note: Internal method, use #search
116
+ # Note: Internal method, use #search to search.
93
117
  #
94
118
  def search_with tokens, ids = 20, offset = 0, original_text = nil
95
119
  results = nil
@@ -104,7 +128,7 @@ module Picky
104
128
 
105
129
  # Execute a search using Query::Tokens.
106
130
  #
107
- # Note: Internal method, use #search.
131
+ # Note: Internal method, use #search to search.
108
132
  #
109
133
  def execute tokens, ids, offset, original_text = nil
110
134
  Results.from original_text, ids, offset, sorted_allocations(tokens)
@@ -113,10 +137,16 @@ module Picky
113
137
  # Delegates the tokenizing to the query tokenizer.
114
138
  #
115
139
  # Parameters:
116
- # * text: The text to tokenize.
140
+ # * text: The string to tokenize.
141
+ #
142
+ # Returns:
143
+ # * A Picky::Query::Tokens instance.
117
144
  #
118
145
  def tokenized text
119
- tokenizer.tokenize text
146
+ tokens, originals = tokenizer.tokenize text
147
+ tokens = Query::Tokens.processed tokens, originals || tokens
148
+ tokens.partialize_last # Note: In the standard Picky search, the last token is always partial.
149
+ tokens
120
150
  end
121
151
 
122
152
  # Gets sorted allocations for the tokens.
@@ -125,52 +155,15 @@ module Picky
125
155
  indexes.prepared_allocations_for tokens, weights
126
156
  end
127
157
 
128
- # Returns the right combinations strategy for
129
- # a number of query indexes.
130
- #
131
- # Currently it isn't possible using Memory and Redis etc.
132
- # indexes in the same query index group.
133
- #
134
- # Picky will raise a Query::Indexes::DifferentTypesError.
135
- #
136
- @@mapping = {
137
- Indexes::Memory => Query::Combinations::Memory,
138
- Indexes::Redis => Query::Combinations::Redis
139
- }
140
- def combinations_type_for index_definitions_ary
141
- index_types = extract_index_types index_definitions_ary
142
- !index_types.empty? && @@mapping[*index_types] || Query::Combinations::Memory
143
- end
144
- def extract_index_types index_definitions_ary
145
- index_types = index_definitions_ary.map(&:class)
146
- index_types.uniq!
147
- check_index_types index_types
148
- index_types
149
- end
150
- def check_index_types index_types
151
- raise_different index_types if index_types.size > 1
152
- end
153
- # Currently it isn't possible using Memory and Redis etc.
154
- # indexes in the same query index group.
155
- #
156
- class DifferentTypesError < StandardError
157
- def initialize types
158
- @types = types
159
- end
160
- def to_s
161
- "Currently it isn't possible to mix #{@types.join(" and ")} Indexes in the same Search instance."
162
- end
163
- end
164
- def raise_different index_types
165
- raise DifferentTypesError.new(index_types)
166
- end
167
-
168
158
  # Display some nice information for the user.
169
159
  #
170
160
  def to_s
171
161
  s = "#{self.class}("
172
- s << @indexes.indexes.map(&:name).join(', ')
173
- s << ", weights: #{@weights}" unless @weights.empty?
162
+ unless @indexes.indexes.empty?
163
+ s << @indexes.indexes.map(&:name).join(', ')
164
+ s << ", "
165
+ end
166
+ s << "weights: #{@weights}"
174
167
  s << ")"
175
168
  s
176
169
  end
@@ -0,0 +1,231 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Picky
4
+
5
+ # Defines tokenizing processes used both in indexing and querying.
6
+ #
7
+ class Tokenizer
8
+
9
+ def self.index_default= new_default
10
+ @index_default = new_default
11
+ end
12
+ def self.index_default
13
+ @index_default ||= new
14
+ end
15
+
16
+ def self.query_default= new_default
17
+ @query_default = new_default
18
+ end
19
+ def self.query_default
20
+ @query_default ||= new
21
+ end
22
+
23
+ # TODO Move EMPTY_STRING top level.
24
+ #
25
+ EMPTY_STRING = ''.freeze
26
+
27
+ def to_s
28
+ reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
29
+ <<-TOKENIZER
30
+ Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
31
+ Stopwords: #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
32
+ Splits text on: #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
33
+ Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
34
+ Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
35
+ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
36
+ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
37
+ TOKENIZER
38
+ end
39
+
40
+ # Stopwords.
41
+ #
42
+ # We only allow regexps (even if string would be okay
43
+ # too for gsub! - it's too hard to understand)
44
+ #
45
+ def stopwords regexp
46
+ check_argument_in __method__, Regexp, regexp
47
+ @remove_stopwords_regexp = regexp
48
+ end
49
+ def remove_stopwords text
50
+ text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
51
+ text
52
+ end
53
+ @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
54
+ def remove_non_single_stopwords text
55
+ return text unless @remove_stopwords_regexp
56
+ return text if text.match @@non_single_stopword_regexp
57
+ remove_stopwords text
58
+ end
59
+
60
+ # Illegals.
61
+ #
62
+ # We only allow regexps (even if string would be okay
63
+ # too for gsub! - it's too hard to understand)
64
+ #
65
+ def removes_characters regexp
66
+ check_argument_in __method__, Regexp, regexp
67
+ @removes_characters_regexp = regexp
68
+ end
69
+ def remove_illegals text
70
+ text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
71
+ text
72
+ end
73
+
74
+ # Splitting.
75
+ #
76
+ # We allow Strings and Regexps.
77
+ # Note: We do not test against to_str since symbols do not work with String#split.
78
+ #
79
+ def splits_text_on regexp_or_string
80
+ raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
81
+ @splits_text_on = regexp_or_string
82
+ end
83
+ def split text
84
+ text.split @splits_text_on
85
+ end
86
+
87
+ # Normalizing.
88
+ #
89
+ # We only allow arrays.
90
+ #
91
+ def normalizes_words regexp_replaces
92
+ raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
93
+ @normalizes_words_regexp_replaces = regexp_replaces
94
+ end
95
+ def normalize_with_patterns text
96
+ return text unless @normalizes_words_regexp_replaces
97
+
98
+ @normalizes_words_regexp_replaces.each do |regex, replace|
99
+ # This should be sufficient
100
+ #
101
+ text.gsub!(regex, replace) and break
102
+ end
103
+
104
+ text
105
+ end
106
+ def normalize_with_patterns?
107
+ @normalizes_words_regexp_replaces
108
+ end
109
+
110
+ # Substitute Characters with this substituter.
111
+ #
112
+ # Default is European Character substitution.
113
+ #
114
+ def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
115
+ raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
116
+ @substituter = substituter
117
+ end
118
+ def substitute_characters text
119
+ substituter?? substituter.substitute(text) : text
120
+ end
121
+
122
+ # Reject tokens after tokenizing based on the given criteria.
123
+ #
124
+ def rejects_token_if &condition
125
+ @reject_condition = condition
126
+ end
127
+ def reject tokens
128
+ tokens.reject! &@reject_condition
129
+ end
130
+
131
+ def case_sensitive case_sensitive
132
+ @case_sensitive = case_sensitive
133
+ end
134
+ def downcase?
135
+ !@case_sensitive
136
+ end
137
+
138
+ def maximum_tokens amount
139
+ @maximum_tokens = amount
140
+ end
141
+ def cap words
142
+ words.slice!(@maximum_tokens..-1) if cap?(words)
143
+ end
144
+ def cap? words
145
+ @maximum_tokens && words.size > @maximum_tokens
146
+ end
147
+
148
+ # Checks if the right argument type has been given.
149
+ #
150
+ def check_argument_in method, type, argument, &condition
151
+ raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
152
+ end
153
+
154
+ attr_reader :substituter
155
+ alias substituter? substituter
156
+
157
+ def initialize options = {}
158
+ substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
159
+ removes_characters options[:removes_characters] if options[:removes_characters]
160
+ stopwords options[:stopwords] if options[:stopwords]
161
+ splits_text_on options[:splits_text_on] || /\s/
162
+ normalizes_words options[:normalizes_words] if options[:normalizes_words]
163
+ maximum_tokens options[:maximum_tokens]
164
+ rejects_token_if &(options[:rejects_token_if] || :blank?)
165
+ case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
166
+ end
167
+
168
+ # Returns a number of tokens, generated from the given text,
169
+ # based on the parameters given.
170
+ #
171
+ # Returns:
172
+ # [[:token1, :token2], ["Original1", "Original2"]]
173
+ #
174
+ def tokenize text
175
+ text = preprocess text # processing the text
176
+ return empty_tokens if text.blank?
177
+ words = pretokenize text # splitting and preparations for tokenizing
178
+ return empty_tokens if words.empty?
179
+ tokens = tokens_for words # creating tokens / strings
180
+ [tokens, words]
181
+ end
182
+
183
+ # Default preprocessing hook.
184
+ #
185
+ # Does:
186
+ # 1. Character substitution.
187
+ # 2. Remove illegal expressions.
188
+ # 3. Remove non-single stopwords. (Stopwords that occur with other words)
189
+ #
190
+ def preprocess text
191
+ text = substitute_characters text
192
+ remove_illegals text
193
+ # We do not remove single stopwords e.g. in the indexer for
194
+ # an entirely different reason than in the query tokenizer.
195
+ # An indexed thing with just name "UND" (a possible stopword)
196
+ # should not lose its name.
197
+ #
198
+ remove_non_single_stopwords text
199
+ text
200
+ end
201
+
202
+ # Pretokenizing.
203
+ #
204
+ # Does:
205
+ # * Split the text into words.
206
+ # * Cap the amount of tokens if maximum_tokens is set.
207
+ #
208
+ def pretokenize text
209
+ words = split text
210
+ words.collect! { |word| normalize_with_patterns word } if normalize_with_patterns?
211
+ reject words
212
+ cap words if cap?(words)
213
+ words
214
+ end
215
+
216
+ # Downcases.
217
+ #
218
+ def tokens_for words
219
+ words.collect! { |word| word.downcase!; word } if downcase?
220
+ words
221
+ end
222
+
223
+ # Returns empty tokens.
224
+ #
225
+ def empty_tokens
226
+ [[], []]
227
+ end
228
+
229
+ end
230
+
231
+ end
@@ -2,7 +2,7 @@ module Picky
2
2
 
3
3
  module Tokenizers
4
4
 
5
- class Location < Base
5
+ class Location < Picky::Tokenizer # TODO Still needed?
6
6
 
7
7
  attr_reader :calculation
8
8
 
data/lib/tasks/try.rake CHANGED
@@ -2,18 +2,10 @@
2
2
  #
3
3
  desc "Try the given text in the indexer/query (index and category optional)."
4
4
  task :try, [:text, :index, :category] => :application do |_, options|
5
- text, index, category = options.text, options.index, options.category
6
-
7
5
  puts
8
- fail "\x1b[31mrake try needs a text to try indexing and query preparation\x1b[m, e.g. rake 'try[yourtext]'." unless text
9
-
10
- specific = Picky::Indexes
11
- specific = specific[index] if index
12
- specific = specific[category] if category
6
+ fail "\x1b[31mrake try needs a text to try indexing and query preparation\x1b[m, e.g. rake 'try[yourtext]'." unless options.text
13
7
 
14
- puts "\"#{text}\" is saved in the #{specific.identifier} index as #{specific.tokenizer.tokenize(text.dup).to_a}"
15
-
16
- puts "\"#{text}\" as a search will be tokenized into #{Picky::Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
17
- puts
18
- puts "(category qualifiers, e.g. title: are removed if they do not exist as a qualifier, so 'toitle:bla' -> 'bla')"
8
+ require File.expand_path '../try', __FILE__
9
+ try = Picky::Try.new options.text, options.index, options.category
10
+ try.to_stdout
19
11
  end
data/lib/tasks/try.rb ADDED
@@ -0,0 +1,37 @@
1
+ module Picky
2
+
3
+ class Try
4
+
5
+ attr_reader :text, :specific
6
+
7
+ def initialize text, index = nil, category = nil
8
+ @text = text
9
+ @specific = Picky::Indexes
10
+ @specific = @specific[index.to_sym] if index
11
+ @specific = @specific[category.to_sym] if category
12
+ end
13
+
14
+ def saved
15
+ specific.tokenizer.tokenize(text.dup).first
16
+ end
17
+
18
+ def searched
19
+ Picky::Tokenizer.query_default.tokenize(text.dup).first
20
+ end
21
+
22
+ def output
23
+ <<-OUTPUT
24
+ \"#{text}\" is saved in the #{specific.identifier} index as #{saved}
25
+ \"#{text}\" as a search will be tokenized as #{searched}
26
+
27
+ (category qualifiers, e.g. title: are removed if they do not exist as a qualifier, so 'toitle:bla' -> 'bla')
28
+ OUTPUT
29
+ end
30
+
31
+ def to_stdout
32
+ puts output
33
+ end
34
+
35
+ end
36
+
37
+ end
@@ -8,7 +8,7 @@ describe Picky::Application do
8
8
  it "should run ok" do
9
9
  lambda {
10
10
  class MinimalTestApplication < described_class
11
- books = Picky::Indexes::Memory.new :books do
11
+ books = Picky::Index.new :books do
12
12
  source Picky::Sources::DB.new(
13
13
  'SELECT id, title FROM books',
14
14
  :file => 'app/db.yml'
@@ -20,8 +20,8 @@ describe Picky::Application do
20
20
 
21
21
  route %r{^/books} => Picky::Search.new(books)
22
22
  end
23
- Picky::Tokenizers::Index.default.tokenize 'some text'
24
- Picky::Tokenizers::Query.default.tokenize 'some text'
23
+ Picky::Tokenizer.index_default.tokenize 'some text'
24
+ Picky::Tokenizer.query_default.tokenize 'some text'
25
25
  }.should_not raise_error
26
26
  end
27
27
  it "should run ok" do
@@ -44,7 +44,7 @@ describe Picky::Application do
44
44
  substitutes_characters_with: Picky::CharacterSubstituters::WestEuropean.new,
45
45
  maximum_tokens: 5
46
46
 
47
- books_index = Picky::Indexes::Memory.new :books do
47
+ books_index = Picky::Index.new :books do
48
48
  source Picky::Sources::DB.new(
49
49
  'SELECT id, title, author, isbn13 as isbn FROM books',
50
50
  :file => 'app/db.yml'
@@ -58,7 +58,7 @@ describe Picky::Application do
58
58
  books_index.define_category :isbn,
59
59
  partial: Picky::Partial::None.new # Partially searching on an ISBN makes not much sense.
60
60
 
61
- geo_index = Picky::Indexes::Memory.new :geo do
61
+ geo_index = Picky::Index.new :geo do
62
62
  source Picky::Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
63
63
  indexing removes_characters: /[^a-z]/
64
64
  category :location,
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Picky::Backend::File::Basic do
3
+ describe Picky::Backends::File::Basic do
4
4
 
5
5
  let(:file) { described_class.new 'some/cache/path/to/file' }
6
6
 
@@ -18,7 +18,7 @@ describe Picky::Backend::File::Basic do
18
18
 
19
19
  describe 'to_s' do
20
20
  it 'returns the cache path with the default file extension' do
21
- file.to_s.should == 'Picky::Backend::File::Basic(some/cache/path/to/file.index)'
21
+ file.to_s.should == 'Picky::Backends::File::Basic(some/cache/path/to/file.index)'
22
22
  end
23
23
  end
24
24
 
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Picky::Backend::File::JSON do
3
+ describe Picky::Backends::File::JSON do
4
4
 
5
5
  let(:file) { described_class.new 'some/cache/path/to/file' }
6
6
 
@@ -24,7 +24,7 @@ describe Picky::Backend::File::JSON do
24
24
 
25
25
  describe 'to_s' do
26
26
  it 'returns the cache path with the default file extension' do
27
- file.to_s.should == 'Picky::Backend::File::JSON(some/cache/path/to/file.json)'
27
+ file.to_s.should == 'Picky::Backends::File::JSON(some/cache/path/to/file.json)'
28
28
  end
29
29
  end
30
30
 
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Picky::Backend::File::Marshal do
3
+ describe Picky::Backends::File::Marshal do
4
4
 
5
5
  let(:file) { described_class.new 'some/cache/path/to/file' }
6
6
 
@@ -24,7 +24,7 @@ describe Picky::Backend::File::Marshal do
24
24
 
25
25
  describe 'to_s' do
26
26
  it 'returns the cache path with the default file extension' do
27
- file.to_s.should == 'Picky::Backend::File::Marshal(some/cache/path/to/file.dump)'
27
+ file.to_s.should == 'Picky::Backends::File::Marshal(some/cache/path/to/file.dump)'
28
28
  end
29
29
  end
30
30
 
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Picky::Backend::File::Text do
3
+ describe Picky::Backends::File::Text do
4
4
 
5
5
  before(:each) do
6
6
  @file = described_class.new "some_cache_path"