picky 3.4.3 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. data/aux/picky/cli.rb +1 -1
  2. data/lib/picky/backends/memory/json.rb +1 -1
  3. data/lib/picky/backends/memory/text.rb +2 -2
  4. data/lib/picky/backends/redis/string.rb +6 -0
  5. data/lib/picky/bundle.rb +0 -1
  6. data/lib/picky/bundle_indexing.rb +11 -107
  7. data/lib/picky/bundle_realtime.rb +16 -8
  8. data/lib/picky/calculations/location.rb +18 -14
  9. data/lib/picky/categories.rb +1 -1
  10. data/lib/picky/category.rb +7 -1
  11. data/lib/picky/category_indexed.rb +1 -0
  12. data/lib/picky/category_indexing.rb +17 -17
  13. data/lib/picky/category_realtime.rb +23 -11
  14. data/lib/picky/deployment.rb +33 -33
  15. data/lib/picky/generators/partial/substring.rb +0 -2
  16. data/lib/picky/generators/similarity/double_metaphone.rb +1 -1
  17. data/lib/picky/generators/similarity/metaphone.rb +1 -1
  18. data/lib/picky/generators/similarity/soundex.rb +1 -1
  19. data/lib/picky/index.rb +22 -5
  20. data/lib/picky/index_indexing.rb +3 -15
  21. data/lib/picky/indexers/base.rb +7 -3
  22. data/lib/picky/indexers/parallel.rb +1 -10
  23. data/lib/picky/indexers/serial.rb +1 -10
  24. data/lib/picky/indexes.rb +1 -1
  25. data/lib/picky/loader.rb +2 -6
  26. data/lib/picky/query/qualifier_category_mapper.rb +2 -2
  27. data/lib/picky/query/token.rb +1 -2
  28. data/lib/picky/query/tokens.rb +6 -0
  29. data/lib/picky/search.rb +1 -0
  30. data/lib/picky/sources/couch.rb +1 -1
  31. data/lib/picky/sources/csv.rb +1 -1
  32. data/lib/picky/sources/mongo.rb +1 -1
  33. data/lib/picky/wrappers/bundle/calculation.rb +8 -8
  34. data/lib/picky/wrappers/bundle/delegators.rb +4 -1
  35. data/lib/picky/wrappers/bundle/exact_partial.rb +1 -1
  36. data/lib/picky/wrappers/bundle/location.rb +30 -13
  37. data/lib/picky/wrappers/category/location.rb +14 -9
  38. data/lib/tasks/try.rb +2 -2
  39. data/spec/lib/backends/memory/text_spec.rb +6 -6
  40. data/spec/lib/bundle_spec.rb +4 -4
  41. data/spec/lib/calculations/location_spec.rb +27 -29
  42. data/spec/lib/category_indexed_spec.rb +1 -0
  43. data/spec/lib/category_indexing_spec.rb +23 -36
  44. data/spec/lib/category_spec.rb +2 -0
  45. data/spec/lib/extensions/string_spec.rb +1 -1
  46. data/spec/lib/generators/partial/infix_spec.rb +2 -2
  47. data/spec/lib/index_indexing_spec.rb +5 -3
  48. data/spec/lib/indexed/bundle_spec.rb +2 -2
  49. data/spec/lib/indexers/base_spec.rb +2 -4
  50. data/spec/lib/indexers/serial_spec.rb +3 -19
  51. data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +42 -42
  52. data/spec/lib/indexing/bundle_spec.rb +4 -133
  53. data/spec/lib/query/combination_spec.rb +6 -6
  54. data/spec/lib/query/token_spec.rb +32 -19
  55. data/spec/lib/query/tokens_spec.rb +23 -10
  56. metadata +27 -34
  57. data/lib/picky/no_source_specified_exception.rb +0 -7
  58. data/lib/picky/wrappers/sources/base.rb +0 -35
  59. data/lib/picky/wrappers/sources/location.rb +0 -56
  60. data/spec/lib/sources/wrappers/base_spec.rb +0 -38
  61. data/spec/lib/sources/wrappers/location_spec.rb +0 -55
data/aux/picky/cli.rb CHANGED
@@ -14,7 +14,7 @@ module Picky
14
14
  executor.execute selector, args, params
15
15
  end
16
16
  def executor_class_for selector = nil
17
- selector && @@mapping[selector.to_sym] || [Help]
17
+ selector && @@mapping[selector.intern] || [Help]
18
18
  end
19
19
 
20
20
  class Base
@@ -17,7 +17,7 @@ module Picky
17
17
  # Loads the index hash from json format.
18
18
  #
19
19
  def load
20
- Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true # TODO to_sym
20
+ Yajl::Parser.parse ::File.open(cache_path, 'r') # , symbolize_keys: true # TODO Symbols.
21
21
  end
22
22
 
23
23
  # Dumps the index hash in json format.
@@ -41,7 +41,7 @@ module Picky
41
41
  # * id,data\n
42
42
  # * id,data\n
43
43
  #
44
- # Yields an id string and a symbol token.
44
+ # Yields an id string and a token.
45
45
  #
46
46
  def retrieve
47
47
  id = nil
@@ -49,7 +49,7 @@ module Picky
49
49
  ::File.open(cache_path, 'r:utf-8') do |file|
50
50
  file.each_line do |line|
51
51
  id, token = line.split ?,, 2
52
- yield id, (token.chomp! || token).to_sym # TODO to_sym
52
+ yield id, (token.chomp! || token)
53
53
  end
54
54
  end
55
55
  end
@@ -31,6 +31,12 @@ module Picky
31
31
  client.hget namespace, key
32
32
  end
33
33
 
34
+ # Set a single value
35
+ #
36
+ def []= key, value
37
+ client.hset namespace, key, value
38
+ end
39
+
34
40
  end
35
41
 
36
42
  end
data/lib/picky/bundle.rb CHANGED
@@ -50,7 +50,6 @@ module Picky
50
50
  # TODO Tidy up a bit.
51
51
  #
52
52
  @key_format = options[:key_format]
53
- @prepared = Backends::Memory::Text.new category.prepared_index_path
54
53
 
55
54
  @weights_strategy = weights_strategy
56
55
  @partial_strategy = partial_strategy
@@ -28,134 +28,38 @@ module Picky
28
28
  #
29
29
  class Bundle
30
30
 
31
- attr_reader :backend,
32
- :prepared
31
+ attr_reader :backend
33
32
 
34
33
  # When indexing, clear only clears the inverted index.
35
34
  #
36
- delegate :clear, :to => :inverted
37
-
38
- # Sets up a piece of the index for the given token.
39
- #
40
- def initialize_inverted_index_for token
41
- self.inverted[token] ||= []
42
- end
43
-
44
- # Generation
45
- #
46
-
47
- # This method
48
- # * Loads the base index from the "prepared..." file.
49
- # * Generates derived indexes.
50
- # * Dumps all the indexes into files.
51
- #
52
- def generate_caches_from_source
53
- load_from_prepared_index_file
54
- generate_caches_from_memory
55
- end
56
- # Generates derived indexes from the index and dumps.
57
- #
58
- # Note: assumes that there is something in the index
59
- #
60
- def generate_caches_from_memory
61
- cache_from_memory_generation_message
62
- generate_derived
63
- end
64
- def cache_from_memory_generation_message
65
- timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
66
- end
67
-
68
- # Generates the weights and similarity from the main index.
69
- #
70
- def generate_derived
71
- generate_weights
72
- generate_similarity
73
- end
35
+ delegate :clear,
36
+ :to => :inverted
74
37
 
75
38
  # "Empties" the index(es) by getting a new empty
76
39
  # internal backend instance.
77
40
  #
78
41
  def empty
79
42
  empty_inverted
43
+ empty_weights
44
+ empty_similarity
80
45
  empty_configuration
81
46
  end
82
47
  def empty_inverted
83
48
  @inverted = @backend_inverted.empty
84
49
  end
85
- def empty_configuration
86
- @configuration = @backend_configuration.empty
87
- end
88
-
89
- # Load the data from the db.
90
- #
91
- def load_from_prepared_index_file
92
- load_from_prepared_index_generation_message
93
- retrieve
94
- end
95
- def load_from_prepared_index_generation_message
96
- timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
50
+ def empty_weights
51
+ @weights = @backend_weights.empty
97
52
  end
98
- # Retrieves the prepared index data into the index.
99
- #
100
- # This is in preparation for generating
101
- # derived indexes (like weights, similarity)
102
- # and later dumping the optimized index.
103
- #
104
- # TODO Move this out to the category?
105
- #
106
- # Note: The clean way to do this would be to
107
- # self.inverted.values.each &:uniq!
108
- #
109
- # Note 2:
110
- # initialize_inverted_index_for token
111
- # id = id.send(format)
112
- # next if last_id == id
113
- # self.inverted[token] << id
114
- # last_id = id
115
- #
116
- def retrieve
117
- format = key_format || :to_i
118
- empty_inverted
119
- id, last_id = nil, nil
120
- prepared.retrieve do |id, token|
121
- initialize_inverted_index_for token
122
- self.inverted[token] << id.send(format)
123
- end
124
- self.inverted.values.each &:uniq!
53
+ def empty_similarity
54
+ @similarity = @backend_similarity.empty
125
55
  end
126
-
127
- # Generate a partial index from the given exact inverted index.
128
- #
129
- def generate_partial_from exact_inverted_index
130
- timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
131
- self.inverted = exact_inverted_index
132
- self.generate_partial
133
- self
134
- end
135
-
136
- # Generates a new index (writes its index) using the
137
- # partial caching strategy of this bundle.
138
- #
139
- def generate_partial
140
- self.inverted = partial_strategy.generate_from self.inverted
141
- end
142
- # Generates a new weights index (writes its index) using the
143
- # given weight caching strategy.
144
- #
145
- def generate_weights
146
- self.weights = weights_strategy.generate_from self.inverted
147
- end
148
- # Generates a new similarity index (writes its index) using the
149
- # given similarity caching strategy.
150
- #
151
- def generate_similarity
152
- self.similarity = similarity_strategy.generate_from self.inverted
56
+ def empty_configuration
57
+ @configuration = @backend_configuration.empty
153
58
  end
154
59
 
155
60
  # Saves the indexes in a dump file.
156
61
  #
157
62
  def dump
158
- timed_exclaim %Q{"#{identifier}": Dumping data.}
159
63
  dump_inverted
160
64
  dump_similarity
161
65
  dump_weights
@@ -32,7 +32,9 @@ module Picky
32
32
 
33
33
  # Returns a reference to the array where the id has been added.
34
34
  #
35
- def add id, sym
35
+ # TODO Rename sym.
36
+ #
37
+ def add id, sym, where = :unshift
36
38
  ary = @inverted[sym]
37
39
 
38
40
  syms = @realtime_mapping[id]
@@ -42,12 +44,12 @@ module Picky
42
44
  #
43
45
  ids = if syms.include? sym
44
46
  ids = @inverted[sym]
45
- ids.delete id # Move id
46
- ids.unshift id # to front
47
+ ids.delete id
48
+ ids.send where, id
47
49
  else
48
50
  syms << sym
49
51
  ids = @inverted[sym] ||= []
50
- ids.unshift id
52
+ ids.send where, id
51
53
  end
52
54
 
53
55
  # Weights.
@@ -60,21 +62,27 @@ module Picky
60
62
  similarity = @similarity[encoded] ||= []
61
63
  if similarity.include? sym
62
64
  similarity.delete sym # Not completely correct, as others will also be affected, but meh.
63
- similarity.unshift sym #
65
+ similarity.send where, sym #
64
66
  else
65
- similarity.unshift sym
67
+ similarity.send where, sym
66
68
  end
67
69
  end
68
70
  end
69
71
 
70
72
  # Partializes the text and then adds each.
71
73
  #
72
- def add_partialized id, text
74
+ def add_partialized id, text, where = :unshift
73
75
  self.partial_strategy.each_partial text do |partial_text|
74
- add id, partial_text
76
+ add id, partial_text, where
75
77
  end
76
78
  end
77
79
 
80
+ # Clears the realtime mapping.
81
+ #
82
+ def clear_realtime_mapping
83
+ @realtime_mapping.clear
84
+ end
85
+
78
86
  end
79
87
 
80
88
  end
@@ -11,37 +11,41 @@ module Picky
11
11
  #
12
12
  class Location
13
13
 
14
- attr_reader :minimum, :precision, :grid
14
+ attr_reader :anchor,
15
+ :precision,
16
+ :grid
15
17
 
16
- def initialize user_grid, precision = nil
17
- @user_grid = user_grid
18
- @precision = precision || 1
19
- @grid = @user_grid / (@precision + 0.5)
18
+ def initialize user_grid, anchor = 0.0, precision = nil
19
+ @user_grid = user_grid
20
+ @precision = precision || 1
21
+ @grid = @user_grid / (@precision + 0.5)
22
+
23
+ self.anchor = anchor
20
24
  end
21
25
 
22
- def minimum= minimum
26
+ def anchor= value
23
27
  # Add a margin of 1 user grid.
24
28
  #
25
- minimum -= @user_grid
29
+ value -= @user_grid
26
30
 
27
31
  # Add plus 1 grid so that the index key never falls on 0.
28
32
  # Why? to_i maps by default to 0.
29
33
  #
30
- minimum -= @grid
34
+ value -= @grid
31
35
 
32
- @minimum = minimum
36
+ @anchor = value
33
37
  end
34
38
 
35
39
  #
36
40
  #
37
41
  def add_margin length
38
- @minimum -= length
42
+ @anchor -= length
39
43
  end
40
44
 
41
45
  #
42
46
  #
43
- def recalculated_range location
44
- range recalculate(location)
47
+ def calculated_range location
48
+ range calculate(location)
45
49
  end
46
50
  #
47
51
  #
@@ -50,8 +54,8 @@ module Picky
50
54
  end
51
55
  #
52
56
  #
53
- def recalculate location
54
- ((location - @minimum) / @grid).floor
57
+ def calculate location
58
+ ((location - @anchor) / @grid).floor
55
59
  end
56
60
 
57
61
  end
@@ -40,7 +40,7 @@ module Picky
40
40
  # Find a given category in the categories.
41
41
  #
42
42
  def [] category_name
43
- category_name = category_name.to_sym
43
+ category_name = category_name.intern
44
44
  category_hash[category_name] || raise_not_found(category_name)
45
45
  end
46
46
  def raise_not_found category_name
@@ -4,7 +4,8 @@ module Picky
4
4
 
5
5
  attr_reader :name,
6
6
  :exact,
7
- :partial
7
+ :partial,
8
+ :prepared
8
9
 
9
10
  # Mandatory params:
10
11
  # * name: Category name to use as identifier and file names.
@@ -20,6 +21,7 @@ module Picky
20
21
  # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
21
22
  # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
22
23
  # * key_format: What this category's keys are formatted with (default is :to_i)
24
+ # * use_symbols: Whether to use symbols internally instead of strings.
23
25
  #
24
26
  def initialize name, index, options = {}
25
27
  @name = name
@@ -31,6 +33,7 @@ module Picky
31
33
  @from = options[:from]
32
34
  @tokenizer = options[:tokenizer]
33
35
  @key_format = options[:key_format]
36
+ # @symbols = options[:use_symbols] || index.use_symbols? # TODO Symbols.
34
37
  @qualifiers = extract_qualifiers_from options
35
38
 
36
39
  weights = options[:weights] || Generators::Weights::Default
@@ -46,6 +49,8 @@ module Picky
46
49
  else
47
50
  @partial = Bundle.new :partial, self, index.backend, weights, partial, no_similarity, options
48
51
  end
52
+
53
+ @prepared = Backends::Memory::Text.new prepared_index_path
49
54
  end
50
55
 
51
56
  # Indexes and reloads the category.
@@ -58,6 +63,7 @@ module Picky
58
63
  def dump
59
64
  exact.dump
60
65
  partial.dump
66
+ timed_exclaim %Q{"#{identifier}": Generated -> #{index_directory.gsub("#{PICKY_ROOT}/", '')}.}
61
67
  end
62
68
 
63
69
  # Index name.
@@ -8,6 +8,7 @@ module Picky
8
8
  #
9
9
  def load_from_cache
10
10
  timed_exclaim %Q{"#{identifier}": Loading index from cache.}
11
+ clear_realtime_mapping
11
12
  exact.load
12
13
  partial.load
13
14
  end
@@ -24,9 +24,12 @@ module Picky
24
24
  indexer.index [self]
25
25
  end
26
26
  end
27
+
28
+ # Empty all the indexes.
29
+ #
27
30
  def empty
28
31
  exact.empty
29
- partial.empty_configuration
32
+ partial.empty
30
33
  end
31
34
 
32
35
  # Take a data snapshot if the source offers it.
@@ -44,22 +47,17 @@ module Picky
44
47
  # Generates all caches for this category.
45
48
  #
46
49
  def cache
47
- generate_caches_from_source
48
- generate_partial
49
- generate_caches_from_memory
50
+ empty
51
+ retrieve
50
52
  dump
51
- timed_exclaim %Q{"#{identifier}": Caching finished.}
53
+ clear_realtime_mapping # TODO To call or not to call, that is the question.
52
54
  end
53
- # Generate the cache data.
55
+
56
+ # Retrieves the prepared index data into the indexes and
57
+ # generates the necessary derived indexes.
54
58
  #
55
- def generate_caches_from_source
56
- exact.generate_caches_from_source
57
- end
58
- def generate_partial
59
- partial.generate_partial_from exact.inverted
60
- end
61
- def generate_caches_from_memory
62
- partial.generate_caches_from_memory
59
+ def retrieve
60
+ prepared.retrieve { |id, token| add_tokenized_token id, token, :<< }
63
61
  end
64
62
 
65
63
  # Return an appropriate source.
@@ -80,12 +78,14 @@ module Picky
80
78
 
81
79
  # Return the key format.
82
80
  #
83
- # If the source has no key format, and
84
- # none is defined on this category, ask
81
+ # If no key_format is defined on the category
82
+ # and the source has no key format, ask
85
83
  # the index for one.
86
84
  #
85
+ # Default is to_i.
86
+ #
87
87
  def key_format
88
- source.respond_to?(:key_format) && source.key_format || @key_format || @index.key_format
88
+ @key_format ||= source.respond_to?(:key_format) && source.key_format || @index.key_format || :to_i
89
89
  end
90
90
 
91
91
  # Where the data is taken from.
@@ -13,29 +13,41 @@ module Picky
13
13
  # Adds and indexes this category of the
14
14
  # given object.
15
15
  #
16
- def add object
16
+ def add object, where = :unshift
17
17
  tokens, _ = tokenizer.tokenize object.send(from)
18
- add_tokenized object.id, tokens
18
+ add_tokenized object.id, tokens, where
19
19
  end
20
20
 
21
21
  # Removes the object's id, and then
22
22
  # adds it again.
23
23
  #
24
- def replace object
24
+ def replace object, where = :unshift
25
25
  remove object.id
26
- add object
26
+ add object, where
27
27
  end
28
28
 
29
29
  # For the given id, adds the list of
30
30
  # strings to the index for the given id.
31
31
  #
32
- def add_tokenized id, tokens
33
- tokens.each do |text|
34
- next unless text
35
- text = text.to_sym # TODO to_sym
36
- exact.add id, text
37
- partial.add_partialized id, text
38
- end
32
+ def add_tokenized id, tokens, where = :unshift
33
+ tokens.each { |text| add_tokenized_token id, text, where }
34
+ end
35
+
36
+ #
37
+ #
38
+ def add_tokenized_token id, text, where = :unshift
39
+ return unless text
40
+ id = id.send key_format # TODO Speed this up!
41
+ # text = text.to_sym if @symbols # TODO Symbols.
42
+ exact.add id, text, where
43
+ partial.add_partialized id, text, where
44
+ end
45
+
46
+ # Clears the realtime mapping.
47
+ #
48
+ def clear_realtime_mapping
49
+ exact.clear_realtime_mapping
50
+ partial.clear_realtime_mapping
39
51
  end
40
52
 
41
53
  end