picky 3.4.3 → 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61)
  1. data/aux/picky/cli.rb +1 -1
  2. data/lib/picky/backends/memory/json.rb +1 -1
  3. data/lib/picky/backends/memory/text.rb +2 -2
  4. data/lib/picky/backends/redis/string.rb +6 -0
  5. data/lib/picky/bundle.rb +0 -1
  6. data/lib/picky/bundle_indexing.rb +11 -107
  7. data/lib/picky/bundle_realtime.rb +16 -8
  8. data/lib/picky/calculations/location.rb +18 -14
  9. data/lib/picky/categories.rb +1 -1
  10. data/lib/picky/category.rb +7 -1
  11. data/lib/picky/category_indexed.rb +1 -0
  12. data/lib/picky/category_indexing.rb +17 -17
  13. data/lib/picky/category_realtime.rb +23 -11
  14. data/lib/picky/deployment.rb +33 -33
  15. data/lib/picky/generators/partial/substring.rb +0 -2
  16. data/lib/picky/generators/similarity/double_metaphone.rb +1 -1
  17. data/lib/picky/generators/similarity/metaphone.rb +1 -1
  18. data/lib/picky/generators/similarity/soundex.rb +1 -1
  19. data/lib/picky/index.rb +22 -5
  20. data/lib/picky/index_indexing.rb +3 -15
  21. data/lib/picky/indexers/base.rb +7 -3
  22. data/lib/picky/indexers/parallel.rb +1 -10
  23. data/lib/picky/indexers/serial.rb +1 -10
  24. data/lib/picky/indexes.rb +1 -1
  25. data/lib/picky/loader.rb +2 -6
  26. data/lib/picky/query/qualifier_category_mapper.rb +2 -2
  27. data/lib/picky/query/token.rb +1 -2
  28. data/lib/picky/query/tokens.rb +6 -0
  29. data/lib/picky/search.rb +1 -0
  30. data/lib/picky/sources/couch.rb +1 -1
  31. data/lib/picky/sources/csv.rb +1 -1
  32. data/lib/picky/sources/mongo.rb +1 -1
  33. data/lib/picky/wrappers/bundle/calculation.rb +8 -8
  34. data/lib/picky/wrappers/bundle/delegators.rb +4 -1
  35. data/lib/picky/wrappers/bundle/exact_partial.rb +1 -1
  36. data/lib/picky/wrappers/bundle/location.rb +30 -13
  37. data/lib/picky/wrappers/category/location.rb +14 -9
  38. data/lib/tasks/try.rb +2 -2
  39. data/spec/lib/backends/memory/text_spec.rb +6 -6
  40. data/spec/lib/bundle_spec.rb +4 -4
  41. data/spec/lib/calculations/location_spec.rb +27 -29
  42. data/spec/lib/category_indexed_spec.rb +1 -0
  43. data/spec/lib/category_indexing_spec.rb +23 -36
  44. data/spec/lib/category_spec.rb +2 -0
  45. data/spec/lib/extensions/string_spec.rb +1 -1
  46. data/spec/lib/generators/partial/infix_spec.rb +2 -2
  47. data/spec/lib/index_indexing_spec.rb +5 -3
  48. data/spec/lib/indexed/bundle_spec.rb +2 -2
  49. data/spec/lib/indexers/base_spec.rb +2 -4
  50. data/spec/lib/indexers/serial_spec.rb +3 -19
  51. data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +42 -42
  52. data/spec/lib/indexing/bundle_spec.rb +4 -133
  53. data/spec/lib/query/combination_spec.rb +6 -6
  54. data/spec/lib/query/token_spec.rb +32 -19
  55. data/spec/lib/query/tokens_spec.rb +23 -10
  56. metadata +27 -34
  57. data/lib/picky/no_source_specified_exception.rb +0 -7
  58. data/lib/picky/wrappers/sources/base.rb +0 -35
  59. data/lib/picky/wrappers/sources/location.rb +0 -56
  60. data/spec/lib/sources/wrappers/base_spec.rb +0 -38
  61. data/spec/lib/sources/wrappers/location_spec.rb +0 -55
data/aux/picky/cli.rb CHANGED
@@ -14,7 +14,7 @@ module Picky
14
14
  executor.execute selector, args, params
15
15
  end
16
16
  def executor_class_for selector = nil
17
- selector && @@mapping[selector.to_sym] || [Help]
17
+ selector && @@mapping[selector.intern] || [Help]
18
18
  end
19
19
 
20
20
  class Base
data/lib/picky/backends/memory/json.rb CHANGED
@@ -17,7 +17,7 @@ module Picky
17
17
  # Loads the index hash from json format.
18
18
  #
19
19
  def load
20
- Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true # TODO to_sym
20
+ Yajl::Parser.parse ::File.open(cache_path, 'r') # , symbolize_keys: true # TODO Symbols.
21
21
  end
22
22
 
23
23
  # Dumps the index hash in json format.
data/lib/picky/backends/memory/text.rb CHANGED
@@ -41,7 +41,7 @@ module Picky
41
41
  # * id,data\n
42
42
  # * id,data\n
43
43
  #
44
- # Yields an id string and a symbol token.
44
+ # Yields an id string and a token.
45
45
  #
46
46
  def retrieve
47
47
  id = nil
@@ -49,7 +49,7 @@ module Picky
49
49
  ::File.open(cache_path, 'r:utf-8') do |file|
50
50
  file.each_line do |line|
51
51
  id, token = line.split ?,, 2
52
- yield id, (token.chomp! || token).to_sym # TODO to_sym
52
+ yield id, (token.chomp! || token)
53
53
  end
54
54
  end
55
55
  end
data/lib/picky/backends/redis/string.rb CHANGED
@@ -31,6 +31,12 @@ module Picky
31
31
  client.hget namespace, key
32
32
  end
33
33
 
34
+ # Set a single value
35
+ #
36
+ def []= key, value
37
+ client.hset namespace, key, value
38
+ end
39
+
34
40
  end
35
41
 
36
42
  end
data/lib/picky/bundle.rb CHANGED
@@ -50,7 +50,6 @@ module Picky
50
50
  # TODO Tidy up a bit.
51
51
  #
52
52
  @key_format = options[:key_format]
53
- @prepared = Backends::Memory::Text.new category.prepared_index_path
54
53
 
55
54
  @weights_strategy = weights_strategy
56
55
  @partial_strategy = partial_strategy
data/lib/picky/bundle_indexing.rb CHANGED
@@ -28,134 +28,38 @@ module Picky
28
28
  #
29
29
  class Bundle
30
30
 
31
- attr_reader :backend,
32
- :prepared
31
+ attr_reader :backend
33
32
 
34
33
  # When indexing, clear only clears the inverted index.
35
34
  #
36
- delegate :clear, :to => :inverted
37
-
38
- # Sets up a piece of the index for the given token.
39
- #
40
- def initialize_inverted_index_for token
41
- self.inverted[token] ||= []
42
- end
43
-
44
- # Generation
45
- #
46
-
47
- # This method
48
- # * Loads the base index from the "prepared..." file.
49
- # * Generates derived indexes.
50
- # * Dumps all the indexes into files.
51
- #
52
- def generate_caches_from_source
53
- load_from_prepared_index_file
54
- generate_caches_from_memory
55
- end
56
- # Generates derived indexes from the index and dumps.
57
- #
58
- # Note: assumes that there is something in the index
59
- #
60
- def generate_caches_from_memory
61
- cache_from_memory_generation_message
62
- generate_derived
63
- end
64
- def cache_from_memory_generation_message
65
- timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
66
- end
67
-
68
- # Generates the weights and similarity from the main index.
69
- #
70
- def generate_derived
71
- generate_weights
72
- generate_similarity
73
- end
35
+ delegate :clear,
36
+ :to => :inverted
74
37
 
75
38
  # "Empties" the index(es) by getting a new empty
76
39
  # internal backend instance.
77
40
  #
78
41
  def empty
79
42
  empty_inverted
43
+ empty_weights
44
+ empty_similarity
80
45
  empty_configuration
81
46
  end
82
47
  def empty_inverted
83
48
  @inverted = @backend_inverted.empty
84
49
  end
85
- def empty_configuration
86
- @configuration = @backend_configuration.empty
87
- end
88
-
89
- # Load the data from the db.
90
- #
91
- def load_from_prepared_index_file
92
- load_from_prepared_index_generation_message
93
- retrieve
94
- end
95
- def load_from_prepared_index_generation_message
96
- timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
50
+ def empty_weights
51
+ @weights = @backend_weights.empty
97
52
  end
98
- # Retrieves the prepared index data into the index.
99
- #
100
- # This is in preparation for generating
101
- # derived indexes (like weights, similarity)
102
- # and later dumping the optimized index.
103
- #
104
- # TODO Move this out to the category?
105
- #
106
- # Note: The clean way to do this would be to
107
- # self.inverted.values.each &:uniq!
108
- #
109
- # Note 2:
110
- # initialize_inverted_index_for token
111
- # id = id.send(format)
112
- # next if last_id == id
113
- # self.inverted[token] << id
114
- # last_id = id
115
- #
116
- def retrieve
117
- format = key_format || :to_i
118
- empty_inverted
119
- id, last_id = nil, nil
120
- prepared.retrieve do |id, token|
121
- initialize_inverted_index_for token
122
- self.inverted[token] << id.send(format)
123
- end
124
- self.inverted.values.each &:uniq!
53
+ def empty_similarity
54
+ @similarity = @backend_similarity.empty
125
55
  end
126
-
127
- # Generate a partial index from the given exact inverted index.
128
- #
129
- def generate_partial_from exact_inverted_index
130
- timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
131
- self.inverted = exact_inverted_index
132
- self.generate_partial
133
- self
134
- end
135
-
136
- # Generates a new index (writes its index) using the
137
- # partial caching strategy of this bundle.
138
- #
139
- def generate_partial
140
- self.inverted = partial_strategy.generate_from self.inverted
141
- end
142
- # Generates a new weights index (writes its index) using the
143
- # given weight caching strategy.
144
- #
145
- def generate_weights
146
- self.weights = weights_strategy.generate_from self.inverted
147
- end
148
- # Generates a new similarity index (writes its index) using the
149
- # given similarity caching strategy.
150
- #
151
- def generate_similarity
152
- self.similarity = similarity_strategy.generate_from self.inverted
56
+ def empty_configuration
57
+ @configuration = @backend_configuration.empty
153
58
  end
154
59
 
155
60
  # Saves the indexes in a dump file.
156
61
  #
157
62
  def dump
158
- timed_exclaim %Q{"#{identifier}": Dumping data.}
159
63
  dump_inverted
160
64
  dump_similarity
161
65
  dump_weights
data/lib/picky/bundle_realtime.rb CHANGED
@@ -32,7 +32,9 @@ module Picky
32
32
 
33
33
  # Returns a reference to the array where the id has been added.
34
34
  #
35
- def add id, sym
35
+ # TODO Rename sym.
36
+ #
37
+ def add id, sym, where = :unshift
36
38
  ary = @inverted[sym]
37
39
 
38
40
  syms = @realtime_mapping[id]
@@ -42,12 +44,12 @@ module Picky
42
44
  #
43
45
  ids = if syms.include? sym
44
46
  ids = @inverted[sym]
45
- ids.delete id # Move id
46
- ids.unshift id # to front
47
+ ids.delete id
48
+ ids.send where, id
47
49
  else
48
50
  syms << sym
49
51
  ids = @inverted[sym] ||= []
50
- ids.unshift id
52
+ ids.send where, id
51
53
  end
52
54
 
53
55
  # Weights.
@@ -60,21 +62,27 @@ module Picky
60
62
  similarity = @similarity[encoded] ||= []
61
63
  if similarity.include? sym
62
64
  similarity.delete sym # Not completely correct, as others will also be affected, but meh.
63
- similarity.unshift sym #
65
+ similarity.send where, sym #
64
66
  else
65
- similarity.unshift sym
67
+ similarity.send where, sym
66
68
  end
67
69
  end
68
70
  end
69
71
 
70
72
  # Partializes the text and then adds each.
71
73
  #
72
- def add_partialized id, text
74
+ def add_partialized id, text, where = :unshift
73
75
  self.partial_strategy.each_partial text do |partial_text|
74
- add id, partial_text
76
+ add id, partial_text, where
75
77
  end
76
78
  end
77
79
 
80
+ # Clears the realtime mapping.
81
+ #
82
+ def clear_realtime_mapping
83
+ @realtime_mapping.clear
84
+ end
85
+
78
86
  end
79
87
 
80
88
  end
data/lib/picky/calculations/location.rb CHANGED
@@ -11,37 +11,41 @@ module Picky
11
11
  #
12
12
  class Location
13
13
 
14
- attr_reader :minimum, :precision, :grid
14
+ attr_reader :anchor,
15
+ :precision,
16
+ :grid
15
17
 
16
- def initialize user_grid, precision = nil
17
- @user_grid = user_grid
18
- @precision = precision || 1
19
- @grid = @user_grid / (@precision + 0.5)
18
+ def initialize user_grid, anchor = 0.0, precision = nil
19
+ @user_grid = user_grid
20
+ @precision = precision || 1
21
+ @grid = @user_grid / (@precision + 0.5)
22
+
23
+ self.anchor = anchor
20
24
  end
21
25
 
22
- def minimum= minimum
26
+ def anchor= value
23
27
  # Add a margin of 1 user grid.
24
28
  #
25
- minimum -= @user_grid
29
+ value -= @user_grid
26
30
 
27
31
  # Add plus 1 grid so that the index key never falls on 0.
28
32
  # Why? to_i maps by default to 0.
29
33
  #
30
- minimum -= @grid
34
+ value -= @grid
31
35
 
32
- @minimum = minimum
36
+ @anchor = value
33
37
  end
34
38
 
35
39
  #
36
40
  #
37
41
  def add_margin length
38
- @minimum -= length
42
+ @anchor -= length
39
43
  end
40
44
 
41
45
  #
42
46
  #
43
- def recalculated_range location
44
- range recalculate(location)
47
+ def calculated_range location
48
+ range calculate(location)
45
49
  end
46
50
  #
47
51
  #
@@ -50,8 +54,8 @@ module Picky
50
54
  end
51
55
  #
52
56
  #
53
- def recalculate location
54
- ((location - @minimum) / @grid).floor
57
+ def calculate location
58
+ ((location - @anchor) / @grid).floor
55
59
  end
56
60
 
57
61
  end
data/lib/picky/categories.rb CHANGED
@@ -40,7 +40,7 @@ module Picky
40
40
  # Find a given category in the categories.
41
41
  #
42
42
  def [] category_name
43
- category_name = category_name.to_sym
43
+ category_name = category_name.intern
44
44
  category_hash[category_name] || raise_not_found(category_name)
45
45
  end
46
46
  def raise_not_found category_name
data/lib/picky/category.rb CHANGED
@@ -4,7 +4,8 @@ module Picky
4
4
 
5
5
  attr_reader :name,
6
6
  :exact,
7
- :partial
7
+ :partial,
8
+ :prepared
8
9
 
9
10
  # Mandatory params:
10
11
  # * name: Category name to use as identifier and file names.
@@ -20,6 +21,7 @@ module Picky
20
21
  # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
21
22
  # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
22
23
  # * key_format: What this category's keys are formatted with (default is :to_i)
24
+ # * use_symbols: Whether to use symbols internally instead of strings.
23
25
  #
24
26
  def initialize name, index, options = {}
25
27
  @name = name
@@ -31,6 +33,7 @@ module Picky
31
33
  @from = options[:from]
32
34
  @tokenizer = options[:tokenizer]
33
35
  @key_format = options[:key_format]
36
+ # @symbols = options[:use_symbols] || index.use_symbols? # TODO Symbols.
34
37
  @qualifiers = extract_qualifiers_from options
35
38
 
36
39
  weights = options[:weights] || Generators::Weights::Default
@@ -46,6 +49,8 @@ module Picky
46
49
  else
47
50
  @partial = Bundle.new :partial, self, index.backend, weights, partial, no_similarity, options
48
51
  end
52
+
53
+ @prepared = Backends::Memory::Text.new prepared_index_path
49
54
  end
50
55
 
51
56
  # Indexes and reloads the category.
@@ -58,6 +63,7 @@ module Picky
58
63
  def dump
59
64
  exact.dump
60
65
  partial.dump
66
+ timed_exclaim %Q{"#{identifier}": Generated -> #{index_directory.gsub("#{PICKY_ROOT}/", '')}.}
61
67
  end
62
68
 
63
69
  # Index name.
data/lib/picky/category_indexed.rb CHANGED
@@ -8,6 +8,7 @@ module Picky
8
8
  #
9
9
  def load_from_cache
10
10
  timed_exclaim %Q{"#{identifier}": Loading index from cache.}
11
+ clear_realtime_mapping
11
12
  exact.load
12
13
  partial.load
13
14
  end
data/lib/picky/category_indexing.rb CHANGED
@@ -24,9 +24,12 @@ module Picky
24
24
  indexer.index [self]
25
25
  end
26
26
  end
27
+
28
+ # Empty all the indexes.
29
+ #
27
30
  def empty
28
31
  exact.empty
29
- partial.empty_configuration
32
+ partial.empty
30
33
  end
31
34
 
32
35
  # Take a data snapshot if the source offers it.
@@ -44,22 +47,17 @@ module Picky
44
47
  # Generates all caches for this category.
45
48
  #
46
49
  def cache
47
- generate_caches_from_source
48
- generate_partial
49
- generate_caches_from_memory
50
+ empty
51
+ retrieve
50
52
  dump
51
- timed_exclaim %Q{"#{identifier}": Caching finished.}
53
+ clear_realtime_mapping # TODO To call or not to call, that is the question.
52
54
  end
53
- # Generate the cache data.
55
+
56
+ # Retrieves the prepared index data into the indexes and
57
+ # generates the necessary derived indexes.
54
58
  #
55
- def generate_caches_from_source
56
- exact.generate_caches_from_source
57
- end
58
- def generate_partial
59
- partial.generate_partial_from exact.inverted
60
- end
61
- def generate_caches_from_memory
62
- partial.generate_caches_from_memory
59
+ def retrieve
60
+ prepared.retrieve { |id, token| add_tokenized_token id, token, :<< }
63
61
  end
64
62
 
65
63
  # Return an appropriate source.
@@ -80,12 +78,14 @@ module Picky
80
78
 
81
79
  # Return the key format.
82
80
  #
83
- # If the source has no key format, and
84
- # none is defined on this category, ask
81
+ # If no key_format is defined on the category
82
+ # and the source has no key format, ask
85
83
  # the index for one.
86
84
  #
85
+ # Default is to_i.
86
+ #
87
87
  def key_format
88
- source.respond_to?(:key_format) && source.key_format || @key_format || @index.key_format
88
+ @key_format ||= source.respond_to?(:key_format) && source.key_format || @index.key_format || :to_i
89
89
  end
90
90
 
91
91
  # Where the data is taken from.
data/lib/picky/category_realtime.rb CHANGED
@@ -13,29 +13,41 @@ module Picky
13
13
  # Adds and indexes this category of the
14
14
  # given object.
15
15
  #
16
- def add object
16
+ def add object, where = :unshift
17
17
  tokens, _ = tokenizer.tokenize object.send(from)
18
- add_tokenized object.id, tokens
18
+ add_tokenized object.id, tokens, where
19
19
  end
20
20
 
21
21
  # Removes the object's id, and then
22
22
  # adds it again.
23
23
  #
24
- def replace object
24
+ def replace object, where = :unshift
25
25
  remove object.id
26
- add object
26
+ add object, where
27
27
  end
28
28
 
29
29
  # For the given id, adds the list of
30
30
  # strings to the index for the given id.
31
31
  #
32
- def add_tokenized id, tokens
33
- tokens.each do |text|
34
- next unless text
35
- text = text.to_sym # TODO to_sym
36
- exact.add id, text
37
- partial.add_partialized id, text
38
- end
32
+ def add_tokenized id, tokens, where = :unshift
33
+ tokens.each { |text| add_tokenized_token id, text, where }
34
+ end
35
+
36
+ #
37
+ #
38
+ def add_tokenized_token id, text, where = :unshift
39
+ return unless text
40
+ id = id.send key_format # TODO Speed this up!
41
+ # text = text.to_sym if @symbols # TODO Symbols.
42
+ exact.add id, text, where
43
+ partial.add_partialized id, text, where
44
+ end
45
+
46
+ # Clears the realtime mapping.
47
+ #
48
+ def clear_realtime_mapping
49
+ exact.clear_realtime_mapping
50
+ partial.clear_realtime_mapping
39
51
  end
40
52
 
41
53
  end