picky 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. data/lib/picky/Index_api.rb +10 -3
  2. data/lib/picky/cacher/partial/substring.rb +4 -4
  3. data/lib/picky/calculations/location.rb +39 -0
  4. data/lib/picky/configuration/index.rb +2 -2
  5. data/lib/picky/extensions/array.rb +0 -8
  6. data/lib/picky/extensions/symbol.rb +2 -16
  7. data/lib/picky/generator.rb +0 -2
  8. data/lib/picky/index/bundle.rb +7 -5
  9. data/lib/picky/index/file/basic.rb +2 -6
  10. data/lib/picky/index/files.rb +24 -14
  11. data/lib/picky/indexed/bundle.rb +7 -14
  12. data/lib/picky/indexed/category.rb +2 -1
  13. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  14. data/lib/picky/indexed/wrappers/bundle/location.rb +40 -0
  15. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +25 -0
  16. data/lib/picky/indexers/serial.rb +1 -1
  17. data/lib/picky/indexing/bundle.rb +7 -0
  18. data/lib/picky/indexing/category.rb +22 -1
  19. data/lib/picky/loader.rb +10 -0
  20. data/lib/picky/sources/couch.rb +1 -1
  21. data/lib/picky/sources/csv.rb +6 -4
  22. data/lib/picky/sources/db.rb +1 -1
  23. data/lib/picky/sources/delicious.rb +1 -1
  24. data/lib/picky/sources/wrappers/base.rb +5 -4
  25. data/lib/picky/sources/wrappers/location.rb +16 -22
  26. data/lib/tasks/try.rake +2 -2
  27. data/spec/lib/calculations/location_spec.rb +35 -0
  28. data/spec/lib/extensions/array_spec.rb +0 -10
  29. data/spec/lib/extensions/symbol_spec.rb +1 -69
  30. data/spec/lib/index/files_spec.rb +54 -34
  31. data/spec/lib/indexed/bundle_spec.rb +17 -14
  32. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +37 -0
  33. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +27 -0
  34. data/spec/lib/indexing/bundle_spec.rb +5 -1
  35. data/spec/lib/sources/couch_spec.rb +1 -1
  36. data/spec/lib/sources/csv_spec.rb +41 -11
  37. data/spec/lib/sources/db_spec.rb +5 -5
  38. data/spec/lib/sources/delicious_spec.rb +6 -6
  39. data/spec/lib/sources/wrappers/base_spec.rb +7 -3
  40. data/spec/lib/sources/wrappers/location_spec.rb +11 -12
  41. metadata +13 -3
@@ -35,13 +35,20 @@ class IndexAPI
35
35
  end
36
36
  alias category define_category
37
37
 
38
+ # TODO Rewrite wrap_exact, wrap_source ?
39
+ #
38
40
  def define_location name, options = {}
39
- grid = options[:grid]
41
+ grid = options[:grid] || raise("Grid size needs to be given to a location")
40
42
  precision = options[:precision]
41
43
 
42
- define_category name, options do |indexing, _|
43
- indexing.source = Sources::Wrappers::Location.new indexing.source, grid: grid, precision: precision
44
+ define_category name, options do |indexing, indexed|
45
+ indexing.source = Sources::Wrappers::Location.new indexing, grid: grid, precision: precision
44
46
  indexing.tokenizer = Tokenizers::Index.new
47
+ # indexing.partial = Partial::None.new
48
+
49
+ exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid
50
+ indexed.exact = exact_bundle
51
+ indexed.partial = exact_bundle
45
52
  end
46
53
  end
47
54
  alias location define_location
@@ -83,8 +83,8 @@ module Cacher
83
83
 
84
84
  # Remove duplicate ids.
85
85
  #
86
- # TODO If it is unique for a subtoken, it is
87
- # unique for all derived longer tokens.
86
+ # THINK If it is unique for a subtoken, it is
87
+ # unique for all derived longer tokens.
88
88
  #
89
89
  result.each_value &:uniq!
90
90
 
@@ -99,14 +99,14 @@ module Cacher
99
99
  #
100
100
  # "token" here means just text.
101
101
  #
102
- # TODO Could be improved by appending the aforegoing ids?
102
+ # THINK Could be improved by appending the aforegoing ids?
103
103
  #
104
104
  def generate_for token, index, result
105
105
  @generator.each_subtoken(token) do |subtoken|
106
106
  if result[subtoken]
107
107
  result[subtoken] += index[token] # unique
108
108
  else
109
- result[subtoken] = index[token].dup # TODO Spec this dup
109
+ result[subtoken] = index[token].dup
110
110
  end
111
111
  end
112
112
  end
@@ -0,0 +1,39 @@
1
+ module Calculations
2
+
3
+ # A location calculation recalculates a 1-d location
4
+ # to the Picky internal 1-d "grid".
5
+ #
6
+ # For example, if you have a location x == 12.3456,
7
+ # it will be recalculated into 3, if the minimum is 9
8
+ # and the gridlength is 1.
9
+ #
10
+ class Location
11
+
12
+ attr_reader :minimum
13
+
14
+ def initialize user_grid, precision = nil
15
+ @user_grid = user_grid
16
+ @precision = precision || 1
17
+ @grid = @user_grid / (@precision + 0.5)
18
+ end
19
+
20
+ def minimum= minimum
21
+ minimum -= @user_grid
22
+ @minimum = minimum
23
+ end
24
+
25
+ #
26
+ #
27
+ def add_margin length
28
+ @minimum -= length
29
+ end
30
+
31
+ #
32
+ #
33
+ def recalculate location
34
+ ((location - @minimum) / @grid).floor
35
+ end
36
+
37
+ end
38
+
39
+ end
@@ -10,8 +10,8 @@ module Configuration
10
10
  attr_reader :index, :category
11
11
 
12
12
  def initialize index, category
13
- @index = index
14
- @category = category
13
+ @index = index
14
+ @category = category
15
15
  end
16
16
 
17
17
  def index_name
@@ -28,14 +28,6 @@ class Array
28
28
  result
29
29
  end
30
30
 
31
- # Accesses a random element of this array.
32
- #
33
- # TODO Remove?
34
- #
35
- def random
36
- self[Kernel.rand(self.length)]
37
- end
38
-
39
31
  # Sort the array using distance from levenshtein.
40
32
  #
41
33
  # Will raise if encounters not to_s-able element.
@@ -2,22 +2,8 @@
2
2
  #
3
3
  class Symbol
4
4
 
5
- # :keys.subtokens # => [:keys, :key, :ke, :k]
6
- # :keys.subtokens(2) # => [:keys, :key, :ke]
7
- #
8
- def subtokens from_length = 1
9
- sub = self.id2name
10
-
11
- size = sub.size
12
- from_length = size + from_length if from_length < 0
13
- from_length = size if size < from_length
14
-
15
- result = [self]
16
- size.downto(from_length + 1) { result << sub.chop!.intern }
17
- result
18
- end
19
-
20
- # TODO Duplicate code.
5
+ # :keys.each_subtoken # => yields each of [:keys, :key, :ke, :k]
6
+ # :keys.each_subtoken(2) # => yields each of [:keys, :key, :ke]
21
7
  #
22
8
  def each_subtoken from_length = 1
23
9
  sub = self.id2name
@@ -187,8 +187,6 @@ module Picky
187
187
  exclaim "#{entry} \x1b[31mexists\x1b[m, skipping."
188
188
  end
189
189
 
190
- # TODO Remove?
191
- #
192
190
  def exclaim something
193
191
  puts something
194
192
  end
@@ -22,17 +22,19 @@ module Index
22
22
  class Bundle
23
23
 
24
24
  attr_reader :identifier, :files
25
- attr_accessor :index, :weights, :similarity, :similarity_strategy
25
+ attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
26
26
 
27
- delegate :[], :[]=, :clear, :to => :index
27
+ delegate :clear, :to => :index
28
+ delegate :[], :[]=, :to => :configuration
28
29
 
29
30
  def initialize name, configuration, similarity_strategy
30
31
  @identifier = "#{configuration.identifier} (#{name})"
31
32
  @files = Files.new name, configuration
32
33
 
33
- @index = {}
34
- @weights = {}
35
- @similarity = {}
34
+ @index = {}
35
+ @weights = {}
36
+ @similarity = {}
37
+ @configuration = {} # A hash with config options.
36
38
 
37
39
  @similarity_strategy = similarity_strategy
38
40
  end
@@ -33,15 +33,13 @@ module Index
33
33
  # the directory the index file is in.
34
34
  #
35
35
  def backup
36
- prepare_backup backup_path
36
+ prepare_backup backup_directory
37
37
  FileUtils.cp cache_path, target, verbose: true
38
38
  end
39
39
  # The backup directory of this file.
40
40
  # Equal to the file's dirname plus /backup
41
41
  #
42
- # TODO: Rename to backup_dir.
43
- #
44
- def backup_path
42
+ def backup_directory
45
43
  ::File.join ::File.dirname(cache_path), 'backup'
46
44
  end
47
45
  # Prepares the backup directory for the file.
@@ -58,8 +56,6 @@ module Index
58
56
  end
59
57
  # The backup filename.
60
58
  #
61
- # TODO: Duplicate work done here?
62
- #
63
59
  def backup_file_path_of path
64
60
  dir, name = ::File.split path
65
61
  ::File.join dir, 'backup', name
@@ -3,22 +3,23 @@ module Index
3
3
  class Files
4
4
 
5
5
  attr_reader :bundle_name
6
- attr_reader :prepared, :index, :similarity, :weights
6
+ attr_reader :prepared, :index, :weights, :similarity, :configuration
7
7
 
8
- delegate :index_name, :category_name, :to => :@configuration
8
+ delegate :index_name, :category_name, :to => :@config
9
9
 
10
- def initialize bundle_name, configuration
11
- @bundle_name = bundle_name
12
- @configuration = configuration
10
+ def initialize bundle_name, config
11
+ @bundle_name = bundle_name
12
+ @config = config
13
13
 
14
14
  # Note: We marshal the similarity, as the
15
15
  # Yajl json lib cannot load symbolized
16
16
  # values, just keys.
17
17
  #
18
- @prepared = File::Text.new configuration.prepared_index_path
19
- @index = File::JSON.new configuration.index_path(bundle_name, :index)
20
- @similarity = File::Marshal.new configuration.index_path(bundle_name, :similarity)
21
- @weights = File::JSON.new configuration.index_path(bundle_name, :weights)
18
+ @prepared = File::Text.new config.prepared_index_path
19
+ @index = File::JSON.new config.index_path(bundle_name, :index)
20
+ @weights = File::JSON.new config.index_path(bundle_name, :weights)
21
+ @similarity = File::Marshal.new config.index_path(bundle_name, :similarity)
22
+ @configuration = File::JSON.new config.index_path(bundle_name, :configuration)
22
23
  end
23
24
 
24
25
  # Delegators.
@@ -35,11 +36,14 @@ module Index
35
36
  def dump_index index_hash
36
37
  index.dump index_hash
37
38
  end
39
+ def dump_weights weights_hash
40
+ weights.dump weights_hash
41
+ end
38
42
  def dump_similarity similarity_hash
39
43
  similarity.dump similarity_hash
40
44
  end
41
- def dump_weights weights_hash
42
- weights.dump weights_hash
45
+ def dump_configuration configuration_hash
46
+ configuration.dump configuration_hash
43
47
  end
44
48
 
45
49
  # Loading.
@@ -53,6 +57,9 @@ module Index
53
57
  def load_weights
54
58
  weights.load
55
59
  end
60
+ def load_configuration
61
+ configuration.load
62
+ end
56
63
 
57
64
  # Cache ok?
58
65
  #
@@ -82,16 +89,18 @@ module Index
82
89
  #
83
90
  def backup
84
91
  index.backup
85
- similarity.backup
86
92
  weights.backup
93
+ similarity.backup
94
+ configuration.backup
87
95
  end
88
96
 
89
97
  # Restores the indexes from the "backup" directory.
90
98
  #
91
99
  def restore
92
100
  index.restore
93
- similarity.restore
94
101
  weights.restore
102
+ similarity.restore
103
+ configuration.restore
95
104
  end
96
105
 
97
106
 
@@ -99,8 +108,9 @@ module Index
99
108
  #
100
109
  def delete
101
110
  index.delete
102
- similarity.delete
103
111
  weights.delete
112
+ similarity.delete
113
+ configuration.delete
104
114
  end
105
115
 
106
116
  end
@@ -21,25 +21,13 @@ module Indexed
21
21
  @weights[sym]
22
22
  end
23
23
 
24
- # Load the data from the db.
25
- #
26
- def load_from_index_file
27
- load_from_index_generation_message
28
- clear
29
- retrieve
30
- end
31
- # Notifies the user that the index is being loaded.
32
- #
33
- def load_from_index_generation_message
34
- timed_exclaim "LOAD INDEX #{identifier}."
35
- end
36
-
37
24
  # Loads all indexes.
38
25
  #
39
26
  def load
40
27
  load_index
41
- load_similarity
42
28
  load_weights
29
+ load_similarity
30
+ load_configuration
43
31
  end
44
32
  # Loads the core index.
45
33
  #
@@ -56,6 +44,11 @@ module Indexed
56
44
  def load_similarity
57
45
  self.similarity = files.load_similarity
58
46
  end
47
+ # Loads the configuration.
48
+ #
49
+ def load_configuration
50
+ self.configuration = files.load_configuration
51
+ end
59
52
 
60
53
  end
61
54
  end
@@ -7,7 +7,8 @@ module Indexed
7
7
  #
8
8
  class Category
9
9
 
10
- attr_reader :exact, :partial, :identifier, :name
10
+ attr_accessor :exact, :partial
11
+ attr_reader :identifier, :name
11
12
 
12
13
  #
13
14
  #
@@ -0,0 +1,35 @@
1
+ module Indexed
2
+ module Wrappers
3
+
4
+ module Bundle
5
+
6
+ # A calculation rewrites the symbol into a float.
7
+ #
8
+ # TODO I really need to allow integers as keys. The code below is just not ok.
9
+ #
10
+ class Calculation < Wrapper
11
+
12
+ #
13
+ #
14
+ def recalculate float
15
+ float
16
+ end
17
+
18
+ #
19
+ #
20
+ def ids sym
21
+ @bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
22
+ end
23
+
24
+ #
25
+ #
26
+ def weight sym
27
+ @bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
28
+ end
29
+
30
+ end
31
+
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,40 @@
1
+ module Indexed
2
+ module Wrappers
3
+
4
+ module Bundle
5
+
6
+ # A location calculation recalculates a location to the Picky internal location.
7
+ #
8
+ class Location < Calculation
9
+
10
+ def initialize bundle, options = {}
11
+ super bundle
12
+
13
+ precision = options[:precision] || 1
14
+ user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
15
+
16
+ @calculation = Calculations::Location.new user_grid, precision
17
+ end
18
+
19
+ #
20
+ #
21
+ def recalculate float
22
+ @calculation.recalculate float
23
+ end
24
+
25
+ #
26
+ #
27
+ def load
28
+ # Load first the bundle, then extract the config.
29
+ #
30
+ bundle.load
31
+ minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing.")
32
+ @calculation.minimum = minimum
33
+ end
34
+
35
+ end
36
+
37
+ end
38
+
39
+ end
40
+ end
@@ -0,0 +1,25 @@
1
+ module Indexed
2
+ module Wrappers
3
+
4
+ # Per Bundle wrappers.
5
+ #
6
+ module Bundle
7
+
8
+ # Base wrapper. Just delegates load, ids and weight to the bundle.
9
+ #
10
+ class Wrapper
11
+
12
+ attr_reader :bundle
13
+
14
+ def initialize bundle
15
+ @bundle = bundle
16
+ end
17
+
18
+ delegate :load, :ids, :weight, :to => :@bundle
19
+
20
+ end
21
+
22
+ end
23
+
24
+ end
25
+ end
@@ -57,7 +57,7 @@ module Indexers
57
57
  end
58
58
  end
59
59
  def indexing_message
60
- timed_exclaim "INDEX #{@configuration.identifier}"
60
+ timed_exclaim "INDEX #{@configuration.identifier}" # TODO from ...
61
61
  end
62
62
 
63
63
  end
@@ -120,6 +120,7 @@ module Indexing
120
120
  dump_index
121
121
  dump_similarity
122
122
  dump_weights
123
+ dump_configuration
123
124
  end
124
125
  # Dumps the core index.
125
126
  #
@@ -139,6 +140,12 @@ module Indexing
139
140
  timed_exclaim "DUMP SIMILARITY #{identifier}."
140
141
  files.dump_similarity similarity
141
142
  end
143
+ # Dumps the configuration.
144
+ #
145
+ def dump_configuration
146
+ timed_exclaim "DUMP CONFIGURATION #{identifier}."
147
+ files.dump_configuration configuration
148
+ end
142
149
 
143
150
  # Alerts the user if an index is missing.
144
151
  #
@@ -4,15 +4,32 @@ module Indexing
4
4
 
5
5
  attr_reader :exact, :partial, :name, :configuration, :indexer
6
6
 
7
+ # Mandatory params:
8
+ # * name: Category name to use as identifier and file names.
9
+ # * index: Index to which this category is attached.
10
+ # Options:
11
+ # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
12
+ # * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
13
+ # * source: Use if the category should use a different source.
14
+ # * from: The source category identifier to take the data from.
15
+ #
16
+ # Advanced Options (TODO):
17
+ #
18
+ # * weights:
19
+ # * tokenizer:
20
+ # * exact_indexing_bundle:
21
+ # * partial_indexing_bundle:
22
+ #
7
23
  def initialize name, index, options = {}
8
24
  @name = name
25
+ @from = options[:from]
9
26
 
10
27
  # Now we have enough info to combine the index and the category.
11
28
  #
12
29
  @configuration = Configuration::Index.new index, self
13
30
 
14
31
  @tokenizer = options[:tokenizer] || Tokenizers::Index.default
15
- @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer #, :as => options[:as] # TODO option as.
32
+ @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
16
33
 
17
34
  # TODO Push into Bundle.
18
35
  #
@@ -27,6 +44,10 @@ module Indexing
27
44
  delegate :identifier, :prepare_index_directory, :to => :configuration
28
45
  delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
29
46
 
47
+ def from
48
+ @from || name
49
+ end
50
+
30
51
  # TODO Spec.
31
52
  #
32
53
  def backup_caches
data/lib/picky/loader.rb CHANGED
@@ -108,6 +108,10 @@ module Loader
108
108
  #
109
109
  load_relative 'character_substituters/west_european'
110
110
 
111
+ # Calculations.
112
+ #
113
+ load_relative 'calculations/location'
114
+
111
115
  # Signal handling
112
116
  #
113
117
  load_relative 'signals'
@@ -188,6 +192,12 @@ module Loader
188
192
 
189
193
  load_relative 'indexed/wrappers/exact_first'
190
194
 
195
+ # Bundle Wrapper
196
+ #
197
+ load_relative 'indexed/wrappers/bundle/wrapper'
198
+ load_relative 'indexed/wrappers/bundle/calculation'
199
+ load_relative 'indexed/wrappers/bundle/location'
200
+
191
201
  # Tokens.
192
202
  #
193
203
  load_relative 'query/token'
@@ -24,7 +24,7 @@ module Sources
24
24
  # Harvests the data to index.
25
25
  #
26
26
  def harvest type, category
27
- category_name = category.name.to_s
27
+ category_name = category.from.to_s
28
28
  get_data do |doc|
29
29
  yield doc['_id'].to_i, doc[category_name] || next
30
30
  end
@@ -7,12 +7,14 @@ module Sources
7
7
 
8
8
  class CSV < Base
9
9
 
10
- attr_reader :file_name, :category_names
10
+ attr_reader :file_name, :csv_options, :category_names
11
11
 
12
12
  def initialize *category_names, options
13
13
  require 'csv'
14
14
  @category_names = category_names
15
- @file_name = Hash === options && options[:file] || raise_no_file_given(category_names)
15
+
16
+ @csv_options = Hash === options && options || {}
17
+ @file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
16
18
  end
17
19
 
18
20
  #
@@ -24,7 +26,7 @@ module Sources
24
26
  # Harvests the data to index.
25
27
  #
26
28
  def harvest _, category
27
- index = category_names.index category.name
29
+ index = category_names.index category.from
28
30
  get_data do |ary|
29
31
  indexed_id = ary.shift.to_i # TODO is to_i necessary?
30
32
  text = ary[index]
@@ -37,7 +39,7 @@ module Sources
37
39
  #
38
40
  #
39
41
  def get_data &block
40
- ::CSV.foreach file_name, &block
42
+ ::CSV.foreach file_name, csv_options, &block
41
43
  end
42
44
 
43
45
  end
@@ -126,7 +126,7 @@ module Sources
126
126
  # Base harvest statement for dbs.
127
127
  #
128
128
  def harvest_statement type, category
129
- "SELECT indexed_id, #{category.name} FROM #{snapshot_table_name(type)} st"
129
+ "SELECT indexed_id, #{category.from} FROM #{snapshot_table_name(type)} st"
130
130
  end
131
131
 
132
132
  # Override in subclasses.
@@ -19,7 +19,7 @@ module Sources
19
19
  def harvest _, category
20
20
  get_data do |uid, data|
21
21
  indexed_id = uid
22
- text = data[category.name]
22
+ text = data[category.from]
23
23
  next unless text
24
24
  text.force_encoding 'utf-8' # TODO Still needed?
25
25
  yield indexed_id, text
@@ -4,12 +4,13 @@ module Sources
4
4
 
5
5
  class Base
6
6
 
7
- attr_reader :backend
7
+ attr_reader :backend, :category
8
8
 
9
- # Wraps a backend
9
+ # Wraps an indexing category.
10
10
  #
11
- def initialize backend
12
- @backend = backend
11
+ def initialize category
12
+ @category = category
13
+ @backend = category.source
13
14
  end
14
15
 
15
16
  # Default is delegation for all methods