picky 0.12.1 → 0.12.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/lib/picky/Index_api.rb +10 -3
  2. data/lib/picky/cacher/partial/substring.rb +4 -4
  3. data/lib/picky/calculations/location.rb +39 -0
  4. data/lib/picky/configuration/index.rb +2 -2
  5. data/lib/picky/extensions/array.rb +0 -8
  6. data/lib/picky/extensions/symbol.rb +2 -16
  7. data/lib/picky/generator.rb +0 -2
  8. data/lib/picky/index/bundle.rb +7 -5
  9. data/lib/picky/index/file/basic.rb +2 -6
  10. data/lib/picky/index/files.rb +24 -14
  11. data/lib/picky/indexed/bundle.rb +7 -14
  12. data/lib/picky/indexed/category.rb +2 -1
  13. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  14. data/lib/picky/indexed/wrappers/bundle/location.rb +40 -0
  15. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +25 -0
  16. data/lib/picky/indexers/serial.rb +1 -1
  17. data/lib/picky/indexing/bundle.rb +7 -0
  18. data/lib/picky/indexing/category.rb +22 -1
  19. data/lib/picky/loader.rb +10 -0
  20. data/lib/picky/sources/couch.rb +1 -1
  21. data/lib/picky/sources/csv.rb +6 -4
  22. data/lib/picky/sources/db.rb +1 -1
  23. data/lib/picky/sources/delicious.rb +1 -1
  24. data/lib/picky/sources/wrappers/base.rb +5 -4
  25. data/lib/picky/sources/wrappers/location.rb +16 -22
  26. data/lib/tasks/try.rake +2 -2
  27. data/spec/lib/calculations/location_spec.rb +35 -0
  28. data/spec/lib/extensions/array_spec.rb +0 -10
  29. data/spec/lib/extensions/symbol_spec.rb +1 -69
  30. data/spec/lib/index/files_spec.rb +54 -34
  31. data/spec/lib/indexed/bundle_spec.rb +17 -14
  32. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +37 -0
  33. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +27 -0
  34. data/spec/lib/indexing/bundle_spec.rb +5 -1
  35. data/spec/lib/sources/couch_spec.rb +1 -1
  36. data/spec/lib/sources/csv_spec.rb +41 -11
  37. data/spec/lib/sources/db_spec.rb +5 -5
  38. data/spec/lib/sources/delicious_spec.rb +6 -6
  39. data/spec/lib/sources/wrappers/base_spec.rb +7 -3
  40. data/spec/lib/sources/wrappers/location_spec.rb +11 -12
  41. metadata +13 -3
@@ -35,13 +35,20 @@ class IndexAPI
35
35
  end
36
36
  alias category define_category
37
37
 
38
+ # TODO Rewrite wrap_exact, wrap_source ?
39
+ #
38
40
  def define_location name, options = {}
39
- grid = options[:grid]
41
+ grid = options[:grid] || raise("Grid size needs to be given to a location")
40
42
  precision = options[:precision]
41
43
 
42
- define_category name, options do |indexing, _|
43
- indexing.source = Sources::Wrappers::Location.new indexing.source, grid: grid, precision: precision
44
+ define_category name, options do |indexing, indexed|
45
+ indexing.source = Sources::Wrappers::Location.new indexing, grid: grid, precision: precision
44
46
  indexing.tokenizer = Tokenizers::Index.new
47
+ # indexing.partial = Partial::None.new
48
+
49
+ exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid
50
+ indexed.exact = exact_bundle
51
+ indexed.partial = exact_bundle
45
52
  end
46
53
  end
47
54
  alias location define_location
@@ -83,8 +83,8 @@ module Cacher
83
83
 
84
84
  # Remove duplicate ids.
85
85
  #
86
- # TODO If it is unique for a subtoken, it is
87
- # unique for all derived longer tokens.
86
+ # THINK If it is unique for a subtoken, it is
87
+ # unique for all derived longer tokens.
88
88
  #
89
89
  result.each_value &:uniq!
90
90
 
@@ -99,14 +99,14 @@ module Cacher
99
99
  #
100
100
  # "token" here means just text.
101
101
  #
102
- # TODO Could be improved by appending the aforegoing ids?
102
+ # THINK Could be improved by appending the aforegoing ids?
103
103
  #
104
104
  def generate_for token, index, result
105
105
  @generator.each_subtoken(token) do |subtoken|
106
106
  if result[subtoken]
107
107
  result[subtoken] += index[token] # unique
108
108
  else
109
- result[subtoken] = index[token].dup # TODO Spec this dup
109
+ result[subtoken] = index[token].dup
110
110
  end
111
111
  end
112
112
  end
@@ -0,0 +1,39 @@
1
+ module Calculations
2
+
3
+ # A location calculation recalculates a 1-d location
4
+ # to the Picky internal 1-d "grid".
5
+ #
6
+ # For example, if you have a location x == 12.3456,
7
+ # it will be recalculated into 3, if the minimum is 9
8
+ # and the gridlength is 1.
9
+ #
10
+ class Location
11
+
12
+ attr_reader :minimum
13
+
14
+ def initialize user_grid, precision = nil
15
+ @user_grid = user_grid
16
+ @precision = precision || 1
17
+ @grid = @user_grid / (@precision + 0.5)
18
+ end
19
+
20
+ def minimum= minimum
21
+ minimum -= @user_grid
22
+ @minimum = minimum
23
+ end
24
+
25
+ #
26
+ #
27
+ def add_margin length
28
+ @minimum -= length
29
+ end
30
+
31
+ #
32
+ #
33
+ def recalculate location
34
+ ((location - @minimum) / @grid).floor
35
+ end
36
+
37
+ end
38
+
39
+ end
@@ -10,8 +10,8 @@ module Configuration
10
10
  attr_reader :index, :category
11
11
 
12
12
  def initialize index, category
13
- @index = index
14
- @category = category
13
+ @index = index
14
+ @category = category
15
15
  end
16
16
 
17
17
  def index_name
@@ -28,14 +28,6 @@ class Array
28
28
  result
29
29
  end
30
30
 
31
- # Accesses a random element of this array.
32
- #
33
- # TODO Remove?
34
- #
35
- def random
36
- self[Kernel.rand(self.length)]
37
- end
38
-
39
31
  # Sort the array using distance from levenshtein.
40
32
  #
41
33
  # Will raise if encounters not to_s-able element.
@@ -2,22 +2,8 @@
2
2
  #
3
3
  class Symbol
4
4
 
5
- # :keys.subtokens # => [:keys, :key, :ke, :k]
6
- # :keys.subtokens(2) # => [:keys, :key, :ke]
7
- #
8
- def subtokens from_length = 1
9
- sub = self.id2name
10
-
11
- size = sub.size
12
- from_length = size + from_length if from_length < 0
13
- from_length = size if size < from_length
14
-
15
- result = [self]
16
- size.downto(from_length + 1) { result << sub.chop!.intern }
17
- result
18
- end
19
-
20
- # TODO Duplicate code.
5
+ # :keys.each_subtoken # => yields each of [:keys, :key, :ke, :k]
6
+ # :keys.each_subtoken(2) # => yields each of [:keys, :key, :ke]
21
7
  #
22
8
  def each_subtoken from_length = 1
23
9
  sub = self.id2name
@@ -187,8 +187,6 @@ module Picky
187
187
  exclaim "#{entry} \x1b[31mexists\x1b[m, skipping."
188
188
  end
189
189
 
190
- # TODO Remove?
191
- #
192
190
  def exclaim something
193
191
  puts something
194
192
  end
@@ -22,17 +22,19 @@ module Index
22
22
  class Bundle
23
23
 
24
24
  attr_reader :identifier, :files
25
- attr_accessor :index, :weights, :similarity, :similarity_strategy
25
+ attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
26
26
 
27
- delegate :[], :[]=, :clear, :to => :index
27
+ delegate :clear, :to => :index
28
+ delegate :[], :[]=, :to => :configuration
28
29
 
29
30
  def initialize name, configuration, similarity_strategy
30
31
  @identifier = "#{configuration.identifier} (#{name})"
31
32
  @files = Files.new name, configuration
32
33
 
33
- @index = {}
34
- @weights = {}
35
- @similarity = {}
34
+ @index = {}
35
+ @weights = {}
36
+ @similarity = {}
37
+ @configuration = {} # A hash with config options.
36
38
 
37
39
  @similarity_strategy = similarity_strategy
38
40
  end
@@ -33,15 +33,13 @@ module Index
33
33
  # the directory the index file is in.
34
34
  #
35
35
  def backup
36
- prepare_backup backup_path
36
+ prepare_backup backup_directory
37
37
  FileUtils.cp cache_path, target, verbose: true
38
38
  end
39
39
  # The backup directory of this file.
40
40
  # Equal to the file's dirname plus /backup
41
41
  #
42
- # TODO: Rename to backup_dir.
43
- #
44
- def backup_path
42
+ def backup_directory
45
43
  ::File.join ::File.dirname(cache_path), 'backup'
46
44
  end
47
45
  # Prepares the backup directory for the file.
@@ -58,8 +56,6 @@ module Index
58
56
  end
59
57
  # The backup filename.
60
58
  #
61
- # TODO: Duplicate work done here?
62
- #
63
59
  def backup_file_path_of path
64
60
  dir, name = ::File.split path
65
61
  ::File.join dir, 'backup', name
@@ -3,22 +3,23 @@ module Index
3
3
  class Files
4
4
 
5
5
  attr_reader :bundle_name
6
- attr_reader :prepared, :index, :similarity, :weights
6
+ attr_reader :prepared, :index, :weights, :similarity, :configuration
7
7
 
8
- delegate :index_name, :category_name, :to => :@configuration
8
+ delegate :index_name, :category_name, :to => :@config
9
9
 
10
- def initialize bundle_name, configuration
11
- @bundle_name = bundle_name
12
- @configuration = configuration
10
+ def initialize bundle_name, config
11
+ @bundle_name = bundle_name
12
+ @config = config
13
13
 
14
14
  # Note: We marshal the similarity, as the
15
15
  # Yajl json lib cannot load symbolized
16
16
  # values, just keys.
17
17
  #
18
- @prepared = File::Text.new configuration.prepared_index_path
19
- @index = File::JSON.new configuration.index_path(bundle_name, :index)
20
- @similarity = File::Marshal.new configuration.index_path(bundle_name, :similarity)
21
- @weights = File::JSON.new configuration.index_path(bundle_name, :weights)
18
+ @prepared = File::Text.new config.prepared_index_path
19
+ @index = File::JSON.new config.index_path(bundle_name, :index)
20
+ @weights = File::JSON.new config.index_path(bundle_name, :weights)
21
+ @similarity = File::Marshal.new config.index_path(bundle_name, :similarity)
22
+ @configuration = File::JSON.new config.index_path(bundle_name, :configuration)
22
23
  end
23
24
 
24
25
  # Delegators.
@@ -35,11 +36,14 @@ module Index
35
36
  def dump_index index_hash
36
37
  index.dump index_hash
37
38
  end
39
+ def dump_weights weights_hash
40
+ weights.dump weights_hash
41
+ end
38
42
  def dump_similarity similarity_hash
39
43
  similarity.dump similarity_hash
40
44
  end
41
- def dump_weights weights_hash
42
- weights.dump weights_hash
45
+ def dump_configuration configuration_hash
46
+ configuration.dump configuration_hash
43
47
  end
44
48
 
45
49
  # Loading.
@@ -53,6 +57,9 @@ module Index
53
57
  def load_weights
54
58
  weights.load
55
59
  end
60
+ def load_configuration
61
+ configuration.load
62
+ end
56
63
 
57
64
  # Cache ok?
58
65
  #
@@ -82,16 +89,18 @@ module Index
82
89
  #
83
90
  def backup
84
91
  index.backup
85
- similarity.backup
86
92
  weights.backup
93
+ similarity.backup
94
+ configuration.backup
87
95
  end
88
96
 
89
97
  # Restores the indexes from the "backup" directory.
90
98
  #
91
99
  def restore
92
100
  index.restore
93
- similarity.restore
94
101
  weights.restore
102
+ similarity.restore
103
+ configuration.restore
95
104
  end
96
105
 
97
106
 
@@ -99,8 +108,9 @@ module Index
99
108
  #
100
109
  def delete
101
110
  index.delete
102
- similarity.delete
103
111
  weights.delete
112
+ similarity.delete
113
+ configuration.delete
104
114
  end
105
115
 
106
116
  end
@@ -21,25 +21,13 @@ module Indexed
21
21
  @weights[sym]
22
22
  end
23
23
 
24
- # Load the data from the db.
25
- #
26
- def load_from_index_file
27
- load_from_index_generation_message
28
- clear
29
- retrieve
30
- end
31
- # Notifies the user that the index is being loaded.
32
- #
33
- def load_from_index_generation_message
34
- timed_exclaim "LOAD INDEX #{identifier}."
35
- end
36
-
37
24
  # Loads all indexes.
38
25
  #
39
26
  def load
40
27
  load_index
41
- load_similarity
42
28
  load_weights
29
+ load_similarity
30
+ load_configuration
43
31
  end
44
32
  # Loads the core index.
45
33
  #
@@ -56,6 +44,11 @@ module Indexed
56
44
  def load_similarity
57
45
  self.similarity = files.load_similarity
58
46
  end
47
+ # Loads the configuration.
48
+ #
49
+ def load_configuration
50
+ self.configuration = files.load_configuration
51
+ end
59
52
 
60
53
  end
61
54
  end
@@ -7,7 +7,8 @@ module Indexed
7
7
  #
8
8
  class Category
9
9
 
10
- attr_reader :exact, :partial, :identifier, :name
10
+ attr_accessor :exact, :partial
11
+ attr_reader :identifier, :name
11
12
 
12
13
  #
13
14
  #
@@ -0,0 +1,35 @@
1
+ module Indexed
2
+ module Wrappers
3
+
4
+ module Bundle
5
+
6
+ # A calculation rewrites the symbol into a float.
7
+ #
8
+ # TODO I really need to allow integers as keys. The code below is just not ok.
9
+ #
10
+ class Calculation < Wrapper
11
+
12
+ #
13
+ #
14
+ def recalculate float
15
+ float
16
+ end
17
+
18
+ #
19
+ #
20
+ def ids sym
21
+ @bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
22
+ end
23
+
24
+ #
25
+ #
26
+ def weight sym
27
+ @bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
28
+ end
29
+
30
+ end
31
+
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,40 @@
1
+ module Indexed
2
+ module Wrappers
3
+
4
+ module Bundle
5
+
6
+ # A location calculation recalculates a location to the Picky internal location.
7
+ #
8
+ class Location < Calculation
9
+
10
+ def initialize bundle, options = {}
11
+ super bundle
12
+
13
+ precision = options[:precision] || 1
14
+ user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
15
+
16
+ @calculation = Calculations::Location.new user_grid, precision
17
+ end
18
+
19
+ #
20
+ #
21
+ def recalculate float
22
+ @calculation.recalculate float
23
+ end
24
+
25
+ #
26
+ #
27
+ def load
28
+ # Load first the bundle, then extract the config.
29
+ #
30
+ bundle.load
31
+ minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing.")
32
+ @calculation.minimum = minimum
33
+ end
34
+
35
+ end
36
+
37
+ end
38
+
39
+ end
40
+ end
@@ -0,0 +1,25 @@
1
+ module Indexed
2
+ module Wrappers
3
+
4
+ # Per Bundle wrappers.
5
+ #
6
+ module Bundle
7
+
8
+ # Base wrapper. Just delegates load, ids and weight to the bundle.
9
+ #
10
+ class Wrapper
11
+
12
+ attr_reader :bundle
13
+
14
+ def initialize bundle
15
+ @bundle = bundle
16
+ end
17
+
18
+ delegate :load, :ids, :weight, :to => :@bundle
19
+
20
+ end
21
+
22
+ end
23
+
24
+ end
25
+ end
@@ -57,7 +57,7 @@ module Indexers
57
57
  end
58
58
  end
59
59
  def indexing_message
60
- timed_exclaim "INDEX #{@configuration.identifier}"
60
+ timed_exclaim "INDEX #{@configuration.identifier}" # TODO from ...
61
61
  end
62
62
 
63
63
  end
@@ -120,6 +120,7 @@ module Indexing
120
120
  dump_index
121
121
  dump_similarity
122
122
  dump_weights
123
+ dump_configuration
123
124
  end
124
125
  # Dumps the core index.
125
126
  #
@@ -139,6 +140,12 @@ module Indexing
139
140
  timed_exclaim "DUMP SIMILARITY #{identifier}."
140
141
  files.dump_similarity similarity
141
142
  end
143
+ # Dumps the configuration.
144
+ #
145
+ def dump_configuration
146
+ timed_exclaim "DUMP CONFIGURATION #{identifier}."
147
+ files.dump_configuration configuration
148
+ end
142
149
 
143
150
  # Alerts the user if an index is missing.
144
151
  #
@@ -4,15 +4,32 @@ module Indexing
4
4
 
5
5
  attr_reader :exact, :partial, :name, :configuration, :indexer
6
6
 
7
+ # Mandatory params:
8
+ # * name: Category name to use as identifier and file names.
9
+ # * index: Index to which this category is attached.
10
+ # Options:
11
+ # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
12
+ # * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
13
+ # * source: Use if the category should use a different source.
14
+ # * from: The source category identifier to take the data from.
15
+ #
16
+ # Advanced Options (TODO):
17
+ #
18
+ # * weights:
19
+ # * tokenizer:
20
+ # * exact_indexing_bundle:
21
+ # * partial_indexing_bundle:
22
+ #
7
23
  def initialize name, index, options = {}
8
24
  @name = name
25
+ @from = options[:from]
9
26
 
10
27
  # Now we have enough info to combine the index and the category.
11
28
  #
12
29
  @configuration = Configuration::Index.new index, self
13
30
 
14
31
  @tokenizer = options[:tokenizer] || Tokenizers::Index.default
15
- @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer #, :as => options[:as] # TODO option as.
32
+ @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
16
33
 
17
34
  # TODO Push into Bundle.
18
35
  #
@@ -27,6 +44,10 @@ module Indexing
27
44
  delegate :identifier, :prepare_index_directory, :to => :configuration
28
45
  delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
29
46
 
47
+ def from
48
+ @from || name
49
+ end
50
+
30
51
  # TODO Spec.
31
52
  #
32
53
  def backup_caches
data/lib/picky/loader.rb CHANGED
@@ -108,6 +108,10 @@ module Loader
108
108
  #
109
109
  load_relative 'character_substituters/west_european'
110
110
 
111
+ # Calculations.
112
+ #
113
+ load_relative 'calculations/location'
114
+
111
115
  # Signal handling
112
116
  #
113
117
  load_relative 'signals'
@@ -188,6 +192,12 @@ module Loader
188
192
 
189
193
  load_relative 'indexed/wrappers/exact_first'
190
194
 
195
+ # Bundle Wrapper
196
+ #
197
+ load_relative 'indexed/wrappers/bundle/wrapper'
198
+ load_relative 'indexed/wrappers/bundle/calculation'
199
+ load_relative 'indexed/wrappers/bundle/location'
200
+
191
201
  # Tokens.
192
202
  #
193
203
  load_relative 'query/token'
@@ -24,7 +24,7 @@ module Sources
24
24
  # Harvests the data to index.
25
25
  #
26
26
  def harvest type, category
27
- category_name = category.name.to_s
27
+ category_name = category.from.to_s
28
28
  get_data do |doc|
29
29
  yield doc['_id'].to_i, doc[category_name] || next
30
30
  end
@@ -7,12 +7,14 @@ module Sources
7
7
 
8
8
  class CSV < Base
9
9
 
10
- attr_reader :file_name, :category_names
10
+ attr_reader :file_name, :csv_options, :category_names
11
11
 
12
12
  def initialize *category_names, options
13
13
  require 'csv'
14
14
  @category_names = category_names
15
- @file_name = Hash === options && options[:file] || raise_no_file_given(category_names)
15
+
16
+ @csv_options = Hash === options && options || {}
17
+ @file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
16
18
  end
17
19
 
18
20
  #
@@ -24,7 +26,7 @@ module Sources
24
26
  # Harvests the data to index.
25
27
  #
26
28
  def harvest _, category
27
- index = category_names.index category.name
29
+ index = category_names.index category.from
28
30
  get_data do |ary|
29
31
  indexed_id = ary.shift.to_i # TODO is to_i necessary?
30
32
  text = ary[index]
@@ -37,7 +39,7 @@ module Sources
37
39
  #
38
40
  #
39
41
  def get_data &block
40
- ::CSV.foreach file_name, &block
42
+ ::CSV.foreach file_name, csv_options, &block
41
43
  end
42
44
 
43
45
  end
@@ -126,7 +126,7 @@ module Sources
126
126
  # Base harvest statement for dbs.
127
127
  #
128
128
  def harvest_statement type, category
129
- "SELECT indexed_id, #{category.name} FROM #{snapshot_table_name(type)} st"
129
+ "SELECT indexed_id, #{category.from} FROM #{snapshot_table_name(type)} st"
130
130
  end
131
131
 
132
132
  # Override in subclasses.
@@ -19,7 +19,7 @@ module Sources
19
19
  def harvest _, category
20
20
  get_data do |uid, data|
21
21
  indexed_id = uid
22
- text = data[category.name]
22
+ text = data[category.from]
23
23
  next unless text
24
24
  text.force_encoding 'utf-8' # TODO Still needed?
25
25
  yield indexed_id, text
@@ -4,12 +4,13 @@ module Sources
4
4
 
5
5
  class Base
6
6
 
7
- attr_reader :backend
7
+ attr_reader :backend, :category
8
8
 
9
- # Wraps a backend
9
+ # Wraps an indexing category.
10
10
  #
11
- def initialize backend
12
- @backend = backend
11
+ def initialize category
12
+ @category = category
13
+ @backend = category.source
13
14
  end
14
15
 
15
16
  # Default is delegation for all methods