picky 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  module Cacher
2
2
  module Partial
3
- Default = Subtoken.new :down_to => -3, :starting_at => -1
3
+ Default = Substring.new :from => -3, :to => -1
4
4
  end
5
5
  end
@@ -4,20 +4,20 @@ module Cacher
4
4
 
5
5
  # Generates the right subtokens for use in the subtoken strategy.
6
6
  #
7
- class SubtokenGenerator
7
+ class SubstringGenerator
8
8
 
9
- attr_reader :down_to, :starting_at
9
+ attr_reader :from, :to
10
10
 
11
- def initialize down_to, starting_at
12
- @down_to, @starting_at = down_to, starting_at
11
+ def initialize from, to
12
+ @from, @to = from, to
13
13
 
14
- if @starting_at.zero?
14
+ if @to.zero?
15
15
  def each_subtoken token, &block
16
- token.each_subtoken @down_to, &block
16
+ token.each_subtoken @from, &block
17
17
  end
18
18
  else
19
19
  def each_subtoken token, &block
20
- token[0..@starting_at].intern.each_subtoken @down_to, &block
20
+ token[0..@to].intern.each_subtoken @from, &block
21
21
  end
22
22
  end
23
23
 
@@ -36,31 +36,31 @@ module Cacher
36
36
  # "flo"
37
37
  # "fl"
38
38
  # "f"
39
- # Depending on what the given down_to value is. (Example with down_to == 1)
39
+ # Depending on what the given from value is. (Example with from == 1)
40
40
  #
41
- class Subtoken < Strategy
41
+ class Substring < Strategy
42
42
 
43
43
  # Down to is how far it will go down in generating the subtokens.
44
44
  #
45
45
  # Examples:
46
- # With :hello, and starting_at -1
46
+ # With :hello, and to -1
47
47
  # * down to == 1: [:hello, :hell, :hel, :he, :h]
48
48
  # * down to == 4: [:hello, :hell]
49
49
  #
50
- # With :hello, and starting_at -2
50
+ # With :hello, and to -2
51
51
  # * down to == 1: [:hell, :hel, :he, :h]
52
52
  # * down to == 4: [:hell]
53
53
  #
54
54
  def initialize options = {}
55
- down_to = options[:down_to] || 1
56
- starting_at = options[:starting_at] || -1
57
- @generator = SubtokenGenerator.new down_to, starting_at
55
+ from = options[:from] || 1
56
+ to = options[:to] || -1
57
+ @generator = SubstringGenerator.new from, to
58
58
  end
59
- def down_to
60
- @generator.down_to
59
+ def from
60
+ @generator.from
61
61
  end
62
- def starting_at
63
- @generator.starting_at
62
+ def to
63
+ @generator.to
64
64
  end
65
65
 
66
66
  # Generates a partial index from the given index.
@@ -6,7 +6,7 @@ module Cacher
6
6
 
7
7
  # Generate a partial index based on the given index.
8
8
  #
9
- def generate strategy = Partial::Subtoken.new(:down_to => 1)
9
+ def generate strategy = Partial::Substring.new(:from => 1)
10
10
  strategy.generate_from self.index
11
11
  end
12
12
 
@@ -4,16 +4,16 @@ class Hash
4
4
 
5
5
  # Dumps jsonized self to the path given. Minus extension.
6
6
  #
7
- def dump_to_json path
8
- File.open("#{path}.json", 'w') do |out_file|
7
+ def dump_json path
8
+ File.open(path, 'w') do |out_file|
9
9
  Yajl::Encoder.encode self, out_file
10
10
  end
11
11
  end
12
12
 
13
13
  # Dumps binary self to the path given. Minus extension.
14
14
  #
15
- def dump_to_marshalled path
16
- File.open("#{path}.dump", 'w:binary') do |out_file|
15
+ def dump_marshalled path
16
+ File.open(path, 'w:binary') do |out_file|
17
17
  Marshal.dump self, out_file
18
18
  end
19
19
  end
@@ -5,30 +5,30 @@ class Symbol
5
5
  # :keys.subtokens # => [:keys, :key, :ke, :k]
6
6
  # :keys.subtokens(2) # => [:keys, :key, :ke]
7
7
  #
8
- def subtokens down_to_length = 1
8
+ def subtokens from_length = 1
9
9
  sub = self.id2name
10
10
 
11
11
  size = sub.size
12
- down_to_length = size + down_to_length if down_to_length < 0
13
- down_to_length = size if size < down_to_length
12
+ from_length = size + from_length if from_length < 0
13
+ from_length = size if size < from_length
14
14
 
15
15
  result = [self]
16
- size.downto(down_to_length + 1) { result << sub.chop!.intern }
16
+ size.downto(from_length + 1) { result << sub.chop!.intern }
17
17
  result
18
18
  end
19
19
 
20
20
  # TODO Duplicate code.
21
21
  #
22
- def each_subtoken down_to_length = 1
22
+ def each_subtoken from_length = 1
23
23
  sub = self.id2name
24
24
 
25
25
  size = sub.size
26
- down_to_length = size + down_to_length + 1 if down_to_length < 0
27
- down_to_length = size if size < down_to_length
28
- down_to_length = 1 if down_to_length < 1
26
+ from_length = size + from_length + 1 if from_length < 0
27
+ from_length = size if size < from_length
28
+ from_length = 1 if from_length < 1
29
29
 
30
30
  yield self
31
- size.downto(down_to_length + 1) { yield sub.chop!.intern }
31
+ size.downto(from_length + 1) { yield sub.chop!.intern }
32
32
  end
33
33
 
34
34
  end
@@ -6,12 +6,14 @@ module Index
6
6
  #
7
7
  # Handles exact index, partial index, weights index, and similarity index.
8
8
  #
9
+ # Delegates file handling and checking to a Index::Files object.
10
+ #
9
11
  class Bundle
10
12
 
11
- attr_reader :checker
12
- attr_reader :name, :category, :type
13
+ attr_reader :identifier, :category
13
14
  attr_accessor :index, :weights, :similarity
14
15
  attr_accessor :partial_strategy, :weights_strategy, :similarity_strategy
16
+ attr_reader :files
15
17
 
16
18
  delegate :[], :[]=, :clear, :to => :index
17
19
  delegate :raise_unless_cache_exists, :to => :checker
@@ -19,19 +21,21 @@ module Index
19
21
  # Path is in which directory the cache is located.
20
22
  #
21
23
  def initialize name, category, type, partial_strategy, weights_strategy, similarity_strategy
24
+ @identifier = "#{name}: #{type.name} #{category.name}"
25
+
22
26
  @index = {}
23
27
  @weights = {}
24
28
  @similarity = {}
25
29
 
26
- @name = name
30
+ # TODO Used in weights, try to remove!
31
+ #
27
32
  @category = category
28
- @type = type
29
33
 
30
34
  @partial_strategy = partial_strategy
31
35
  @weights_strategy = weights_strategy
32
36
  @similarity_strategy = similarity_strategy
33
37
 
34
- @checker = BundleChecker.new self
38
+ @files = Files.new name, category.name, type.name
35
39
  end
36
40
 
37
41
  # Get the ids for the text.
@@ -50,109 +54,6 @@ module Index
50
54
  code = similarity_strategy.encoded text
51
55
  code && @similarity[code] || []
52
56
  end
53
-
54
- # Identifier for this bundle.
55
- #
56
- def identifier
57
- "#{name}: #{type.name} #{category.name}"
58
- end
59
-
60
- # Point to category.
61
- #
62
- def search_index_root
63
- File.join PICKY_ROOT, 'index'
64
- # category.search_index_root
65
- end
66
-
67
- # Copies the indexes to the "backup" directory.
68
- #
69
- def backup
70
- target = backup_path
71
- FileUtils.mkdir target unless Dir.exists?(target)
72
- FileUtils.cp index_cache_path, target, :verbose => true
73
- FileUtils.cp similarity_cache_path, target, :verbose => true
74
- FileUtils.cp weights_cache_path, target, :verbose => true
75
- end
76
- def backup_path
77
- File.join File.dirname(index_cache_path), 'backup'
78
- end
79
-
80
- # Restores the indexes from the "backup" directory.
81
- #
82
- def restore
83
- FileUtils.cp backup_file_path_of(index_cache_path), index_cache_path, :verbose => true
84
- FileUtils.cp backup_file_path_of(similarity_cache_path), similarity_cache_path, :verbose => true
85
- FileUtils.cp backup_file_path_of(weights_cache_path), weights_cache_path, :verbose => true
86
- end
87
- def backup_file_path_of path
88
- dir, name = File.split path
89
- File.join dir, 'backup', name
90
- end
91
-
92
- # Delete the file at path.
93
- #
94
- def delete path
95
- `rm -Rf #{path}`
96
- end
97
- # Delete all index files.
98
- #
99
- def delete_all
100
- delete index_cache_path
101
- delete similarity_cache_path
102
- delete weights_cache_path
103
- end
104
-
105
- # Create directory and parent directories.
106
- #
107
- def create_directory
108
- FileUtils.mkdir_p cache_directory
109
- end
110
- # TODO Move to config. Duplicate Code in field.rb.
111
- #
112
- def cache_directory
113
- File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
114
- end
115
-
116
- # Generates a cache path.
117
- #
118
- def cache_path text
119
- File.join cache_directory, "#{name}_#{text}"
120
- end
121
- def index_cache_path
122
- cache_path "#{category.name}_index"
123
- end
124
- def similarity_cache_path
125
- cache_path "#{category.name}_similarity"
126
- end
127
- def weights_cache_path
128
- cache_path "#{category.name}_weights"
129
- end
130
-
131
- # Loads all indexes into this category.
132
- #
133
- def load
134
- load_index
135
- load_similarity
136
- load_weights
137
- end
138
- def load_the_json path
139
- Yajl::Parser.parse File.open("#{path}.json", 'r'), :symbolize_keys => true
140
- end
141
- def load_the_marshalled path
142
- Marshal.load File.open("#{path}.dump", 'r:binary')
143
- end
144
- def load_index
145
- timed_exclaim "Loading the index for #{identifier} from the cache."
146
- self.index = load_the_json index_cache_path
147
- end
148
- def load_similarity
149
- timed_exclaim "Loading the similarity for #{identifier} from the cache."
150
- self.similarity = load_the_marshalled similarity_cache_path
151
- end
152
- def load_weights
153
- timed_exclaim "Loading the weights for #{identifier} from the cache."
154
- self.weights = load_the_json weights_cache_path
155
- end
156
57
 
157
58
  # Generation
158
59
  #
@@ -200,29 +101,17 @@ module Index
200
101
  # TODO Beautify.
201
102
  #
202
103
  def retrieve
203
- # TODO Make r:binary configurable!
204
- #
205
- File.open(search_index_file_name, 'r:binary') do |file|
206
- file.each_line do |line|
207
- indexed_id, token = line.split ?,,2
208
- token.chomp!
209
- token = token.to_sym
210
-
211
- initialize_index_for token
212
- index[token] << indexed_id.to_i
213
- end
104
+ files.retrieve do |indexed_id, token|
105
+ token.chomp!
106
+ token = token.to_sym
107
+
108
+ initialize_index_for token
109
+ index[token] << indexed_id.to_i
214
110
  end
215
111
  end
216
112
  def initialize_index_for token
217
113
  index[token] ||= []
218
114
  end
219
- # TODO Duplicate code!
220
- #
221
- # TODO Use config object?
222
- #
223
- def search_index_file_name
224
- File.join cache_directory, "prepared_#{category.name}_index.txt"
225
- end
226
115
 
227
116
  # Generators.
228
117
  #
@@ -266,20 +155,56 @@ module Index
266
155
  end
267
156
  def dump_index
268
157
  timed_exclaim "DUMP INDEX #{identifier}."
269
- index.dump_to_json index_cache_path
158
+ files.dump_index index
270
159
  end
271
- # Note: We marshal the similarity, as the
272
- # Yajl json lib cannot load symbolized
273
- # values, just keys.
274
- #
275
160
  def dump_similarity
276
161
  timed_exclaim "DUMP SIMILARITY #{identifier}."
277
- similarity.dump_to_marshalled similarity_cache_path
162
+ files.dump_similarity similarity
278
163
  end
279
164
  def dump_weights
280
165
  timed_exclaim "DUMP WEIGHTS #{identifier}."
281
- weights.dump_to_json weights_cache_path
166
+ files.dump_weights weights
167
+ end
168
+
169
+ # Loads all indexes into this category.
170
+ #
171
+ def load
172
+ load_index
173
+ load_similarity
174
+ load_weights
175
+ end
176
+ def load_index
177
+ timed_exclaim "Loading the index for #{identifier} from the cache."
178
+ self.index = files.load_index
179
+ end
180
+ def load_similarity
181
+ timed_exclaim "Loading the similarity for #{identifier} from the cache."
182
+ self.similarity = files.load_similarity
282
183
  end
184
+ def load_weights
185
+ timed_exclaim "Loading the weights for #{identifier} from the cache."
186
+ self.weights = files.load_weights
187
+ end
188
+
189
+ # Alerts the user if an index is missing.
190
+ #
191
+ def raise_unless_cache_exists
192
+ warn_cache_small :index if files.index_cache_small?
193
+ warn_cache_small :similarity if files.similarity_cache_small?
194
+ warn_cache_small :weights if files.weights_cache_small?
283
195
 
196
+ raise_cache_missing :index unless files.index_cache_ok?
197
+ raise_cache_missing :similarity unless files.similarity_cache_ok?
198
+ raise_cache_missing :weights unless files.weights_cache_ok?
199
+ end
200
+ def warn_cache_small what
201
+ puts "#{what} cache for #{identifier} smaller than 16 bytes."
202
+ end
203
+ # Raises an appropriate error message.
204
+ #
205
+ def raise_cache_missing what
206
+ raise "#{what} cache for #{identifier} missing."
207
+ end
208
+
284
209
  end
285
210
  end
@@ -0,0 +1,67 @@
1
module Index

  module File

    # Base class for one cache file on disk.
    #
    # Holds the full path of the file (given path plus "." plus #extension)
    # and offers backup, restore, delete and size checks for it. Subclasses
    # override #extension to choose the suffix.
    #
    class Basic

      attr_reader :cache_path

      # cache_path - Path of the cache file WITHOUT extension; the value of
      #              #extension is appended after a dot.
      #
      def initialize cache_path
        @cache_path = "#{cache_path}.#{extension}"
      end

      # Extension (without the dot) that is appended to the cache path.
      #
      def extension
        :index
      end

      # Backup.
      #
      # Copies the cache file into the "backup" directory that lives next
      # to it, creating that directory first if it does not exist yet.
      #
      def backup
        # The original referenced an undefined local here, which raised a
        # NameError on every call.
        #
        target = backup_path
        prepare_backup target
        FileUtils.cp cache_path, target, :verbose => true
      end
      # Directory the backup copy is placed in.
      #
      def backup_path
        ::File.join ::File.dirname(cache_path), 'backup'
      end
      # Creates the backup directory unless it is already there.
      #
      def prepare_backup target
        FileUtils.mkdir target unless ::File.directory?(target)
      end

      # Restore.
      #
      # Copies the backed up file over the cache file.
      #
      def restore
        FileUtils.cp backup_file_path_of(cache_path), cache_path, :verbose => true
      end
      # Maps "dir/name" to "dir/backup/name".
      #
      def backup_file_path_of path
        dir, name = ::File.split path
        ::File.join dir, 'backup', name
      end

      # Delete.
      #
      # Removes the cache file. A missing file is not an error. Done
      # in-process so that paths containing spaces or shell
      # metacharacters are handled correctly.
      #
      def delete
        FileUtils.rm_rf cache_path
      end

      # Checks.
      #

      # Is the cache small? I.e. smaller than 16 Bytes in size.
      #
      def cache_small?
        size_of(cache_path) < 16
      end
      # Is the cache ok? I.e. it exists and is not empty.
      #
      def cache_ok?
        size_of(cache_path) > 0
      end
      # Size of the file at path in Bytes, 0 if it is missing or empty.
      #
      def size_of path
        ::File.size?(path) || 0
      end

    end

  end

end
@@ -0,0 +1,24 @@
1
module Index

  module File

    # Cache file that is stored as JSON (extension "json").
    #
    class JSON < Basic

      # Extension (without the dot) that is appended to the cache path.
      #
      def extension
        :json
      end
      # Parses the cache file and returns the result, with symbolized keys.
      #
      # The block form of open is used so the file handle is closed again
      # as soon as parsing is done.
      #
      def load
        ::File.open(cache_path, 'r') do |file|
          Yajl::Parser.parse file, :symbolize_keys => true
        end
      end
      # Writes the given hash to the cache file by calling dump_json on it.
      #
      def dump hash
        hash.dump_json cache_path
      end
      # Line-wise retrieval is not supported for JSON files; always raises.
      #
      def retrieve
        raise "Can't retrieve from JSON file. Use text file."
      end

    end

  end

end
@@ -0,0 +1,24 @@
1
module Index

  module File

    # Cache file that is stored as a binary Marshal dump (extension "dump").
    #
    class Marshal < Basic

      # Extension (without the dot) that is appended to the cache path.
      #
      def extension
        :dump
      end
      # Unmarshals the cache file and returns the result.
      #
      # The block form of open is used so the file handle is closed again
      # as soon as loading is done.
      #
      # NOTE(review): Marshal.load must only be fed trusted data; presumably
      # these files are only ever written by #dump - confirm.
      #
      def load
        ::File.open(cache_path, 'r:binary') do |file|
          ::Marshal.load file
        end
      end
      # Writes the given hash to the cache file by calling dump_marshalled
      # on it.
      #
      def dump hash
        hash.dump_marshalled cache_path
      end
      # Line-wise retrieval is not supported for marshalled files; always
      # raises.
      #
      def retrieve
        raise "Can't retrieve from marshalled file. Use text file."
      end

    end

  end

end
@@ -0,0 +1,28 @@
1
module Index

  module File

    # Cache file that is stored as plain text (extension "txt"), one
    # comma separated record per line.
    #
    class Text < Basic

      # Extension (without the dot) that is appended to the cache path.
      #
      def extension
        :txt
      end

      # Loading a whole structure is not supported for text files; always
      # raises.
      #
      def load
        raise "Can't load from text file. Use JSON or Marshal."
      end

      # Dumping a hash is not supported for text files; always raises.
      #
      def dump(hash)
        raise "Can't dump to text file. Use JSON or Marshal."
      end

      # Reads the file line by line and yields each line split at its
      # first comma, i.e. at most two parts. The second part still carries
      # the line ending.
      #
      def retrieve
        ::File.open(cache_path, 'r:binary') do |file|
          file.each_line { |line| yield line.split(',', 2) }
        end
      end

    end

  end

end
@@ -0,0 +1,135 @@
1
module Index

  # TODO Think about using 3 instances of this in the bundle.
  #
  # Groups the cache files of one bundle - the prepared text file plus the
  # index, similarity and weights caches - and forwards the per-file
  # operations to them.
  #
  class Files

    attr_reader :bundle_name, :category_name, :type_name
    attr_reader :prepared, :index, :similarity, :weights

    # Remembers the three names and builds the four file objects from them.
    #
    def initialize(bundle_name, category_name, type_name)
      @bundle_name, @category_name, @type_name = bundle_name, category_name, type_name

      prepared_path = "#{cache_directory}/prepared_#{category_name}_index"

      # Note: The similarity is marshalled, as the Yajl json lib
      # cannot load symbolized values, just keys.
      #
      @prepared   = File::Text.new(prepared_path)
      @index      = File::JSON.new(cache_path(:index))
      @similarity = File::Marshal.new(cache_path(:similarity))
      @weights    = File::JSON.new(cache_path(:weights))
    end

    # Paths.
    #

    # Extension-less cache path for the file with the given name.
    #
    def cache_path(name)
      file_name = "#{bundle_name}_#{category_name}_#{name}"
      ::File.join(cache_directory, file_name)
    end

    # The "index" directory below PICKY_ROOT.
    #
    def search_index_root
      ::File.join(PICKY_ROOT, 'index')
    end

    # Creates the cache directory including all missing parents.
    #
    def create_directory
      FileUtils.mkdir_p(cache_directory)
    end

    # TODO Move to config. Duplicate Code in field.rb.
    #
    def cache_directory
      "#{search_index_root}/#{PICKY_ENVIRONMENT}/#{type_name}"
    end

    # Hands the given block to the prepared text file's retrieve.
    #
    def retrieve(&block)
      prepared.retrieve(&block)
    end

    # Everything below forwards to the single index/similarity/weights
    # file objects.
    #

    # Dumping.
    #
    def dump_index(index_hash)
      index.dump(index_hash)
    end

    def dump_similarity(similarity_hash)
      similarity.dump(similarity_hash)
    end

    def dump_weights(weights_hash)
      weights.dump(weights_hash)
    end

    # Loading.
    #
    def load_index
      index.load
    end

    def load_similarity
      similarity.load
    end

    def load_weights
      weights.load
    end

    # Cache ok?
    #
    def index_cache_ok?
      index.cache_ok?
    end

    def similarity_cache_ok?
      similarity.cache_ok?
    end

    def weights_cache_ok?
      weights.cache_ok?
    end

    # Cache small?
    #
    def index_cache_small?
      index.cache_small?
    end

    def similarity_cache_small?
      similarity.cache_small?
    end

    def weights_cache_small?
      weights.cache_small?
    end

    # Backs up index, similarity and weights, in that order.
    #
    def backup
      index.backup
      similarity.backup
      weights.backup
    end

    # Restores index, similarity and weights, in that order.
    #
    def restore
      index.restore
      similarity.restore
      weights.restore
    end

    # Deletes index, similarity and weights, in that order.
    #
    def delete
      index.delete
      similarity.delete
      weights.delete
    end

  end

end
@@ -19,6 +19,8 @@ module Indexers
19
19
  end
20
20
  # Convenience methods for user subclasses.
21
21
  #
22
+ # TODO Duplicate code in Index::Files.
23
+ #
22
24
  def search_index_file_name
23
25
  @field.search_index_file_name
24
26
  end
@@ -48,6 +50,8 @@ module Indexers
48
50
 
49
51
  indexing_message
50
52
 
53
+ # TODO Move open to Index::File.
54
+ #
51
55
  File.open(search_index_file_name, 'w:binary') do |file|
52
56
  result = []
53
57
  source.harvest(@type, @field) do |indexed_id, text|