picky 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  module Cacher
2
2
  module Partial
3
- Default = Subtoken.new :down_to => -3, :starting_at => -1
3
+ Default = Substring.new :from => -3, :to => -1
4
4
  end
5
5
  end
@@ -4,20 +4,20 @@ module Cacher
4
4
 
5
5
  # Generates the right subtokens for use in the substring strategy.
6
6
  #
7
- class SubtokenGenerator
7
+ class SubstringGenerator
8
8
 
9
- attr_reader :down_to, :starting_at
9
+ attr_reader :from, :to
10
10
 
11
- def initialize down_to, starting_at
12
- @down_to, @starting_at = down_to, starting_at
11
+ def initialize from, to
12
+ @from, @to = from, to
13
13
 
14
- if @starting_at.zero?
14
+ if @to.zero?
15
15
  def each_subtoken token, &block
16
- token.each_subtoken @down_to, &block
16
+ token.each_subtoken @from, &block
17
17
  end
18
18
  else
19
19
  def each_subtoken token, &block
20
- token[0..@starting_at].intern.each_subtoken @down_to, &block
20
+ token[0..@to].intern.each_subtoken @from, &block
21
21
  end
22
22
  end
23
23
 
@@ -36,31 +36,31 @@ module Cacher
36
36
  # "flo"
37
37
  # "fl"
38
38
  # "f"
39
- # Depending on what the given down_to value is. (Example with down_to == 1)
39
+ # Depending on what the given from value is. (Example with from == 1)
40
40
  #
41
- class Subtoken < Strategy
41
+ class Substring < Strategy
42
42
 
43
43
  # Down to is how far it will go down in generating the subtokens.
44
44
  #
45
45
  # Examples:
46
- # With :hello, and starting_at -1
46
+ # With :hello, and to -1
47
47
  # * down to == 1: [:hello, :hell, :hel, :he, :h]
48
48
  # * down to == 4: [:hello, :hell]
49
49
  #
50
- # With :hello, and starting_at -2
50
+ # With :hello, and to -2
51
51
  # * down to == 1: [:hell, :hel, :he, :h]
52
52
  # * down to == 4: [:hell]
53
53
  #
54
54
  def initialize options = {}
55
- down_to = options[:down_to] || 1
56
- starting_at = options[:starting_at] || -1
57
- @generator = SubtokenGenerator.new down_to, starting_at
55
+ from = options[:from] || 1
56
+ to = options[:to] || -1
57
+ @generator = SubstringGenerator.new from, to
58
58
  end
59
- def down_to
60
- @generator.down_to
59
+ def from
60
+ @generator.from
61
61
  end
62
- def starting_at
63
- @generator.starting_at
62
+ def to
63
+ @generator.to
64
64
  end
65
65
 
66
66
  # Generates a partial index from the given index.
@@ -6,7 +6,7 @@ module Cacher
6
6
 
7
7
  # Generate a partial index based on the given index.
8
8
  #
9
- def generate strategy = Partial::Subtoken.new(:down_to => 1)
9
+ def generate strategy = Partial::Substring.new(:from => 1)
10
10
  strategy.generate_from self.index
11
11
  end
12
12
 
@@ -4,16 +4,16 @@ class Hash
4
4
 
5
5
  # Dumps jsonized self to exactly the path given; no extension is appended.
6
6
  #
7
- def dump_to_json path
8
- File.open("#{path}.json", 'w') do |out_file|
7
+ def dump_json path
8
+ File.open(path, 'w') do |out_file|
9
9
  Yajl::Encoder.encode self, out_file
10
10
  end
11
11
  end
12
12
 
13
13
  # Dumps marshalled (binary) self to exactly the path given; no extension is appended.
14
14
  #
15
- def dump_to_marshalled path
16
- File.open("#{path}.dump", 'w:binary') do |out_file|
15
+ def dump_marshalled path
16
+ File.open(path, 'w:binary') do |out_file|
17
17
  Marshal.dump self, out_file
18
18
  end
19
19
  end
@@ -5,30 +5,30 @@ class Symbol
5
5
  # :keys.subtokens # => [:keys, :key, :ke, :k]
6
6
  # :keys.subtokens(2) # => [:keys, :key, :ke]
7
7
  #
8
- def subtokens down_to_length = 1
8
+ def subtokens from_length = 1
9
9
  sub = self.id2name
10
10
 
11
11
  size = sub.size
12
- down_to_length = size + down_to_length if down_to_length < 0
13
- down_to_length = size if size < down_to_length
12
+ from_length = size + from_length if from_length < 0
13
+ from_length = size if size < from_length
14
14
 
15
15
  result = [self]
16
- size.downto(down_to_length + 1) { result << sub.chop!.intern }
16
+ size.downto(from_length + 1) { result << sub.chop!.intern }
17
17
  result
18
18
  end
19
19
 
20
20
  # TODO Duplicate code.
21
21
  #
22
- def each_subtoken down_to_length = 1
22
+ def each_subtoken from_length = 1
23
23
  sub = self.id2name
24
24
 
25
25
  size = sub.size
26
- down_to_length = size + down_to_length + 1 if down_to_length < 0
27
- down_to_length = size if size < down_to_length
28
- down_to_length = 1 if down_to_length < 1
26
+ from_length = size + from_length + 1 if from_length < 0
27
+ from_length = size if size < from_length
28
+ from_length = 1 if from_length < 1
29
29
 
30
30
  yield self
31
- size.downto(down_to_length + 1) { yield sub.chop!.intern }
31
+ size.downto(from_length + 1) { yield sub.chop!.intern }
32
32
  end
33
33
 
34
34
  end
@@ -6,12 +6,14 @@ module Index
6
6
  #
7
7
  # Handles exact index, partial index, weights index, and similarity index.
8
8
  #
9
+ # Delegates file handling and checking to an Index::Files object.
10
+ #
9
11
  class Bundle
10
12
 
11
- attr_reader :checker
12
- attr_reader :name, :category, :type
13
+ attr_reader :identifier, :category
13
14
  attr_accessor :index, :weights, :similarity
14
15
  attr_accessor :partial_strategy, :weights_strategy, :similarity_strategy
16
+ attr_reader :files
15
17
 
16
18
  delegate :[], :[]=, :clear, :to => :index
17
19
  delegate :raise_unless_cache_exists, :to => :checker
@@ -19,19 +21,21 @@ module Index
19
21
  # Path is in which directory the cache is located.
20
22
  #
21
23
  def initialize name, category, type, partial_strategy, weights_strategy, similarity_strategy
24
+ @identifier = "#{name}: #{type.name} #{category.name}"
25
+
22
26
  @index = {}
23
27
  @weights = {}
24
28
  @similarity = {}
25
29
 
26
- @name = name
30
+ # TODO Used in weights, try to remove!
31
+ #
27
32
  @category = category
28
- @type = type
29
33
 
30
34
  @partial_strategy = partial_strategy
31
35
  @weights_strategy = weights_strategy
32
36
  @similarity_strategy = similarity_strategy
33
37
 
34
- @checker = BundleChecker.new self
38
+ @files = Files.new name, category.name, type.name
35
39
  end
36
40
 
37
41
  # Get the ids for the text.
@@ -50,109 +54,6 @@ module Index
50
54
  code = similarity_strategy.encoded text
51
55
  code && @similarity[code] || []
52
56
  end
53
-
54
- # Identifier for this bundle.
55
- #
56
- def identifier
57
- "#{name}: #{type.name} #{category.name}"
58
- end
59
-
60
- # Point to category.
61
- #
62
- def search_index_root
63
- File.join PICKY_ROOT, 'index'
64
- # category.search_index_root
65
- end
66
-
67
- # Copies the indexes to the "backup" directory.
68
- #
69
- def backup
70
- target = backup_path
71
- FileUtils.mkdir target unless Dir.exists?(target)
72
- FileUtils.cp index_cache_path, target, :verbose => true
73
- FileUtils.cp similarity_cache_path, target, :verbose => true
74
- FileUtils.cp weights_cache_path, target, :verbose => true
75
- end
76
- def backup_path
77
- File.join File.dirname(index_cache_path), 'backup'
78
- end
79
-
80
- # Restores the indexes from the "backup" directory.
81
- #
82
- def restore
83
- FileUtils.cp backup_file_path_of(index_cache_path), index_cache_path, :verbose => true
84
- FileUtils.cp backup_file_path_of(similarity_cache_path), similarity_cache_path, :verbose => true
85
- FileUtils.cp backup_file_path_of(weights_cache_path), weights_cache_path, :verbose => true
86
- end
87
- def backup_file_path_of path
88
- dir, name = File.split path
89
- File.join dir, 'backup', name
90
- end
91
-
92
- # Delete the file at path.
93
- #
94
- def delete path
95
- `rm -Rf #{path}`
96
- end
97
- # Delete all index files.
98
- #
99
- def delete_all
100
- delete index_cache_path
101
- delete similarity_cache_path
102
- delete weights_cache_path
103
- end
104
-
105
- # Create directory and parent directories.
106
- #
107
- def create_directory
108
- FileUtils.mkdir_p cache_directory
109
- end
110
- # TODO Move to config. Duplicate Code in field.rb.
111
- #
112
- def cache_directory
113
- File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
114
- end
115
-
116
- # Generates a cache path.
117
- #
118
- def cache_path text
119
- File.join cache_directory, "#{name}_#{text}"
120
- end
121
- def index_cache_path
122
- cache_path "#{category.name}_index"
123
- end
124
- def similarity_cache_path
125
- cache_path "#{category.name}_similarity"
126
- end
127
- def weights_cache_path
128
- cache_path "#{category.name}_weights"
129
- end
130
-
131
- # Loads all indexes into this category.
132
- #
133
- def load
134
- load_index
135
- load_similarity
136
- load_weights
137
- end
138
- def load_the_json path
139
- Yajl::Parser.parse File.open("#{path}.json", 'r'), :symbolize_keys => true
140
- end
141
- def load_the_marshalled path
142
- Marshal.load File.open("#{path}.dump", 'r:binary')
143
- end
144
- def load_index
145
- timed_exclaim "Loading the index for #{identifier} from the cache."
146
- self.index = load_the_json index_cache_path
147
- end
148
- def load_similarity
149
- timed_exclaim "Loading the similarity for #{identifier} from the cache."
150
- self.similarity = load_the_marshalled similarity_cache_path
151
- end
152
- def load_weights
153
- timed_exclaim "Loading the weights for #{identifier} from the cache."
154
- self.weights = load_the_json weights_cache_path
155
- end
156
57
 
157
58
  # Generation
158
59
  #
@@ -200,29 +101,17 @@ module Index
200
101
  # TODO Beautify.
201
102
  #
202
103
  def retrieve
203
- # TODO Make r:binary configurable!
204
- #
205
- File.open(search_index_file_name, 'r:binary') do |file|
206
- file.each_line do |line|
207
- indexed_id, token = line.split ?,,2
208
- token.chomp!
209
- token = token.to_sym
210
-
211
- initialize_index_for token
212
- index[token] << indexed_id.to_i
213
- end
104
+ files.retrieve do |indexed_id, token|
105
+ token.chomp!
106
+ token = token.to_sym
107
+
108
+ initialize_index_for token
109
+ index[token] << indexed_id.to_i
214
110
  end
215
111
  end
216
112
  def initialize_index_for token
217
113
  index[token] ||= []
218
114
  end
219
- # TODO Duplicate code!
220
- #
221
- # TODO Use config object?
222
- #
223
- def search_index_file_name
224
- File.join cache_directory, "prepared_#{category.name}_index.txt"
225
- end
226
115
 
227
116
  # Generators.
228
117
  #
@@ -266,20 +155,56 @@ module Index
266
155
  end
267
156
  def dump_index
268
157
  timed_exclaim "DUMP INDEX #{identifier}."
269
- index.dump_to_json index_cache_path
158
+ files.dump_index index
270
159
  end
271
- # Note: We marshal the similarity, as the
272
- # Yajl json lib cannot load symbolized
273
- # values, just keys.
274
- #
275
160
  def dump_similarity
276
161
  timed_exclaim "DUMP SIMILARITY #{identifier}."
277
- similarity.dump_to_marshalled similarity_cache_path
162
+ files.dump_similarity similarity
278
163
  end
279
164
  def dump_weights
280
165
  timed_exclaim "DUMP WEIGHTS #{identifier}."
281
- weights.dump_to_json weights_cache_path
166
+ files.dump_weights weights
167
+ end
168
+
169
+ # Loads all indexes into this category.
170
+ #
171
+ def load
172
+ load_index
173
+ load_similarity
174
+ load_weights
175
+ end
176
+ def load_index
177
+ timed_exclaim "Loading the index for #{identifier} from the cache."
178
+ self.index = files.load_index
179
+ end
180
+ def load_similarity
181
+ timed_exclaim "Loading the similarity for #{identifier} from the cache."
182
+ self.similarity = files.load_similarity
282
183
  end
184
+ def load_weights
185
+ timed_exclaim "Loading the weights for #{identifier} from the cache."
186
+ self.weights = files.load_weights
187
+ end
188
+
189
+ # Alerts the user if an index is missing.
190
+ #
191
+ def raise_unless_cache_exists
192
+ warn_cache_small :index if files.index_cache_small?
193
+ warn_cache_small :similarity if files.similarity_cache_small?
194
+ warn_cache_small :weights if files.weights_cache_small?
283
195
 
196
+ raise_cache_missing :index unless files.index_cache_ok?
197
+ raise_cache_missing :similarity unless files.similarity_cache_ok?
198
+ raise_cache_missing :weights unless files.weights_cache_ok?
199
+ end
200
+ def warn_cache_small what
201
+ puts "#{what} cache for #{identifier} smaller than 16 bytes."
202
+ end
203
+ # Raises an appropriate error message.
204
+ #
205
+ def raise_cache_missing what
206
+ raise "#{what} cache for #{identifier} missing."
207
+ end
208
+
284
209
  end
285
210
  end
@@ -0,0 +1,67 @@
1
+ module Index
2
+
3
+ module File
4
+
5
+ class Basic
6
+
7
+ attr_reader :cache_path
8
+
9
+ def initialize cache_path
10
+ @cache_path = "#{cache_path}.#{extension}"
11
+ end
12
+
13
+ def extension
14
+ :index
15
+ end
16
+
17
+ # Backup.
18
+ #
19
+ def backup
20
+ prepare_backup backup_path
21
+ FileUtils.cp cache_path, target, :verbose => true
22
+ end
23
+ def backup_path
24
+ ::File.join ::File.dirname(cache_path), 'backup'
25
+ end
26
+ def prepare_backup target
27
+ FileUtils.mkdir target unless Dir.exists?(target)
28
+ end
29
+
30
+ # Restore.
31
+ #
32
+ def restore
33
+ FileUtils.cp backup_file_path_of(cache_path), cache_path, :verbose => true
34
+ end
35
+ def backup_file_path_of path
36
+ dir, name = ::File.split path
37
+ ::File.join dir, 'backup', name
38
+ end
39
+
40
+ # Delete.
41
+ #
42
+ def delete
43
+ `rm -Rf #{cache_path}`
44
+ end
45
+
46
+ # Checks.
47
+ #
48
+
49
+ # Is the cache small?
50
+ #
51
+ def cache_small?
52
+ size_of(cache_path) < 16
53
+ end
54
+ # Is the cache ok? I.e. larger than zero bytes in size.
55
+ #
56
+ def cache_ok?
57
+ size_of(cache_path) > 0
58
+ end
59
+ def size_of path
60
+ `ls -l #{path} | awk '{print $5}'`.to_i
61
+ end
62
+
63
+ end
64
+
65
+ end
66
+
67
+ end
@@ -0,0 +1,24 @@
1
+ module Index
2
+
3
+ module File
4
+
5
+ class JSON < Basic
6
+
7
+ def extension
8
+ :json
9
+ end
10
+ def load
11
+ Yajl::Parser.parse ::File.open(cache_path, 'r'), :symbolize_keys => true
12
+ end
13
+ def dump hash
14
+ hash.dump_json cache_path
15
+ end
16
+ def retrieve
17
+ raise "Can't retrieve from marshalled file. Use text file."
18
+ end
19
+
20
+ end
21
+
22
+ end
23
+
24
+ end
@@ -0,0 +1,24 @@
1
+ module Index
2
+
3
+ module File
4
+
5
+ class Marshal < Basic
6
+
7
+ def extension
8
+ :dump
9
+ end
10
+ def load
11
+ ::Marshal.load ::File.open(cache_path, 'r:binary')
12
+ end
13
+ def dump hash
14
+ hash.dump_marshalled cache_path
15
+ end
16
+ def retrieve
17
+ raise "Can't retrieve from marshalled file. Use text file."
18
+ end
19
+
20
+ end
21
+
22
+ end
23
+
24
+ end
@@ -0,0 +1,28 @@
1
+ module Index
2
+
3
+ module File
4
+
5
+ class Text < Basic
6
+
7
+ def extension
8
+ :txt
9
+ end
10
+ def load
11
+ raise "Can't load from text file. Use JSON or Marshal."
12
+ end
13
+ def dump hash
14
+ raise "Can't dump to text file. Use JSON or Marshal."
15
+ end
16
+ def retrieve
17
+ ::File.open(cache_path, 'r:binary') do |file|
18
+ file.each_line do |line|
19
+ yield line.split ?,, 2
20
+ end
21
+ end
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,135 @@
1
+ module Index
2
+
3
+ # TODO Think about using 3 instances of this in the bundle.
4
+ #
5
+ class Files
6
+
7
+ attr_reader :bundle_name, :category_name, :type_name
8
+ attr_reader :prepared, :index, :similarity, :weights
9
+
10
+ def initialize bundle_name, category_name, type_name
11
+ @bundle_name = bundle_name
12
+ @category_name = category_name
13
+ @type_name = type_name
14
+
15
+ # Note: We marshal the similarity, as the
16
+ # Yajl json lib cannot load symbolized
17
+ # values, just keys.
18
+ #
19
+ @prepared = File::Text.new "#{cache_directory}/prepared_#{category_name}_index"
20
+ @index = File::JSON.new cache_path(:index)
21
+ @similarity = File::Marshal.new cache_path(:similarity)
22
+ @weights = File::JSON.new cache_path(:weights)
23
+ end
24
+
25
+ # Paths.
26
+ #
27
+
28
+ # Cache path, for File-s.
29
+ #
30
+ def cache_path name
31
+ ::File.join cache_directory, "#{bundle_name}_#{category_name}_#{name}"
32
+ end
33
+
34
+ # Point to category.
35
+ #
36
+ def search_index_root
37
+ ::File.join PICKY_ROOT, 'index'
38
+ end
39
+
40
+ # Create directory and parent directories.
41
+ #
42
+ def create_directory
43
+ FileUtils.mkdir_p cache_directory
44
+ end
45
+ # TODO Move to config. Duplicate Code in field.rb.
46
+ #
47
+ def cache_directory
48
+ "#{search_index_root}/#{PICKY_ENVIRONMENT}/#{type_name}"
49
+ end
50
+ def retrieve &block
51
+ prepared.retrieve &block
52
+ end
53
+
54
+ # Single index/similarity/weights files delegation.
55
+ #
56
+
57
+ # Delegators.
58
+ #
59
+
60
+ # Dumping.
61
+ #
62
+ def dump_index index_hash
63
+ index.dump index_hash
64
+ end
65
+ def dump_similarity similarity_hash
66
+ similarity.dump similarity_hash
67
+ end
68
+ def dump_weights weights_hash
69
+ weights.dump weights_hash
70
+ end
71
+
72
+ # Loading.
73
+ #
74
+ def load_index
75
+ index.load
76
+ end
77
+ def load_similarity
78
+ similarity.load
79
+ end
80
+ def load_weights
81
+ weights.load
82
+ end
83
+
84
+ # Cache ok?
85
+ #
86
+ def index_cache_ok?
87
+ index.cache_ok?
88
+ end
89
+ def similarity_cache_ok?
90
+ similarity.cache_ok?
91
+ end
92
+ def weights_cache_ok?
93
+ weights.cache_ok?
94
+ end
95
+
96
+ # Cache small?
97
+ #
98
+ def index_cache_small?
99
+ index.cache_small?
100
+ end
101
+ def similarity_cache_small?
102
+ similarity.cache_small?
103
+ end
104
+ def weights_cache_small?
105
+ weights.cache_small?
106
+ end
107
+
108
+ # Copies the indexes to the "backup" directory.
109
+ #
110
+ def backup
111
+ index.backup
112
+ similarity.backup
113
+ weights.backup
114
+ end
115
+
116
+ # Restores the indexes from the "backup" directory.
117
+ #
118
+ def restore
119
+ index.restore
120
+ similarity.restore
121
+ weights.restore
122
+ end
123
+
124
+
125
+ # Delete all index files.
126
+ #
127
+ def delete
128
+ index.delete
129
+ similarity.delete
130
+ weights.delete
131
+ end
132
+
133
+ end
134
+
135
+ end
@@ -19,6 +19,8 @@ module Indexers
19
19
  end
20
20
  # Convenience methods for user subclasses.
21
21
  #
22
+ # TODO Duplicate code in Index::Files.
23
+ #
22
24
  def search_index_file_name
23
25
  @field.search_index_file_name
24
26
  end
@@ -48,6 +50,8 @@ module Indexers
48
50
 
49
51
  indexing_message
50
52
 
53
+ # TODO Move open to Index::File.
54
+ #
51
55
  File.open(search_index_file_name, 'w:binary') do |file|
52
56
  result = []
53
57
  source.harvest(@type, @field) do |indexed_id, text|