content_data 0.0.9 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,397 +2,722 @@ require 'log'
2
2
  require 'params'
3
3
  require 'time'
4
4
 
5
- module BBFS
6
- module ContentData
7
-
8
- class Content
9
- attr_reader :checksum, :size, :first_appearance_time
10
-
11
- def initialize(checksum, size, first_appearance_time, content_serializer = nil)
12
- if content_serializer != nil
13
- if (content_serializer.checksum == nil)
14
- raise ArgumentError.new("checksum have to be defined")
15
- else
16
- @checksum = content_serializer.checksum
17
- end
18
- if (content_serializer.size == nil)
19
- raise ArgumentError.new("size have to be defined")
20
- else
21
- @size = content_serializer.size
22
- end
23
- if (content_serializer.first_appearance_time == nil)
24
- raise ArgumentError.new("first_appearance_time have to be defined")
25
- else
26
- @first_appearance_time = ContentData.parse_time(content_serializer.first_appearance_time)
27
- end
28
-
5
+ module ContentData
6
+ Params.string('instance_check_level', 'shallow', 'Defines check level. Supported levels are: ' \
7
+ 'shallow - quick, tests instance for file existence and attributes. ' \
8
+ 'deep - can take more time, in addition to shallow recalculates hash sum.')
9
+
10
+ class Content
11
+ attr_reader :checksum, :size, :first_appearance_time
12
+
13
+ def initialize(checksum, size, first_appearance_time, content_serializer = nil)
14
+ if content_serializer != nil
15
+ if (content_serializer.checksum == nil)
16
+ raise ArgumentError.new("checksum have to be defined")
29
17
  else
30
- @checksum = checksum
31
- @size = size
32
- @first_appearance_time = first_appearance_time
18
+ @checksum = content_serializer.checksum
19
+ end
20
+ if (content_serializer.size == nil)
21
+ raise ArgumentError.new("size have to be defined")
22
+ else
23
+ @size = content_serializer.size
24
+ end
25
+ if (content_serializer.first_appearance_time == nil)
26
+ raise ArgumentError.new("first_appearance_time have to be defined")
27
+ else
28
+ @first_appearance_time = ContentData.parse_time(content_serializer.first_appearance_time)
33
29
  end
34
- end
35
30
 
36
- def to_s
37
- "%s,%d,%s" % [@checksum, @size, ContentData.format_time(@first_appearance_time)]
31
+ else
32
+ @checksum = checksum
33
+ @size = size
34
+ @first_appearance_time = first_appearance_time
38
35
  end
36
+ end
39
37
 
40
- def ==(other)
41
- return (self.checksum.eql? other.checksum and
42
- self.size.eql? other.size and
43
- self.first_appearance_time.to_i.eql? other.first_appearance_time.to_i)
44
- end
38
+ def to_s
39
+ "%s,%d,%s" % [@checksum, @size, ContentData.format_time(@first_appearance_time)]
45
40
  end
46
41
 
47
- class ContentInstance
48
- attr_reader :checksum, :size, :server_name, :device, :full_path, :modification_time
42
+ def ==(other)
43
+ return (self.checksum.eql? other.checksum and
44
+ self.size.eql? other.size and
45
+ self.first_appearance_time.to_i.eql? other.first_appearance_time.to_i)
46
+ end
47
+ end
49
48
 
50
- def initialize(checksum, size, server_name, device, full_path, modification_time, content_instance_serializer = nil)
51
- if content_instance_serializer != nil
52
- if (content_instance_serializer.checksum == nil)
53
- raise ArgumentError.new("checksum have to be defined")
54
- else
55
- @checksum = content_instance_serializer.checksum
56
- end
57
- if (content_instance_serializer.size == nil)
58
- raise ArgumentError.new("size have to be defined")
59
- else
60
- @size = content_instance_serializer.size
61
- end
62
- if (content_instance_serializer.modification_time == nil)
63
- raise ArgumentError.new("modification_time have to be defined")
64
- else
65
- @modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
66
- end
67
- if (content_instance_serializer.server_name == nil)
68
- raise ArgumentError.new("server_name have to be defined")
69
- else
70
- @server_name = content_instance_serializer.server_name
71
- end
72
- if (content_instance_serializer.device == nil)
73
- raise ArgumentError.new("device have to be defined")
74
- else
75
- @device = content_instance_serializer.device
76
- end
77
- if (content_instance_serializer.full_path == nil)
78
- raise ArgumentError.new("full_path have to be defined")
79
- else
80
- @full_path = content_instance_serializer.full_path
81
- end
49
+ class ContentInstance
50
+ attr_reader :checksum, :size, :server_name, :device, :full_path, :modification_time
51
+
52
+ def initialize(checksum, size, server_name, device, full_path, modification_time, content_instance_serializer = nil)
53
+ if content_instance_serializer != nil
54
+ if (content_instance_serializer.checksum == nil)
55
+ raise ArgumentError.new("checksum have to be defined")
56
+ else
57
+ @checksum = content_instance_serializer.checksum
58
+ end
59
+ if (content_instance_serializer.size == nil)
60
+ raise ArgumentError.new("size have to be defined")
82
61
  else
83
- @checksum = checksum
84
- @size = size
85
- @server_name = server_name
86
- @device = device
87
- @full_path = full_path
88
- @modification_time = modification_time
62
+ @size = content_instance_serializer.size
89
63
  end
64
+ if (content_instance_serializer.modification_time == nil)
65
+ raise ArgumentError.new("modification_time have to be defined")
66
+ else
67
+ @modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
68
+ end
69
+ if (content_instance_serializer.server_name == nil)
70
+ raise ArgumentError.new("server_name have to be defined")
71
+ else
72
+ @server_name = content_instance_serializer.server_name
73
+ end
74
+ if (content_instance_serializer.device == nil)
75
+ raise ArgumentError.new("device have to be defined")
76
+ else
77
+ @device = content_instance_serializer.device
78
+ end
79
+ if (content_instance_serializer.full_path == nil)
80
+ raise ArgumentError.new("full_path have to be defined")
81
+ else
82
+ @full_path = content_instance_serializer.full_path
83
+ end
84
+ else
85
+ @checksum = checksum
86
+ @size = size
87
+ @server_name = server_name
88
+ @device = device
89
+ @full_path = full_path
90
+ @modification_time = modification_time
90
91
  end
92
+ end
91
93
 
92
- def global_path
93
- ContentInstance.instance_global_path(@server_name, @full_path)
94
- end
94
+ def global_path
95
+ ContentInstance.instance_global_path(@server_name, @full_path)
96
+ end
95
97
 
96
- def ContentInstance.instance_global_path(server_name, full_path)
97
- "%s:%s" % [server_name, full_path]
98
- end
98
+ def ContentInstance.instance_global_path(server_name, full_path)
99
+ "%s:%s" % [server_name, full_path]
100
+ end
99
101
 
100
- def to_s
101
- "%s,%d,%s,%s,%s,%s" % [@checksum, @size, @server_name,
102
- @device, @full_path, ContentData.format_time(@modification_time)]
103
- end
102
+ def to_s
103
+ "%s,%d,%s,%s,%s,%s" % [@checksum, @size, @server_name,
104
+ @device, @full_path, ContentData.format_time(@modification_time)]
105
+ end
104
106
 
105
- def ==(other)
106
- return (self.checksum.eql? other.checksum and
107
- self.size.eql? other.size and
108
- self.server_name.eql? other.server_name and
109
- self.device.eql? other.device and
110
- self.full_path.eql? other.full_path and
111
- self.modification_time.to_i.eql? other.modification_time.to_i)
107
+ def ==(other)
108
+ return (self.checksum.eql? other.checksum and
109
+ self.size.eql? other.size and
110
+ self.server_name.eql? other.server_name and
111
+ self.device.eql? other.device and
112
+ self.full_path.eql? other.full_path and
113
+ self.modification_time.to_i.eql? other.modification_time.to_i)
114
+ end
115
+ end
116
+
117
+ # Unfortunately this class is used as mutable for now. So need to be careful.
118
+ # TODO(kolman): Make this class immutable, but add indexing structure to it.
119
+ # TODO(kolman): Add wrapper to the class to enable dynamic content data
120
+ # (with easy access indexes)
121
+ class ContentData
122
+ attr_reader :contents, :instances
123
+
124
+ # @param content_data_serializer_str [String]
125
+ def initialize(copy = nil)
126
+ if copy.nil?
127
+ @contents = Hash.new # key is a checksum , value is a refernce to the Content object
128
+ @instances = Hash.new # key is an instance global path , value is a reference to the ContentInstance object
129
+ else
130
+ # Regenerate only the hashes, the values are immutable.
131
+ @contents = copy.contents.clone
132
+ @instances = copy.instances.clone
112
133
  end
113
134
  end
114
135
 
115
- # Unfortunately this class is used as mutable for now. So need to be carefull.
116
- # TODO(kolman): Make this class imutable, but add indexing structure to it.
117
- # TODO(kolman): Add wrapper to the class to enable dynamic content data
118
- # (with easy access indexes)
119
- class ContentData
120
- attr_reader :contents, :instances
136
+ def add_content(content)
137
+ @contents[content.checksum] = content
138
+ end
121
139
 
122
- # @param content_data_serializer_str [String]
123
- def initialize(copy = nil)
124
- if copy.nil?
125
- @contents = Hash.new # key is a checksum , value is a refernce to the Content object
126
- @instances = Hash.new # key is an instance global path , value is a reference to the ContentInstance object
127
- else
128
- # Regenerate only the hashes, the values are immutable.
129
- @contents = copy.contents.clone
130
- @instances = copy.instances.clone
131
- end
140
+ def add_instance(instance)
141
+ if (not @contents.key?(instance.checksum))
142
+ Log.warning sprintf("Adding instance while it's" +
143
+ " checksum %s does not exists.\n", instance.checksum)
144
+ Log.warning sprintf("%s\n", instance.to_s)
145
+ return false
146
+ elsif (@contents[instance.checksum].size != instance.size)
147
+ Log.warning 'File size different from content size while same checksum'
148
+ Log.warning instance.to_s
149
+ return false
132
150
  end
133
151
 
134
- def add_content(content)
135
- @contents[content.checksum] = content
136
- end
152
+ key = instance.global_path
137
153
 
138
- def add_instance(instance)
139
- if (not @contents.key?(instance.checksum))
140
- Log.warning sprintf("Adding instance while it's" +
141
- " checksum %s does not exists.\n", instance.checksum)
142
- Log.warning sprintf("%s\n", instance.to_s)
154
+ #override file if needed
155
+ @instances[key] = instance
156
+ end
157
+
158
+ def empty?
159
+ @contents.empty?
160
+ end
161
+
162
+ # TODO rename method with finishing '?', cause it returns a boolean
163
+ def content_exists(checksum)
164
+ @contents.key? checksum
165
+ end
166
+
167
+ # TODO(kolman): The semantics of this merge are those of merge!; change it throughout the file.
168
+ def merge(content_data)
169
+ content_data.contents.values.each { |content|
170
+ add_content(content)
171
+ }
172
+ content_data.instances.values.each { |instance|
173
+ add_instance(instance)
174
+ }
175
+ end
176
+
177
+ def ==(other)
178
+ return false if other == nil
179
+ return false unless @contents.size == other.contents.size
180
+ return false unless @instances.size == other.instances.size
181
+
182
+ @contents.keys.each { |key|
183
+ if (@contents[key] != other.contents[key])
184
+ Log.info @contents[key].first_appearance_time.to_i
185
+ Log.info other.contents[key].first_appearance_time.to_i
143
186
  return false
144
- elsif (@contents[instance.checksum].size != instance.size)
145
- Log.warning 'File size different from content size while same checksum'
146
- Log.warning instance.to_s
187
+ end
188
+ }
189
+ @instances.keys.each { |key|
190
+ if (@instances[key] != other.instances[key])
147
191
  return false
148
192
  end
193
+ }
194
+ return true
195
+ end
149
196
 
150
- key = instance.global_path
197
+ def to_s
198
+ ret = ""
199
+ ret << @contents.length.to_s << "\n"
200
+ @contents.each_value { |content|
201
+ ret << content.to_s << "\n"
202
+ }
203
+ ret << @instances.length.to_s << "\n"
204
+ @instances.each_value { |instance|
205
+ ret << instance.to_s << "\n"
206
+ }
207
+ return ret
208
+ end
209
+
210
+ def to_file(filename)
211
+ content_data_dir = File.dirname(filename)
212
+ FileUtils.makedirs(content_data_dir) unless File.directory?(content_data_dir)
213
+ File.open(filename, 'w') {|f| f.write(to_s) }
214
+ end
151
215
 
152
- #override file if needed
153
- @instances[key] = instance
216
+ # TODO validation that file indeed contains ContentData missing
217
+ def from_file(filename)
218
+ lines = IO.readlines(filename)
219
+ i = 0
220
+ number_of_contents = lines[i].to_i
221
+ i += 1
222
+ number_of_contents.times {
223
+ parameters = lines[i].split(",")
224
+ add_content(Content.new(parameters[0],
225
+ parameters[1].to_i,
226
+ ContentData.parse_time(parameters[2])))
227
+ i += 1
228
+ }
229
+
230
+ number_of_instances = lines[i].to_i
231
+ i += 1
232
+ number_of_instances.times {
233
+ if lines[i].nil?
234
+ Log.info "lines[i] if nil !!!, Backing filename: #{filename} to #{filename}.bad"
235
+ FileUtils.cp(filename, "#{filename}.bad")
236
+ Log.info lines[i].join("\n")
237
+ end
238
+ parameters = lines[i].split(',')
239
+ # bugfix: if the file name contains a comma, then parsing based on comma separation fails
240
+ if (parameters.size > 6)
241
+ (5..parameters.size-2).each do |i|
242
+ parameters[4] = [parameters[4], parameters[i]].join(",")
243
+ end
244
+ (5..parameters.size-2).each do |i|
245
+ parameters.delete_at(5)
246
+ end
247
+ end
248
+
249
+ add_instance(ContentInstance.new(parameters[0],
250
+ parameters[1].to_i,
251
+ parameters[2],
252
+ parameters[3],
253
+ parameters[4],
254
+ ContentData.parse_time(parameters[5])))
255
+ i += 1
256
+ }
257
+ end
258
+
259
+ def self.parse_time time_str
260
+ return nil unless time_str.instance_of? String
261
+ seconds_from_epoch = Integer time_str # Not using to_i here because it does not check string is integer.
262
+ time = Time.at seconds_from_epoch
263
+ end
264
+
265
+ def self.format_time(time)
266
+ return nil unless time.instance_of?Time
267
+ str = time.to_i.to_s
268
+ return str
269
+ end
270
+
271
+ # merges content data a and content data b to a new content data and returns it.
272
+ def self.merge(a, b)
273
+ return b unless not a.nil?
274
+ return a unless not b.nil?
275
+
276
+ return nil unless a.instance_of?ContentData
277
+ return nil unless b.instance_of?ContentData
278
+
279
+ ret = ContentData.new
280
+ ret.merge(a)
281
+ ret.merge(b)
282
+
283
+ return ret
284
+ end
285
+
286
+ # removes content data a from content data b and returns the new content data.
287
+ def self.remove(a, b)
288
+ return nil unless a.instance_of?ContentData
289
+ return nil unless b.instance_of?ContentData
290
+
291
+ ret = ContentData.new
292
+
293
+ b.contents.values.each { |content|
294
+ #print "%s - %s\n" % [content.checksum, a.content_exists(content.checksum).to_s]
295
+ ret.add_content(content) unless a.content_exists(content.checksum)
296
+ }
297
+
298
+ #Log.info "kaka"
299
+
300
+ b.instances.values.each { |instance|
301
+ #print "%s - %s\n" % [instance.checksum, a.content_exists(instance.checksum).to_s]
302
+ ret.add_instance(instance) unless a.content_exists(instance.checksum)
303
+ }
304
+
305
+ #print "kuku %s" % ret.contents.size.to_s
306
+ #print "kuku %s" % ret.instances.size.to_s
307
+ return ret
308
+ end
309
+
310
+ def self.remove_instances(a, b)
311
+ return nil unless a.instance_of?ContentData
312
+ return nil unless b.instance_of?ContentData
313
+
314
+ ret = ContentData.new
315
+ b.instances.values.each do |instance|
316
+ if !a.instances.key?(instance.global_path)
317
+ ret.add_content(b.contents[instance.checksum])
318
+ ret.add_instance(instance)
319
+ end
154
320
  end
321
+ return ret
322
+ end
155
323
 
156
- def empty?
157
- @contents.empty?
324
+ def self.remove_directory(cd, global_dir_path)
325
+ return nil unless cd.instance_of?ContentData
326
+
327
+ ret = ContentData.new
328
+ cd.instances.values.each do |instance|
329
+ Log.debug3("global path to check: #{global_dir_path}")
330
+ Log.debug3("instance global path: #{instance.global_path}")
331
+ if instance.global_path.scan(global_dir_path).size == 0
332
+ Log.debug3("Adding instance.")
333
+ ret.add_content(cd.contents[instance.checksum])
334
+ ret.add_instance(instance)
335
+ end
158
336
  end
337
+ return ret
338
+ end
339
+
340
+ # returns the common content in both a and b
341
+ def self.intersect(a, b)
342
+ b_minus_a = ContentData.remove(a, b)
343
+ return ContentData.remove(b_minus_a, b)
344
+ end
159
345
 
160
- def content_exists(checksum)
161
- @contents.key? checksum
346
+ # unify time for all entries with same content to minimal time
347
+ def self.unify_time(db)
348
+ mod_db = ContentData.new # resulting ContentData that will consists objects with unified time
349
+ checksum2time = Hash.new # key=checksum value=min_time_for_this_checksum
350
+ checksum2instances = Hash.new # key=checksum value=array_of_instances_with_this_checksum (Will be replaced with ContentData method)
351
+
352
+ # populate tables with given ContentData entries
353
+ db.instances.each_value do |instance|
354
+ checksum = instance.checksum
355
+ time = instance.modification_time
356
+
357
+ unless (checksum2instances.has_key? checksum)
358
+ checksum2instances[checksum] = []
359
+ end
360
+ checksum2instances[checksum] << instance
361
+
362
+ if (not checksum2time.has_key? checksum)
363
+ checksum2time[checksum] = time
364
+ elsif ((checksum2time[checksum] <=> time) > 0)
365
+ checksum2time[checksum] = time
366
+ end
162
367
  end
163
368
 
164
- # TODO(kolman): The semantics of thir merge is merge! change in all file.
165
- def merge(content_data)
166
- content_data.contents.values.each { |content|
167
- add_content(content)
168
- }
169
- content_data.instances.values.each { |instance|
170
- add_instance(instance)
171
- }
369
+ # update min time table with time information from contents
370
+ db.contents.each do |checksum, content|
371
+ time = content.first_appearance_time
372
+ if (not checksum2time.has_key? checksum)
373
+ checksum2time[checksum] = time
374
+ elsif ((checksum2time[checksum] <=> time) > 0)
375
+ checksum2time[checksum] = time
376
+ end
172
377
  end
173
378
 
174
- def ==(other)
175
- return false if other == nil
176
- return false unless @contents.size == other.contents.size
177
- return false unless @instances.size == other.instances.size
379
+ # add content entries to the output table; if needed, update the time field with the found min time
380
+ db.contents.each do |checksum, content|
381
+ time = checksum2time[checksum]
382
+ if ((content.first_appearance_time <=> time) == 0)
383
+ mod_db.add_content(content)
384
+ else
385
+ mod_db.add_content(Content.new(checksum, content.size, time))
386
+ end
387
+ end
178
388
 
179
- @contents.keys.each { |key|
180
- if (@contents[key] != other.contents[key])
181
- Log.info @contents[key].first_appearance_time.to_i
182
- Log.info other.contents[key].first_appearance_time.to_i
183
- return false
184
- end
185
- }
186
- @instances.keys.each { |key|
187
- if (@instances[key] != other.instances[key])
188
- return false
389
+ # add instance entries to the output table; if needed, update the time field with the found min time
390
+ checksum2instances.each do |checksum, instances|
391
+ time = checksum2time[checksum]
392
+ instances.each do |instance|
393
+ if ((instance.modification_time <=> time) == 0)
394
+ mod_db.add_instance(instance)
395
+ else # must be bigger then found min time
396
+ mod_instance = ContentInstance.new(instance.checksum, instance.size,
397
+ instance.server_name, instance.device,
398
+ instance.full_path, time)
399
+ mod_db.add_instance(mod_instance)
189
400
  end
190
- }
191
- return true
401
+ end
192
402
  end
403
+ mod_db
404
+ end
193
405
 
194
- def to_s
195
- ret = ""
196
- ret << @contents.length.to_s << "\n"
197
- @contents.each_value { |content|
198
- ret << content.to_s << "\n"
199
- }
200
- ret << @instances.length.to_s << "\n"
201
- @instances.each_value { |instance|
202
- ret << instance.to_s << "\n"
203
- }
204
- return ret
406
+ # Validates index against file system that all instances hold a correct data regarding files
407
+ # that they represent.
408
+ #
409
+ # There are two levels of validation, controlled by instance_check_level system parameter:
410
+ # * shallow - quick, tests instance for file existence and attributes.
411
+ # * deep - can take more time, in addition to shallow recalculates hash sum.
412
+ # @param [Hash] params hash of parameters of validation, can be used to return additional data.
413
+ #
414
+ # Supported key/value combinations:
415
+ # * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
416
+ # @return [Boolean] true when index is correct, false otherwise
417
+ def validate(params = nil)
418
+ # used to answer whether specific param was set
419
+ param_exists = Proc.new do |param|
420
+ !(params.nil? || params[param].nil?)
205
421
  end
206
422
 
207
- def to_file(filename)
208
- content_data_dir = File.dirname(filename)
209
- FileUtils.makedirs(content_data_dir) unless File.directory?(content_data_dir)
210
- File.open(filename, 'w') {|f| f.write(to_s) }
423
+ # used to process method parameters centrally
424
+ process_params = Proc.new do |values|
425
+ # values is a Hash with keys: :content, :instance and value appropriate to key
426
+ if param_exists.call :failed
427
+ unless values[:content].nil?
428
+ params[:failed].add_content values[:content]
429
+ end
430
+ unless values[:instance].nil?
431
+ # appropriate content should be already added
432
+ params[:failed].add_instance values[:instance]
433
+ end
434
+ end
211
435
  end
212
436
 
213
- def from_file(filename)
214
- lines = IO.readlines(filename)
215
- i = 0
216
- number_of_contents = lines[i].to_i
217
- i += 1
218
- number_of_contents.times {
219
- parameters = lines[i].split(",")
220
- add_content(Content.new(parameters[0],
221
- parameters[1].to_i,
222
- ContentData.parse_time(parameters[2])))
223
- i += 1
224
- }
225
-
226
- number_of_instances = lines[i].to_i
227
- i += 1
228
- number_of_instances.times {
229
- parameters = lines[i].split(',')
230
- # bugfix: if file name consist a comma then parsing based on comma separating fails
231
- if (parameters.size > 6)
232
- (5..parameters.size-2).each do |i|
233
- parameters[4] = [parameters[4], parameters[i]].join(",")
234
- end
235
- (5..parameters.size-2).each do |i|
236
- parameters.delete_at(5)
237
- end
437
+ is_valid = true
438
+ instances.each_value do |instance|
439
+ unless check_instance instance
440
+ is_valid = false
441
+
442
+ unless params.nil? || params.empty?
443
+ process_params.call :content => contents[instance.checksum], :instance => instance
238
444
  end
445
+ end
446
+ end
447
+
448
+ is_valid
449
+ end
239
450
 
240
- add_instance(ContentInstance.new(parameters[0],
241
- parameters[1].to_i,
242
- parameters[2],
243
- parameters[3],
244
- parameters[4],
245
- ContentData.parse_time(parameters[5])))
246
- i += 1
247
- }
451
+ def shallow_check(instance)
452
+ path = instance.full_path
453
+ is_valid = true
454
+
455
+ if (File.exists?(path))
456
+ if File.size(path) != instance.size
457
+ is_valid = false
458
+ err_msg = "#{path} size #{File.size(path)} differs from indexed size #{instance.size}"
459
+ Log.warning err_msg
460
+ end
461
+ #if ContentData.format_time(File.mtime(path)) != instance.modification_time
462
+ if File.mtime(path).to_i != instance.modification_time.to_i
463
+ is_valid = false
464
+ err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
465
+ + "indexed #{instance.modification_time}"
466
+ Log.warning err_msg
467
+ end
468
+ else
469
+ is_valid = false
470
+ err_msg = "Indexed file #{path} doesn't exist"
471
+ Log.warning err_msg
248
472
  end
473
+ is_valid
474
+ end
249
475
 
250
- def self.parse_time time_str
251
- return nil unless time_str.instance_of? String
252
- seconds_from_epoch = Integer time_str # Not using to_i here because it does not check string is integer.
253
- time = Time.at seconds_from_epoch
476
+ def deep_check(instance)
477
+ if shallow_check(instance)
478
+ path = instance.full_path
479
+ current_checksum = FileIndexing::IndexAgent.get_checksum(path)
480
+ if instance.checksum == current_checksum
481
+ true
482
+ else
483
+ err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance.checksum}"
484
+ Log.warning err_msg
485
+ false
486
+ end
487
+ else
488
+ false
254
489
  end
490
+ end
255
491
 
256
- def self.format_time(time)
257
- return nil unless time.instance_of?Time
258
- str = time.to_i.to_s
259
- return str
492
+ def check_instance(instance)
493
+ case Params['instance_check_level']
494
+ when 'deep'
495
+ deep_check instance
496
+ when 'shallow'
497
+ shallow_check instance
498
+ else
499
+ # TODO remove it when params will support set of values
500
+ throw ArgumentError.new "Unsupported check level #{Params['instance_check_level']}"
260
501
  end
502
+ end
261
503
 
262
- # merges content data a and content data b to a new content data and returns it.
263
- def self.merge(a, b)
264
- return b unless not a.nil?
265
- return a unless not b.nil?
266
504
 
267
- return nil unless a.instance_of?ContentData
268
- return nil unless b.instance_of?ContentData
505
+ # TODO simplify conditions
506
+ # This method is experimental and shouldn't be used
507
+ # nil is used to define +/- infinity for to/from method arguments
508
+ # from/to values are exclusive in the condition's calculations
509
+ # Need to take care about '==' operation that is used for object's comparison.
510
+ # If needed, the user should define their own '==' implementation.
511
+ def get_query(variable, params)
512
+ raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
269
513
 
270
- ret = ContentData.new
271
- ret.merge(a)
272
- ret.merge(b)
514
+ exact = params['exact'].nil? ? Array.new : params['exact']
515
+ from = params['from']
516
+ to = params ['to']
517
+ is_inside = params['is_inside']
273
518
 
274
- return ret
519
+ unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
520
+ raise ArgumentError "#{variable} isn't a ContentInstance variable"
275
521
  end
276
522
 
277
- # removed content data a from content data b and returns the new content data.
278
- def self.remove(a, b)
279
- return nil unless a.instance_of?ContentData
280
- return nil unless b.instance_of?ContentData
523
+ if (exact.nil? && from.nil? && to.nil?)
524
+ raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
525
+ end
281
526
 
282
- ret = ContentData.new
527
+ if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
528
+ raise ArgumentError 'to and from arguments should be comparable one with another'
529
+ end
283
530
 
284
- b.contents.values.each { |content|
285
- #print "%s - %s\n" % [content.checksum, a.content_exists(content.checksum).to_s]
286
- ret.add_content(content) unless a.content_exists(content.checksum)
287
- }
531
+ # FIXME add support for from/to for Strings
532
+ if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
533
+ || (!to.nil? && to.kind_of?(Numeric.new.class)))
534
+ raise ArgumentError 'from and to options supported only for numeric values'
535
+ end
288
536
 
289
- #Log.info "kaka"
537
+ if (!exact.empty? && (!from.nil? || !to.nil?))
538
+ raise ArgumentError 'exact and from/to options are mutually exclusive'
539
+ end
290
540
 
291
- b.instances.values.each { |instance|
292
- #print "%s - %s\n" % [instance.checksum, a.content_exists(instance.checksum).to_s]
293
- ret.add_instance(instance) unless a.content_exists(instance.checksum)
294
- }
541
+ result_index = ContentData.new
542
+ instances.each_value do |instance|
543
+ is_match = false
544
+ var_value = instance.instance_variable_get("@#{variable}")
295
545
 
296
- #print "kuku %s" % ret.contents.size.to_s
297
- #print "kuku %s" % ret.instances.size.to_s
298
- return ret
546
+ if exact.include? var_value
547
+ is_match = true
548
+ elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
549
+ is_match = true
550
+ end
551
+
552
+ if (is_match && is_inside) || (!is_match && !is_inside)
553
+ checksum = instance.checksum
554
+ result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
555
+ result_index.add_instance instance
556
+ end
299
557
  end
558
+ result_index
559
+ end
300
560
 
301
- def self.remove_instances(a, b)
302
- return nil unless a.instance_of?ContentData
303
- return nil unless b.instance_of?ContentData
561
+ private :shallow_check, :deep_check, :check_instance
562
+ end
304
563
 
305
- ret = ContentData.new
306
- b.instances.values.each do |instance|
307
- if !a.instances.key?(instance.global_path)
308
- ret.add_content(b.contents[instance.checksum])
309
- ret.add_instance(instance)
310
- end
564
+ # Validates index against file system that all instances hold a correct data regarding files
565
+ # that they represent.
566
+ #
567
+ # There are two levels of validation, controlled by instance_check_level system parameter:
568
+ # * shallow - quick, tests instance for file existence and attributes.
569
+ # * deep - can take more time, in addition to shallow recalculates hash sum.
570
+ # @param [Hash] params hash of parameters of validation, can be used to return additional data.
571
+ #
572
+ # Supported key/value combinations:
573
+ # * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
574
+ # @return [Boolean] true when index is correct, false otherwise
575
+ # @raise [ArgumentError] when instance_check_level is incorrect
576
+ def validate(params = nil)
577
+ # used to answer whether specific param was set
578
+ param_exists = Proc.new do |param|
579
+ !(params.nil? || params[param].nil?)
580
+ end
581
+
582
+ # used to process method parameters centrally
583
+ process_params = Proc.new do |values|
584
+ # values is a Hash with keys: :content, :instance and value appropriate to key
585
+ if param_exists.call :failed
586
+ unless values[:content].nil?
587
+ params[:failed].add_content values[:content]
588
+ end
589
+ unless values[:instance].nil?
590
+ # appropriate content should be already added
591
+ params[:failed].add_instance values[:instance]
311
592
  end
312
- return ret
313
593
  end
594
+ end
314
595
 
315
- def self.remove_directory(cd, global_dir_path)
316
- return nil unless cd.instance_of?ContentData
317
-
318
- ret = ContentData.new
319
- cd.instances.values.each do |instance|
320
- Log.debug3("global path to check: #{global_dir_path}")
321
- Log.debug3("instance global path: #{instance.global_path}")
322
- if instance.global_path.scan(global_dir_path).size == 0
323
- Log.debug3("Adding instance.")
324
- ret.add_content(cd.contents[instance.checksum])
325
- ret.add_instance(instance)
326
- end
596
+ is_valid = true
597
+ instances.each_value do |instance|
598
+ unless check_instance instance
599
+ is_valid = false
600
+
601
+ unless params.nil? || params.empty?
602
+ process_params.call :content => contents[instance.checksum], :instance => instance
327
603
  end
328
- return ret
329
604
  end
605
+ end
606
+
607
+ is_valid
608
+ end
609
+
610
# Quick validity check of an indexed instance against the file system.
#
# The instance is considered valid when the file at instance.full_path
# exists, its size equals instance.size and its modification time equals
# instance.modification_time (both compared via to_i, i.e. at whole-second
# resolution). Every mismatch is reported through Log.warning.
#
# @param instance [#full_path, #size, #modification_time] indexed instance
# @return [Boolean] true when the file matches the indexed attributes
def shallow_check(instance)
  path = instance.full_path

  # File.exist? is used instead of File.exists?, which was deprecated and
  # later removed from Ruby (3.2).
  unless File.exist?(path)
    Log.warning "Indexed file #{path} doesn't exist"
    return false
  end

  is_valid = true

  # Read each attribute once so the logged value is the compared value.
  actual_size = File.size(path)
  if actual_size != instance.size
    is_valid = false
    Log.warning "#{path} size #{actual_size} differs from indexed size #{instance.size}"
  end

  actual_mtime = File.mtime(path)
  if actual_mtime.to_i != instance.modification_time.to_i
    is_valid = false
    Log.warning "#{path} modification time #{actual_mtime} differs from " \
                "indexed #{instance.modification_time}"
  end

  is_valid
end
336
634
 
337
- # unify time for all entries with same content to minimal time
338
- def self.unify_time(db)
339
- mod_db = ContentData.new # resulting ContentData that will consists objects with unified time
340
- checksum2time = Hash.new # key=checksum value=min_time_for_this_checksum
341
- checksum2instances = Hash.new # key=checksum value=array_of_instances_with_this_checksum (Will be replaced with ContentData method)
635
# Thorough validity check: runs shallow_check first and, when that passes,
# recomputes the file checksum via FileIndexing::IndexAgent.get_checksum and
# compares it with the indexed checksum. A mismatch is logged with Log.warning.
#
# @param instance indexed instance providing full_path and checksum
# @return [Boolean] true only when both the shallow check and the checksum match
def deep_check(instance)
  return false unless shallow_check(instance)

  file_path = instance.full_path
  actual_checksum = FileIndexing::IndexAgent.get_checksum(file_path)
  return true if instance.checksum == actual_checksum

  Log.warning "#{file_path} checksum #{actual_checksum} differs from indexed #{instance.checksum}"
  false
end
342
650
 
343
- # populate tables with given ContentData entries
344
- db.instances.each_value do |instance|
345
- checksum = instance.checksum
346
- time = instance.modification_time
651
# Validates an instance using the level configured in
# Params['instance_check_level']: 'deep' delegates to deep_check,
# 'shallow' delegates to shallow_check.
#
# @param instance the indexed instance to verify
# @return [Boolean] result of the selected check
# @raise [ArgumentError] when instance_check_level is incorrect
def check_instance(instance)
  level = Params['instance_check_level']
  case level
  when 'deep'
    deep_check instance
  when 'shallow'
    shallow_check instance
  else
    # TODO remove it when params will support set of values
    # raise (not throw): throw is catch/throw control flow and would surface
    # as an "uncaught throw" error instead of the intended message.
    raise ArgumentError, "Unsupported check level #{level}"
  end
end
347
663
 
348
- unless (checksum2instances.has_key? checksum)
349
- checksum2instances[checksum] = []
350
- end
351
- checksum2instances[checksum] << instance
352
664
 
353
- if (not checksum2time.has_key? checksum)
354
- checksum2time[checksum] = time
355
- elsif ((checksum2time[checksum] <=> time) > 0)
356
- checksum2time[checksum] = time
357
- end
358
- end
665
# TODO simplify conditions
# This method is experimental and shouldn't be used; it currently always
# raises RuntimeError before doing any work.
# nil is used to define +/- infinity for the to/from arguments.
# from/to values are exclusive in the condition's calculations.
# Take care with the '==' operation that is used for object comparison
# (through Array#include?); if needed, the user should define their own
# '==' implementation.
#
# @param variable [String, Symbol] name (without '@') of the instance
#   variable of an instance to filter by
# @param params [Hash] string-keyed options: 'exact' (Array of accepted
#   values), 'from', 'to' (exclusive numeric bounds), 'is_inside' (truthy to
#   keep matching instances, falsy to keep non-matching ones)
# @return [ContentData] new index holding the selected instances and their contents
# @raise [RuntimeError] always, while the method is disabled
# @raise [ArgumentError] on inconsistent options (once the guard is removed)
def get_query(variable, params)
  raise RuntimeError.new 'This method is experimental and shouldn\'t be used'

  # NOTE: everything below is unreachable until the guard above is removed.
  exact = params['exact'].nil? ? [] : params['exact']
  from = params['from']
  to = params['to']
  is_inside = params['is_inside']

  # NOTE(review): assumes ContentInstance.new can be called without
  # arguments and still defines its instance variables - confirm.
  unless ContentInstance.new.instance_variable_defined?("@#{variable}")
    raise ArgumentError, "#{variable} isn't a ContentInstance variable"
  end

  # exact defaults to an empty array, so emptiness (not nil) means "not given".
  if exact.empty? && from.nil? && to.nil?
    raise ArgumentError, 'At least one of the argiments {exact, from, to} must be defined'
  end

  # Reject bounds of different kinds when both are given.
  # NOTE(review): this also rejects mixed Integer/Float bounds - confirm intent.
  if !(from.nil? || to.nil?) && !from.kind_of?(to.class)
    raise ArgumentError, 'to and from arguments should be comparable one with another'
  end

  # FIXME add support for from/to for Strings
  if (!from.nil? && !from.kind_of?(Numeric)) || (!to.nil? && !to.kind_of?(Numeric))
    raise ArgumentError, 'from and to options supported only for numeric values'
  end

  if !exact.empty? && (!from.nil? || !to.nil?)
    raise ArgumentError, 'exact and from/to options are mutually exclusive'
  end

  result_index = ContentData.new
  instances.each_value do |instance|
    var_value = instance.instance_variable_get("@#{variable}")

    # The range test applies only when no exact values were given; otherwise
    # nil bounds would make every instance match.
    is_match =
      if exact.empty?
        (from.nil? || var_value > from) && (to.nil? || var_value < to)
      else
        exact.include?(var_value)
      end

    if (is_match && is_inside) || (!is_match && !is_inside)
      checksum = instance.checksum
      result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
      result_index.add_instance instance
    end
  end
  result_index
end
720
+
721
+ private :shallow_check, :deep_check, :check_instance, :get_query
398
722
  end
723
+