content_data 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,210 +1,253 @@
1
+ require 'content_server/globals'
1
2
  require 'log'
2
3
  require 'params'
3
- require 'time'
4
4
 
5
5
  module ContentData
6
6
  Params.string('instance_check_level', 'shallow', 'Defines check level. Supported levels are: ' \
7
7
  'shallow - quick, tests instance for file existence and attributes. ' \
8
8
  'deep - can take more time, in addition to shallow recalculates hash sum.')
9
9
 
10
- class Content
11
- attr_reader :checksum, :size, :first_appearance_time
12
-
13
- def initialize(checksum, size, first_appearance_time, content_serializer = nil)
14
- if content_serializer != nil
15
- if (content_serializer.checksum == nil)
16
- raise ArgumentError.new("checksum have to be defined")
17
- else
18
- @checksum = content_serializer.checksum
19
- end
20
- if (content_serializer.size == nil)
21
- raise ArgumentError.new("size have to be defined")
22
- else
23
- @size = content_serializer.size
24
- end
25
- if (content_serializer.first_appearance_time == nil)
26
- raise ArgumentError.new("first_appearance_time have to be defined")
27
- else
28
- @first_appearance_time = ContentData.parse_time(content_serializer.first_appearance_time)
29
- end
10
+ # Content Data(CD) object holds files information as contents and instances
11
+ # Files info retrieved from hardware: checksum, size, time modification, server, device and path
12
+ # Those attributes are divided into content and instance attributes:
13
+ # unique checksum, size are content attributes
14
+ # time modification, server, device and path are instance attributes
15
+ # The relationship between content and instances is 1:many meaning that
16
+ # a content can have instances in many servers.
17
+ # content also has time attribute, which has the value of the time of the first instance.
18
+ # This can be changed by using unify_time method which sets all time attributes for a content and its
19
+ # instances to the min time of all.
20
+ # Different files(instances) with same content(checksum), are grouped together under that content.
21
+ # Interface methods include:
22
+ # iterate over contents and instances info,
23
+ # unify time, add/remove instance, queries, merge, remove directory and more.
24
+ # Content info data structure:
25
+ # @contents_info = { Checksum -> [size, *instances*, content_modification_time] }
26
+ # *instances* = {[server,path] -> instance_modification_time }
27
+ # Notes:
28
+ # 1. content_modification_time is the instance_modification_time of the first
29
+ # instance which was added to @contents_info
30
+ class ContentData
30
31
 
32
+ def initialize(other = nil)
33
+ ObjectSpace.define_finalizer(self,
34
+ self.class.method(:finalize).to_proc)
35
+ if Params['enable_monitoring']
36
+ ::ContentServer::Globals.process_vars.inc('obj add ContentData')
37
+ end
38
+ if other.nil?
39
+ @contents_info = {} # Checksum --> [size, paths-->time(instance), time(content)]
31
40
  else
32
- @checksum = checksum
33
- @size = size
34
- @first_appearance_time = first_appearance_time
41
+ @contents_info = other.clone_contents_info
35
42
  end
36
43
  end
37
44
 
38
- def to_s
39
- "%s,%d,%s" % [@checksum, @size, ContentData.format_time(@first_appearance_time)]
45
+ def self.finalize(id)
46
+ if Params['enable_monitoring']
47
+ ::ContentServer::Globals.process_vars.inc('obj rem ContentData')
48
+ end
40
49
  end
41
50
 
42
- def ==(other)
43
- return (self.checksum.eql? other.checksum and
44
- self.size.eql? other.size and
45
- self.first_appearance_time.to_i.eql? other.first_appearance_time.to_i)
51
+ # returns a cloned copy of the contents database (@contents_info)
52
+ def clone_contents_info
53
+ @contents_info.keys.inject({}) { |clone_contents_info, checksum|
54
+ instances = @contents_info[checksum]
55
+ size = instances[0]
56
+ content_time = instances[2]
57
+ instances_db = instances[1]
58
+ instances_db_cloned = {}
59
+ instances_db.keys.each { |location|
60
+ instance_mtime = instances_db[location]
61
+ instances_db_cloned[[location[0].clone,location[1].clone]]=instance_mtime
62
+ }
63
+ clone_contents_info[checksum] = [size,
64
+ instances_db_cloned,
65
+ content_time]
66
+ clone_contents_info
67
+ }
46
68
  end
47
- end
48
-
49
- class ContentInstance
50
- attr_reader :checksum, :size, :server_name, :device, :full_path, :modification_time
51
69
 
52
- def initialize(checksum, size, server_name, device, full_path, modification_time, content_instance_serializer = nil)
53
- if content_instance_serializer != nil
54
- if (content_instance_serializer.checksum == nil)
55
- raise ArgumentError.new("checksum have to be defined")
56
- else
57
- @checksum = content_instance_serializer.checksum
58
- end
59
- if (content_instance_serializer.size == nil)
60
- raise ArgumentError.new("size have to be defined")
61
- else
62
- @size = content_instance_serializer.size
63
- end
64
- if (content_instance_serializer.modification_time == nil)
65
- raise ArgumentError.new("modification_time have to be defined")
66
- else
67
- @modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
68
- end
69
- if (content_instance_serializer.server_name == nil)
70
- raise ArgumentError.new("server_name have to be defined")
71
- else
72
- @server_name = content_instance_serializer.server_name
73
- end
74
- if (content_instance_serializer.device == nil)
75
- raise ArgumentError.new("device have to be defined")
76
- else
77
- @device = content_instance_serializer.device
78
- end
79
- if (content_instance_serializer.full_path == nil)
80
- raise ArgumentError.new("full_path have to be defined")
81
- else
82
- @full_path = content_instance_serializer.full_path
83
- end
84
- else
85
- @checksum = checksum
86
- @size = size
87
- @server_name = server_name
88
- @device = device
89
- @full_path = full_path
90
- @modification_time = modification_time
91
- end
70
+ # iterator over @contents_info data structure (not including instances)
71
+ # block is provided with: checksum, size and content modification time
72
+ def each_content(&block)
73
+ @contents_info.keys.each { |checksum|
74
+ content_val = @contents_info[checksum]
75
+ # provide checksum, size and content modification time to the block
76
+ block.call(checksum,content_val[0], content_val[2])
77
+ }
92
78
  end
93
79
 
94
- def global_path
95
- ContentInstance.instance_global_path(@server_name, @full_path)
80
+ # iterator over @contents_info data structure (including instances)
81
+ # block is provided with: checksum, size, content modification time,
82
+ # instance modification time, server and file path
83
+ def each_instance(&block)
84
+ @contents_info.keys.each { |checksum|
85
+ content_info = @contents_info[checksum]
86
+ content_info[1].keys.each {|location|
87
+ # provide the block with: checksum, size, content modification time,instance modification time,
88
+ # server and path.
89
+ instance_modification_time = content_info[1][location]
90
+ block.call(checksum,content_info[0], content_info[2], instance_modification_time,
91
+ location[0], location[1])
92
+ }
93
+ }
96
94
  end
97
95
 
98
- def ContentInstance.instance_global_path(server_name, full_path)
99
- "%s:%s" % [server_name, full_path]
96
+ # iterator over the instances of a specific content
97
+ # block is provided with: checksum, size, content modification time,
98
+ # instance modification time, server and file path
99
+ def content_each_instance(checksum, &block)
100
+ content_info = @contents_info[checksum]
101
+ content_info[1].keys.each {|location|
102
+ # provide the block with: checksum, size, content modification time,instance modification time,
103
+ # server and path.
104
+ instance_modification_time = content_info[1][location]
105
+ block.call(checksum,content_info[0], content_info[2], instance_modification_time,
106
+ location[0], location[1])
107
+ }
100
108
  end
101
109
 
102
- def to_s
103
- "%s,%d,%s,%s,%s,%s" % [@checksum, @size, @server_name,
104
- @device, @full_path, ContentData.format_time(@modification_time)]
110
+ def contents_size()
111
+ @contents_info.size
105
112
  end
106
113
 
107
- def ==(other)
108
- return (self.checksum.eql? other.checksum and
109
- self.size.eql? other.size and
110
- self.server_name.eql? other.server_name and
111
- self.device.eql? other.device and
112
- self.full_path.eql? other.full_path and
113
- self.modification_time.to_i.eql? other.modification_time.to_i)
114
+ def instances_size(checksum)
115
+ content_info = @contents_info[checksum]
116
+ return 0 if content_info.nil?
117
+ content_info[1].size
114
118
  end
115
- end
116
119
 
117
- # Unfortunately this class is used as mutable for now. So need to be carefull.
118
- # TODO(kolman): Make this class imutable, but add indexing structure to it.
119
- # TODO(kolman): Add wrapper to the class to enable dynamic content data
120
- # (with easy access indexes)
121
- class ContentData
122
- attr_reader :contents, :instances
120
+ def get_instance_mod_time(checksum, location)
121
+ content_info = @contents_info[checksum]
122
+ return nil if content_info.nil?
123
+ instances = content_info[1]
124
+ instance_time = instances[location]
125
+ end
123
126
 
124
- # @param content_data_serializer_str [String]
125
- def initialize(copy = nil)
126
- if copy.nil?
127
- @contents = Hash.new # key is a checksum , value is a refernce to the Content object
128
- @instances = Hash.new # key is an instance global path , value is a reference to the ContentInstance object
127
+ def add_instance(checksum, size, server, path, modification_time)
128
+ location = [server, path]
129
+ content_info = @contents_info[checksum]
130
+ if content_info.nil?
131
+ @contents_info[checksum] = [size,
132
+ {location => modification_time},
133
+ modification_time]
129
134
  else
130
- # Regenerate only the hashes, the values are immutable.
131
- @contents = copy.contents.clone
132
- @instances = copy.instances.clone
135
+ if size != content_info[0]
136
+ Log.warning 'File size different from content size while same checksum'
137
+ Log.warning("instance location:server:'#{location[0]}' path:'#{location[1]}'")
138
+ Log.warning("instance mod time:'#{modification_time}'")
139
+ end
140
+ #override file if needed
141
+ content_info[0] = size
142
+ instances = content_info[1]
143
+ instances[location] = modification_time
133
144
  end
134
145
  end
135
146
 
136
- def add_content(content)
137
- @contents[content.checksum] = content
147
+ def empty?
148
+ @contents_info.empty?
138
149
  end
139
150
 
140
- def add_instance(instance)
141
- if (not @contents.key?(instance.checksum))
142
- Log.warning sprintf("Adding instance while it's" +
143
- " checksum %s does not exists.\n", instance.checksum)
144
- Log.warning sprintf("%s\n", instance.to_s)
145
- return false
146
- elsif (@contents[instance.checksum].size != instance.size)
147
- Log.warning 'File size different from content size while same checksum'
148
- Log.warning instance.to_s
149
- return false
150
- end
151
+ def content_exists(checksum)
152
+ @contents_info.has_key?(checksum)
153
+ end
151
154
 
152
- key = instance.global_path
153
155
 
154
- #override file if needed
155
- @instances[key] = instance
156
+ # TODO (genadyp) consider using a hash for defining optional parameters
157
+ def instance_exists(path, server, checksum=nil)
158
+ location = [server, path]
159
+ if checksum.nil?
160
+ @contents_info.values.any? { |content_db|
161
+ content_db[1].has_key?(location)
162
+ }
163
+ else
164
+ content_info = @contents_info[checksum]
165
+ return false if content_info.nil?
166
+ content_info[1].has_key?(location)
167
+ end
156
168
  end
157
169
 
158
- def empty?
159
- @contents.empty?
170
+ def stats_by_location(location)
171
+ @contents_info.each_value { |content_db|
172
+ if content_db[1].has_key?(location)
173
+ return [content_db[0], content_db[1][location]]
174
+ end
175
+ }
176
+ return nil
160
177
  end
161
178
 
162
- # TODO rename method with finishing '?', cause it returns a boolean
163
- def content_exists(checksum)
164
- @contents.key? checksum
179
+
180
+ # removes an instance from a known content (faster than when the content is unknown)
181
+ # also removes the content, if the content becomes empty
182
+ def remove_instance(location, checksum=nil)
183
+ if checksum.nil?
184
+ @contents_info.keys.each { |checksum|
185
+ instances = @contents_info[checksum][1]
186
+ instances.delete(location)
187
+ @contents_info.delete(checksum) if instances.empty?
188
+ }
189
+ else
190
+ content_info = @contents_info[checksum]
191
+ unless content_info.nil?
192
+ instances = content_info[1]
193
+ instances.delete(location)
194
+ @contents_info.delete(checksum) if instances.empty?
195
+ end
196
+ end
165
197
  end
166
198
 
167
- # TODO(kolman): The semantics of thir merge is merge! change in all file.
168
- def merge(content_data)
169
- content_data.contents.values.each { |content|
170
- add_content(content)
171
- }
172
- content_data.instances.values.each { |instance|
173
- add_instance(instance)
199
+ def remove_directory(dir_to_remove, server)
200
+ @contents_info.keys.each { |checksum|
201
+ instances = @contents_info[checksum][1]
202
+ instances.delete_if { |location, _|
203
+ location[0] == server and location[1].scan(dir_to_remove).size > 0
204
+ }
205
+ @contents_info.delete(checksum) if instances.empty?
174
206
  }
175
207
  end
176
208
 
209
+
177
210
  def ==(other)
178
- return false if other == nil
179
- return false unless @contents.size == other.contents.size
180
- return false unless @instances.size == other.instances.size
181
-
182
- @contents.keys.each { |key|
183
- if (@contents[key] != other.contents[key])
184
- Log.debug1 @contents[key].first_appearance_time.to_i
185
- Log.debug1 other.contents[key].first_appearance_time.to_i
186
- return false
187
- end
188
- }
189
- @instances.keys.each { |key|
190
- if (@instances[key] != other.instances[key])
191
- return false
192
- end
211
+ return false if other.nil?
212
+ return false if @contents_info.size != other.contents_size
213
+ other.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
214
+ local_content_info = @contents_info[checksum]
215
+ return false if local_content_info.nil?
216
+ return false if local_content_info[0] != size
217
+ return false if local_content_info[2] != content_mod_time
218
+ #check instances
219
+ local_instances = local_content_info[1]
220
+ return false if other.instances_size(checksum) != local_instances.size
221
+ location = [server, path]
222
+ local_instance_mod_time = local_instances[location]
223
+ return false if local_instance_mod_time.nil?
224
+ return false if local_instance_mod_time != instance_mod_time
193
225
  }
194
- return true
226
+ true
227
+ end
228
+
229
+ def remove_content(checksum)
230
+ @contents_info.delete(checksum)
195
231
  end
196
232
 
197
233
  def to_s
198
- ret = ""
199
- ret << @contents.length.to_s << "\n"
200
- @contents.each_value { |content|
201
- ret << content.to_s << "\n"
234
+ return_str = ""
235
+ contents_str = ""
236
+ instances_str = ""
237
+ instances_counter = 0
238
+ each_content { |checksum, size, content_mod_time|
239
+ contents_str << "%s,%d,%d\n" % [checksum, size, content_mod_time]
202
240
  }
203
- ret << @instances.length.to_s << "\n"
204
- @instances.each_value { |instance|
205
- ret << instance.to_s << "\n"
241
+ instances_counter = 0
242
+ each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
243
+ instances_counter += 1
244
+ instances_str << "%s,%d,%s,%s,%d\n" % [checksum, size, server, path, instance_mod_time]
206
245
  }
207
- return ret
246
+ return_str << "%d\n" % [@contents_info.size]
247
+ return_str << contents_str
248
+ return_str << "%d\n" % [instances_counter]
249
+ return_str << instances_str
250
+ return_str
208
251
  end
209
252
 
210
253
  def to_file(filename)
@@ -216,192 +259,61 @@ module ContentData
216
259
  # TODO validation that file indeed contains ContentData missing
217
260
  def from_file(filename)
218
261
  lines = IO.readlines(filename)
219
- i = 0
220
- number_of_contents = lines[i].to_i
221
- i += 1
222
- number_of_contents.times {
223
- parameters = lines[i].split(",")
224
- add_content(Content.new(parameters[0],
225
- parameters[1].to_i,
226
- ContentData.parse_time(parameters[2])))
227
- i += 1
228
- }
229
-
262
+ number_of_contents = lines[0].to_i
263
+ i = 1 + number_of_contents
230
264
  number_of_instances = lines[i].to_i
231
265
  i += 1
232
266
  number_of_instances.times {
233
267
  if lines[i].nil?
234
- Log.debug1 "lines[i] if nil !!!, Backing filename: #{filename} to #{filename}.bad"
268
+ Log.warning "line ##{i} is nil !!!, Backing filename: #{filename} to #{filename}.bad"
235
269
  FileUtils.cp(filename, "#{filename}.bad")
236
- Log.debug1 lines[i].join("\n")
237
- end
238
- parameters = lines[i].split(',')
239
- # bugfix: if file name consist a comma then parsing based on comma separating fails
240
- if (parameters.size > 6)
241
- (5..parameters.size-2).each do |i|
242
- parameters[4] = [parameters[4], parameters[i]].join(",")
243
- end
244
- (5..parameters.size-2).each do |i|
245
- parameters.delete_at(5)
270
+ Log.warning("Lines:\n#{lines[i].join("\n")}")
271
+ else
272
+ parameters = lines[i].split(',')
273
+ # bugfix: if the file name contains a comma then parsing based on comma separation fails
274
+ if (parameters.size > 5)
275
+ (4..parameters.size-2).each do |i|
276
+ parameters[3] = [parameters[3], parameters[i]].join(",")
277
+ end
278
+ (4..parameters.size-2).each do |i|
279
+ parameters.delete_at(4)
280
+ end
246
281
  end
247
- end
248
282
 
249
- add_instance(ContentInstance.new(parameters[0],
250
- parameters[1].to_i,
251
- parameters[2],
252
- parameters[3],
253
- parameters[4],
254
- ContentData.parse_time(parameters[5])))
283
+ add_instance(parameters[0],
284
+ parameters[1].to_i,
285
+ parameters[2],
286
+ parameters[3],
287
+ parameters[4].to_i)
288
+ end
255
289
  i += 1
256
290
  }
257
291
  end
258
292
 
259
- def self.parse_time time_str
260
- return nil unless time_str.instance_of? String
261
- seconds_from_epoch = Integer time_str # Not using to_i here because it does not check string is integer.
262
- time = Time.at seconds_from_epoch
263
- end
264
-
265
- def self.format_time(time)
266
- return nil unless time.instance_of?Time
267
- str = time.to_i.to_s
268
- return str
269
- end
270
-
271
- # merges content data a and content data b to a new content data and returns it.
272
- def self.merge(a, b)
273
- return b unless not a.nil?
274
- return a unless not b.nil?
275
-
276
- return nil unless a.instance_of?ContentData
277
- return nil unless b.instance_of?ContentData
278
-
279
- ret = ContentData.new
280
- ret.merge(a)
281
- ret.merge(b)
282
-
283
- return ret
284
- end
285
-
286
- # removed content data a from content data b and returns the new content data.
287
- def self.remove(a, b)
288
- return nil unless a.instance_of?ContentData
289
- return nil unless b.instance_of?ContentData
290
-
291
- ret = ContentData.new
292
-
293
- b.contents.values.each { |content|
294
- #print "%s - %s\n" % [content.checksum, a.content_exists(content.checksum).to_s]
295
- ret.add_content(content) unless a.content_exists(content.checksum)
296
- }
297
-
298
- #Log.debug1 "kaka"
299
-
300
- b.instances.values.each { |instance|
301
- #print "%s - %s\n" % [instance.checksum, a.content_exists(instance.checksum).to_s]
302
- ret.add_instance(instance) unless a.content_exists(instance.checksum)
303
- }
304
-
305
- #print "kuku %s" % ret.contents.size.to_s
306
- #print "kuku %s" % ret.instances.size.to_s
307
- return ret
308
- end
309
-
310
- def self.remove_instances(a, b)
311
- return nil unless a.instance_of?ContentData
312
- return nil unless b.instance_of?ContentData
313
-
314
- ret = ContentData.new
315
- b.instances.values.each do |instance|
316
- if !a.instances.key?(instance.global_path)
317
- ret.add_content(b.contents[instance.checksum])
318
- ret.add_instance(instance)
319
- end
320
- end
321
- return ret
322
- end
323
-
324
- def self.remove_directory(cd, global_dir_path)
325
- return nil unless cd.instance_of?ContentData
326
-
327
- ret = ContentData.new
328
- cd.instances.values.each do |instance|
329
- if instance.global_path.scan(global_dir_path).size == 0
330
- ret.add_content(cd.contents[instance.checksum])
331
- ret.add_instance(instance)
332
- end
333
- end
334
- return ret
335
- end
336
-
337
- # returns the common content in both a and b
338
- def self.intersect(a, b)
339
- b_minus_a = ContentData.remove(a, b)
340
- return ContentData.remove(b_minus_a, b)
341
- end
342
-
343
- # unify time for all entries with same content to minimal time
344
- def self.unify_time(db)
345
- mod_db = ContentData.new # resulting ContentData that will consists objects with unified time
346
- checksum2time = Hash.new # key=checksum value=min_time_for_this_checksum
347
- checksum2instances = Hash.new # key=checksum value=array_of_instances_with_this_checksum (Will be replaced with ContentData method)
348
-
349
- # populate tables with given ContentData entries
350
- db.instances.each_value do |instance|
351
- checksum = instance.checksum
352
- time = instance.modification_time
353
-
354
- unless (checksum2instances.has_key? checksum)
355
- checksum2instances[checksum] = []
356
- end
357
- checksum2instances[checksum] << instance
358
-
359
- if (not checksum2time.has_key? checksum)
360
- checksum2time[checksum] = time
361
- elsif ((checksum2time[checksum] <=> time) > 0)
362
- checksum2time[checksum] = time
363
- end
364
- end
365
-
366
- # update min time table with time information from contents
367
- db.contents.each do |checksum, content|
368
- time = content.first_appearance_time
369
- if (not checksum2time.has_key? checksum)
370
- checksum2time[checksum] = time
371
- elsif ((checksum2time[checksum] <=> time) > 0)
372
- checksum2time[checksum] = time
373
- end
374
- end
375
-
376
- # add content entries to the output table. in need of case update time field with found min time
377
- db.contents.each do |checksum, content|
378
- time = checksum2time[checksum]
379
- if ((content.first_appearance_time <=> time) == 0)
380
- mod_db.add_content(content)
381
- else
382
- mod_db.add_content(Content.new(checksum, content.size, time))
383
- end
384
- end
385
-
386
- # add instance entries to the output table. in need of case update time field with found min time
387
- checksum2instances.each do |checksum, instances|
388
- time = checksum2time[checksum]
389
- instances.each do |instance|
390
- if ((instance.modification_time <=> time) == 0)
391
- mod_db.add_instance(instance)
392
- else # must be bigger then found min time
393
- mod_instance = ContentInstance.new(instance.checksum, instance.size,
394
- instance.server_name, instance.device,
395
- instance.full_path, time)
396
- mod_db.add_instance(mod_instance)
293
+ # for each content, all time fields (content and instances) are replaced with the
294
+ # min time found, while going through all time fields.
295
+ def unify_time()
296
+ @contents_info.keys.each { |checksum|
297
+ content_info = @contents_info[checksum]
298
+ min_time_per_checksum = content_info[2]
299
+ instances = content_info[1]
300
+ instances.keys.each { |location|
301
+ instance_mod_time = instances[location]
302
+ if instance_mod_time < min_time_per_checksum
303
+ min_time_per_checksum = instance_mod_time
397
304
  end
398
- end
399
- end
400
- mod_db
305
+ }
306
+ # update all instances with min time
307
+ instances.keys.each { |location|
308
+ instances[location] = min_time_per_checksum
309
+ }
310
+ # update content time with min time
311
+ content_info[2] = min_time_per_checksum
312
+ }
401
313
  end
402
314
 
403
315
  # Validates index against file system that all instances hold a correct data regarding files
404
- # that they represrents.
316
+ # that they represent.
405
317
  #
406
318
  # There are two levels of validation, controlled by instance_check_level system parameter:
407
319
  # * shallow - quick, tests instance for file existence and attributes.
@@ -411,6 +323,7 @@ module ContentData
411
323
  # Supported key/value combinations:
412
324
  # * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
413
325
  # @return [Boolean] true when index is correct, false otherwise
326
+ # @raise [ArgumentError] when instance_check_level is incorrect
414
327
  def validate(params = nil)
415
328
  # used to answer whether specific param was set
416
329
  param_exists = Proc.new do |param|
@@ -419,47 +332,65 @@ module ContentData
419
332
 
420
333
  # used to process method parameters centrally
421
334
  process_params = Proc.new do |values|
422
- # values is a Hash with keys: :content, :instance and value appropriate to key
423
- if param_exists.call :failed
424
- unless values[:content].nil?
425
- params[:failed].add_content values[:content]
426
- end
427
- unless values[:instance].nil?
428
- # appropriate content should be already added
429
- params[:failed].add_instance values[:instance]
335
+ if param_exists.call(:failed)
336
+ info = values[:details]
337
+ unless info.nil?
338
+ checksum = info[0]
339
+ content_mtime = info[1]
340
+ size = info[2]
341
+ inst_mtime = info[3]
342
+ server = info[4]
343
+ file_path = info[5]
344
+ params[:failed].add_instance(checksum, size, server, file_path, inst_mtime)
430
345
  end
431
346
  end
432
347
  end
433
348
 
434
349
  is_valid = true
435
- instances.each_value do |instance|
436
- unless check_instance instance
437
- is_valid = false
438
-
439
- unless params.nil? || params.empty?
440
- process_params.call :content => contents[instance.checksum], :instance => instance
350
+ @contents_info.keys.each { |checksum|
351
+ instances = @contents_info[checksum]
352
+ content_size = instances[0]
353
+ content_mtime = instances[2]
354
+ instances[1].keys.each { |unique_path|
355
+ instance_mtime = instances[1][unique_path]
356
+ instance_info = [checksum, content_mtime, content_size, instance_mtime]
357
+ instance_info.concat(unique_path)
358
+ unless check_instance(instance_info)
359
+ is_valid = false
360
+
361
+ unless params.nil? || params.empty?
362
+ process_params.call({:details => instance_info})
363
+ end
441
364
  end
442
- end
443
- end
444
-
365
+ }
366
+ }
445
367
  is_valid
446
368
  end
447
369
 
448
- def shallow_check(instance)
449
- path = instance.full_path
370
+ # instance_info is an array:
371
+ # [0] - checksum
372
+ # [1] - content time
373
+ # [2] - content size
374
+ # [3] - instance mtime
375
+ # [4] - server name
376
+ # [5] - file path
377
+ def shallow_check(instance_info)
378
+ path = instance_info[5]
379
+ size = instance_info[2]
380
+ instance_mtime = instance_info[3]
450
381
  is_valid = true
451
382
 
452
383
  if (File.exists?(path))
453
- if File.size(path) != instance.size
384
+ if File.size(path) != size
454
385
  is_valid = false
455
- err_msg = "#{path} size #{File.size(path)} differs from indexed size #{instance.size}"
386
+ err_msg = "#{path} size #{File.size(path)} differs from indexed size #{size}"
456
387
  Log.warning err_msg
457
388
  end
458
389
  #if ContentData.format_time(File.mtime(path)) != instance.modification_time
459
- if File.mtime(path).to_i != instance.modification_time.to_i
390
+ if File.mtime(path).to_i != instance_mtime
460
391
  is_valid = false
461
- err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
462
- + "indexed #{instance.modification_time}"
392
+ err_msg = "#{path} modification time #{File.mtime(path).to_i} differs from " \
393
+ + "indexed #{instance_mtime}"
463
394
  Log.warning err_msg
464
395
  end
465
396
  else
@@ -470,14 +401,22 @@ module ContentData
470
401
  is_valid
471
402
  end
472
403
 
473
- def deep_check(instance)
474
- if shallow_check(instance)
475
- path = instance.full_path
404
+ # instance_info is an array:
405
+ # [0] - checksum
406
+ # [1] - content time
407
+ # [2] - content size
408
+ # [3] - instance mtime
409
+ # [4] - server name
410
+ # [5] - file path
411
+ def deep_check(instance_info)
412
+ if shallow_check(instance_info)
413
+ instance_checksum = instance_info[0]
414
+ path = instance_info[5]
476
415
  current_checksum = FileIndexing::IndexAgent.get_checksum(path)
477
- if instance.checksum == current_checksum
416
+ if instance_checksum == current_checksum
478
417
  true
479
418
  else
480
- err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance.checksum}"
419
+ err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance_checksum}"
481
420
  Log.warning err_msg
482
421
  false
483
422
  end
@@ -486,6 +425,7 @@ module ContentData
486
425
  end
487
426
  end
488
427
 
428
+ # @raise [ArgumentError] when instance_check_level is incorrect
489
429
  def check_instance(instance)
490
430
  case Params['instance_check_level']
491
431
  when 'deep'
@@ -558,163 +498,115 @@ module ContentData
558
498
  private :shallow_check, :deep_check, :check_instance
559
499
  end
560
500
 
561
- # Validates index against file system that all instances hold a correct data regarding files
562
- # that they represent.
563
- #
564
- # There are two levels of validation, controlled by instance_check_level system parameter:
565
- # * shallow - quick, tests instance for file existence and attributes.
566
- # * deep - can take more time, in addition to shallow recalculates hash sum.
567
- # @param [Hash] params hash of parameters of validation, can be used to return additional data.
568
- #
569
- # Supported key/value combinations:
570
- # * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
571
- # @return [Boolean] true when index is correct, false otherwise
572
- # @raise [ArgumentError] when instance_check_level is incorrect
573
- def validate(params = nil)
574
- # used to answer whether specific param was set
575
- param_exists = Proc.new do |param|
576
- !(params.nil? || params[param].nil?)
577
- end
578
-
579
- # used to process method parameters centrally
580
- process_params = Proc.new do |values|
581
- # values is a Hash with keys: :content, :instance and value appropriate to key
582
- if param_exists.call :failed
583
- unless values[:content].nil?
584
- params[:failed].add_content values[:content]
585
- end
586
- unless values[:instance].nil?
587
- # appropriate content should be already added
588
- params[:failed].add_instance values[:instance]
589
- end
590
- end
591
- end
592
-
593
- is_valid = true
594
- instances.each_value do |instance|
595
- unless check_instance instance
596
- is_valid = false
597
-
598
- unless params.nil? || params.empty?
599
- process_params.call :content => contents[instance.checksum], :instance => instance
600
- end
601
- end
602
- end
603
-
604
- is_valid
501
+ # merges content data a and content data b to a new content data and returns it.
502
+ def self.merge(a, b)
503
+ return ContentData.new(a) if b.nil?
504
+ return ContentData.new(b) if a.nil?
505
+ c = ContentData.new(b)
506
+ # Add A instances to content data c
507
+ a.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
508
+ c.add_instance(checksum, size, server, path, instance_mod_time)
509
+ }
510
+ c
605
511
  end
606
512
 
607
- def shallow_check(instance)
608
- path = instance.full_path
609
- is_valid = true
610
-
611
- if (File.exists?(path))
612
- if File.size(path) != instance.size
613
- is_valid = false
614
- err_msg = "#{path} size #{File.size(path)} differs from indexed size #{instance.size}"
615
- Log.warning err_msg
616
- end
617
- #if ContentData.format_time(File.mtime(path)) != instance.modification_time
618
- if File.mtime(path).to_i != instance.modification_time.to_i
619
- is_valid = false
620
- err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
621
- + "indexed #{instance.modification_time}"
622
- Log.warning err_msg
623
- end
624
- else
625
- is_valid = false
626
- err_msg = "Indexed file #{path} doesn't exist"
627
- Log.warning err_msg
628
- end
629
- is_valid
513
+ def self.merge_override_b(a, b)
514
+ return ContentData.new(a) if b.nil?
515
+ return ContentData.new(b) if a.nil?
516
+ # Add A instances to content data B
517
+ a.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
518
+ b.add_instance(checksum, size, server, path, instance_mod_time)
519
+ }
520
+ b
630
521
  end
631
522
 
632
- def deep_check(instance)
633
- if shallow_check(instance)
634
- path = instance.full_path
635
- current_checksum = FileIndexing::IndexAgent.get_checksum(path)
636
- if instance.checksum == current_checksum
637
- true
638
- else
639
- err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance.checksum}"
640
- Log.warning err_msg
641
- false
642
- end
643
- else
644
- false
645
- end
523
+ # B - A : Remove contents of A from B and return the new content data.
524
+ # instances are ignored
525
+ # e.g
526
+ # A db:
527
+ # Content_1 ->
528
+ # Instance_1
529
+ # Instance_2
530
+ #
531
+ # Content_2 ->
532
+ # Instance_3
533
+ #
534
+ # B db:
535
+ # Content_1 ->
536
+ # Instance_1
537
+ # Instance_2
538
+ #
539
+ # Content_2 ->
540
+ # Instance_3
541
+ # Instance_4
542
+ # Content_3 ->
543
+ # Instance_5
544
+ # B-A db:
545
+ # Content_3 ->
546
+ # Instance_5
547
+ def self.remove(a, b)
548
+ return nil if b.nil?
549
+ return ContentData.new(b) if a.nil?
550
+ c = ContentData.new(b) # create new cloned content C from B
551
+ # remove contents of A from the newly cloned content data C
552
+ a.each_content { |checksum, size, content_mod_time|
553
+ c.remove_content(checksum)
554
+ }
555
+ c
646
556
  end
647
557
 
648
- # @raise [ArgumentError] when instance_check_level is incorrect
649
- def check_instance(instance)
650
- case Params['instance_check_level']
651
- when 'deep'
652
- deep_check instance
653
- when 'shallow'
654
- shallow_check instance
655
- else
656
- # TODO remove it when params will support set of values
657
- throw ArgumentError.new "Unsupported check level #{Params['instance_check_level']}"
658
- end
558
+ # B - A : Remove the instances of content data A from content data B and return the new content data.
559
+ # If all instances are removed then the content record itself will be removed
560
+ # e.g
561
+ # A db:
562
+ # Content_1 ->
563
+ # Instance_1
564
+ # Instance_2
565
+ #
566
+ # Content_2 ->
567
+ # Instance_3
568
+ #
569
+ # B db:
570
+ # Content_1 ->
571
+ # Instance_1
572
+ # Instance_2
573
+ #
574
+ # Content_2 ->
575
+ # Instance_3
576
+ # Instance_4
577
+ # B-A db:
578
+ # Content_2 ->
579
+ # Instance_4
580
+ def self.remove_instances(a, b)
581
+ return nil if b.nil?
582
+ return ContentData.new(b) if a.nil?
583
+ c = ContentData.new(b) # create new cloned content C from B
584
+ # remove instances of A from the newly cloned content data C
585
+ a.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
586
+ location = [server, path]
587
+ c.remove_instance(location, checksum)
588
+ }
589
+ c
659
590
  end
660
591
 
661
-
662
- # TODO simplify conditions
663
- # This method is experimental and shouldn't be used
664
- # nil is used to define +/- infinity for to/from method arguments
665
- # from/to values are exclusive in the condition's calculations
666
- # Need to take care about '==' operation that is used for object's comparison.
667
- # If needed, the user should define their own '==' implementation.
668
- def get_query(variable, params)
669
- raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
670
-
671
- exact = params['exact'].nil? ? Array.new : params['exact']
672
- from = params['from']
673
- to = params ['to']
674
- is_inside = params['is_inside']
675
-
676
- unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
677
- raise ArgumentError "#{variable} isn't a ContentInstance variable"
678
- end
679
-
680
- if (exact.nil? && from.nil? && to.nil?)
681
- raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
682
- end
683
-
684
- if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
685
- raise ArgumentError 'to and from arguments should be comparable one with another'
686
- end
687
-
688
- # FIXME add support for from/to for Strings
689
- if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
690
- || (!to.nil? && to.kind_of?(Numeric.new.class)))
691
- raise ArgumentError 'from and to options supported only for numeric values'
692
- end
693
-
694
- if (!exact.empty? && (!from.nil? || !to.nil?))
695
- raise ArgumentError 'exact and from/to options are mutually exclusive'
696
- end
697
-
698
- result_index = ContentData.new
699
- instances.each_value do |instance|
700
- is_match = false
701
- var_value = instance.instance_variable_get("@#{variable}")
702
-
703
- if exact.include? var_value
704
- is_match = true
705
- elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
706
- is_match = true
707
- end
708
-
709
- if (is_match && is_inside) || (!is_match && !is_inside)
710
- checksum = instance.checksum
711
- result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
712
- result_index.add_instance instance
592
+ def self.remove_directory(content_data, dir_to_remove, server_to_remove)
593
+ return nil if content_data.nil?
594
+ result_content_data = ContentData.new()
595
+ content_data.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
596
+ # Keep the instance if it is not on the server to remove or its path does not include the dir to remove
597
+ if (server_to_remove!=server) or (path.scan(dir_to_remove).size == 0)
598
+ result_content_data.add_instance(checksum.clone, size, server, path.clone, instance_mod_time)
713
599
  end
714
- end
715
- result_index
600
+ }
601
+ result_content_data
716
602
  end
717
603
 
718
- private :shallow_check, :deep_check, :check_instance, :get_query
604
+ # returns the common content in both a and b
605
+ def self.intersect(a, b)
606
+ return nil if a.nil?
607
+ return nil if b.nil?
608
+ b_minus_a = remove(a, b)
609
+ b_minus_b_minus_a = remove(b_minus_a, b)
610
+ end
719
611
  end
720
612