content_data 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/content_data/content_data.rb +394 -502
- data/lib/content_data/dynamic_content_data.rb +44 -3
- data/lib/content_data/version.rb +1 -1
- data/test/content_data/content_data_test.rb +281 -135
- metadata +2 -2
@@ -1,210 +1,253 @@
|
|
1
|
+
require 'content_server/globals'
|
1
2
|
require 'log'
|
2
3
|
require 'params'
|
3
|
-
require 'time'
|
4
4
|
|
5
5
|
module ContentData
|
6
6
|
Params.string('instance_check_level', 'shallow', 'Defines check level. Supported levels are: ' \
|
7
7
|
'shallow - quick, tests instance for file existence and attributes. ' \
|
8
8
|
'deep - can take more time, in addition to shallow recalculates hash sum.')
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
10
|
+
# Content Data(CD) object holds files information as contents and instances
|
11
|
+
# Files info retrieved from hardware: checksum, size, time modification, server, device and path
|
12
|
+
# Those attributes are divided into content and instance attributes:
|
13
|
+
# unique checksum, size are content attributes
|
14
|
+
# time modification, server, device and path are instance attributes
|
15
|
+
# The relationship between content and instances is 1:many meaning that
|
16
|
+
# a content can have instances in many servers.
|
17
|
+
# content also has time attribute, which has the value of the time of the first instance.
|
18
|
+
# This can be changed by using unify_time method which sets all time attributes for a content and its
|
19
|
+
# instances to the min time of all.
|
20
|
+
# Different files(instances) with same content(checksum), are grouped together under that content.
|
21
|
+
# Interface methods include:
|
22
|
+
# iterate over contents and instances info,
|
23
|
+
# unify time, add/remove instance, queries, merge, remove directory and more.
|
24
|
+
# Content info data structure:
|
25
|
+
# @contents_info = { Checksum -> [size, *instances*, content_modification_time] }
|
26
|
+
# *instances* = {[server,path] -> instance_modification_time }
|
27
|
+
# Notes:
|
28
|
+
# 1. content_modification_time is the instance_modification_time of the first
|
29
|
+
# instance which was added to @contents_info
|
30
|
+
class ContentData
|
30
31
|
|
32
|
+
# Builds an empty ContentData, or a copy of +other+ when one is given.
#
# @param other [ContentData, nil] optional source; the result of its
#   clone_contents_info becomes this object's data base
def initialize(other = nil)
  # register the class-level finalizer so removal of this object can be counted
  finalizer = self.class.method(:finalize).to_proc
  ObjectSpace.define_finalizer(self, finalizer)
  ::ContentServer::Globals.process_vars.inc('obj add ContentData') if Params['enable_monitoring']
  # Checksum --> [size, paths-->time(instance), time(content)]
  @contents_info = other.nil? ? {} : other.clone_contents_info
end
|
37
44
|
|
38
|
-
def
|
39
|
-
|
45
|
+
# Finalizer registered by initialize: bumps the 'obj rem ContentData'
# process counter, but only when monitoring is enabled.
#
# @param id [Integer] object id supplied by ObjectSpace (not used)
def self.finalize(id)
  return unless Params['enable_monitoring']
  ::ContentServer::Globals.process_vars.inc('obj rem ContentData')
end
|
41
50
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
51
|
+
# getting a cloned data base
|
52
|
+
# Copies the whole data base: a new outer hash, a new instances hash per
# content, and cloned server/path strings for every location. Checksums,
# sizes and times are carried over as they are.
#
# @return [Hash] checksum -> [size, {[server, path] -> instance time}, content time]
def clone_contents_info
  copy = {}
  @contents_info.keys.each do |checksum|
    size, instances_db, content_time = @contents_info[checksum]
    cloned_instances = {}
    instances_db.keys.each do |location|
      server, path = location
      cloned_instances[[server.clone, path.clone]] = instances_db[location]
    end
    copy[checksum] = [size, cloned_instances, content_time]
  end
  copy
end
|
47
|
-
end
|
48
|
-
|
49
|
-
class ContentInstance
|
50
|
-
attr_reader :checksum, :size, :server_name, :device, :full_path, :modification_time
|
51
69
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
raise ArgumentError.new("size have to be defined")
|
61
|
-
else
|
62
|
-
@size = content_instance_serializer.size
|
63
|
-
end
|
64
|
-
if (content_instance_serializer.modification_time == nil)
|
65
|
-
raise ArgumentError.new("modification_time have to be defined")
|
66
|
-
else
|
67
|
-
@modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
|
68
|
-
end
|
69
|
-
if (content_instance_serializer.server_name == nil)
|
70
|
-
raise ArgumentError.new("server_name have to be defined")
|
71
|
-
else
|
72
|
-
@server_name = content_instance_serializer.server_name
|
73
|
-
end
|
74
|
-
if (content_instance_serializer.device == nil)
|
75
|
-
raise ArgumentError.new("device have to be defined")
|
76
|
-
else
|
77
|
-
@device = content_instance_serializer.device
|
78
|
-
end
|
79
|
-
if (content_instance_serializer.full_path == nil)
|
80
|
-
raise ArgumentError.new("full_path have to be defined")
|
81
|
-
else
|
82
|
-
@full_path = content_instance_serializer.full_path
|
83
|
-
end
|
84
|
-
else
|
85
|
-
@checksum = checksum
|
86
|
-
@size = size
|
87
|
-
@server_name = server_name
|
88
|
-
@device = device
|
89
|
-
@full_path = full_path
|
90
|
-
@modification_time = modification_time
|
91
|
-
end
|
70
|
+
# iterator over @contents_info data structure (not including instances)
|
71
|
+
# block is provided with: checksum, size and content modification time
|
72
|
+
# Walks a snapshot of the checksums and calls +block+ once per content.
#
# @yieldparam checksum the content key
# @yieldparam size content size (element 0 of the content record)
# @yieldparam content_mtime content modification time (element 2 of the record)
def each_content(&block)
  @contents_info.keys.each do |checksum|
    size, _instances, content_mtime = @contents_info[checksum]
    block.call(checksum, size, content_mtime)
  end
end
|
93
79
|
|
94
|
-
|
95
|
-
|
80
|
+
# iterator over @contents_info data structure (including instances)
|
81
|
+
# block is provided with: checksum, size, content modification time,
|
82
|
+
# instance modification time, server and file path
|
83
|
+
# Walks a snapshot of the checksums and, per content, a snapshot of its
# locations, calling +block+ once for every instance.
#
# @yieldparam checksum the content key
# @yieldparam size content size
# @yieldparam content_mtime content modification time
# @yieldparam instance_mtime modification time stored for this location
# @yieldparam server first element of the location key
# @yieldparam path second element of the location key
def each_instance(&block)
  @contents_info.keys.each do |checksum|
    info = @contents_info[checksum]
    info[1].keys.each do |location|
      server, path = location
      instance_mtime = info[1][location]
      block.call(checksum, info[0], info[2], instance_mtime, server, path)
    end
  end
end
|
97
95
|
|
98
|
-
|
99
|
-
|
96
|
+
# iterator of instances over specific content
|
97
|
+
# block is provided with: checksum, size, content modification time,
|
98
|
+
# instance modification time, server and file path
|
99
|
+
# Calls +block+ once for every instance stored under +checksum+.
# An unknown checksum yields nothing and returns nil, in line with the
# nil-guards used by instances_size and get_instance_mod_time (previously
# this raised NoMethodError on nil).
#
# @param checksum key of the content whose instances are visited
# @yieldparam checksum the content key
# @yieldparam size content size
# @yieldparam content_mtime content modification time
# @yieldparam instance_mtime modification time stored for this location
# @yieldparam server first element of the location key
# @yieldparam path second element of the location key
# @return [Array, nil] the visited location keys, or nil for an unknown checksum
def content_each_instance(checksum, &block)
  content_info = @contents_info[checksum]
  return nil if content_info.nil?
  instances = content_info[1]
  # iterate over a snapshot of the keys, as the other iterators do
  instances.keys.each do |location|
    block.call(checksum, content_info[0], content_info[2], instances[location],
               location[0], location[1])
  end
end
|
101
109
|
|
102
|
-
def
|
103
|
-
|
104
|
-
@device, @full_path, ContentData.format_time(@modification_time)]
|
110
|
+
# @return [Integer] number of contents (distinct checksums) currently held
def contents_size
  @contents_info.length
end
|
106
113
|
|
107
|
-
def
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
self.device.eql? other.device and
|
112
|
-
self.full_path.eql? other.full_path and
|
113
|
-
self.modification_time.to_i.eql? other.modification_time.to_i)
|
114
|
+
# @param checksum key of the content to inspect
# @return [Integer] number of instances stored for +checksum+; 0 when the
#   checksum is unknown
def instances_size(checksum)
  record = @contents_info[checksum]
  record.nil? ? 0 : record[1].size
end
|
115
|
-
end
|
116
119
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
120
|
+
# Looks up the modification time stored for one instance.
#
# @param checksum key of the content the instance belongs to
# @param location [Array] the [server, path] key of the instance
# @return the stored instance time, or nil when either the checksum or the
#   location is unknown
def get_instance_mod_time(checksum, location)
  record = @contents_info[checksum]
  record.nil? ? nil : record[1][location]
end
|
123
126
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
@
|
127
|
+
# Records an instance under [server, path] for the given checksum.
# A new checksum creates the content record, and the instance time also
# becomes the content time. For an existing checksum the stored size is
# overwritten (a differing size is logged as a warning first) and the
# location's time is set; the content time is left untouched.
#
# @param checksum content key
# @param size file size
# @param server server part of the location key
# @param path path part of the location key
# @param modification_time instance modification time
def add_instance(checksum, size, server, path, modification_time)
  location = [server, path]
  content_info = @contents_info[checksum]
  if content_info.nil?
    return @contents_info[checksum] = [size, {location => modification_time}, modification_time]
  end

  if size != content_info[0]
    Log.warning 'File size different from content size while same checksum'
    Log.warning("instance location:server:'#{server}' path:'#{path}'")
    Log.warning("instance mod time:'#{modification_time}'")
  end
  # override size and (re)register the location
  content_info[0] = size
  content_info[1][location] = modification_time
end
|
135
146
|
|
136
|
-
def
|
137
|
-
@
|
147
|
+
# @return [Boolean] true when no content is stored
def empty?
  @contents_info.size.zero?
end
|
139
150
|
|
140
|
-
def
|
141
|
-
|
142
|
-
|
143
|
-
" checksum %s does not exists.\n", instance.checksum)
|
144
|
-
Log.warning sprintf("%s\n", instance.to_s)
|
145
|
-
return false
|
146
|
-
elsif (@contents[instance.checksum].size != instance.size)
|
147
|
-
Log.warning 'File size different from content size while same checksum'
|
148
|
-
Log.warning instance.to_s
|
149
|
-
return false
|
150
|
-
end
|
151
|
+
# @param checksum content key to look for
# @return [Boolean] true when a content record exists for +checksum+
def content_exists(checksum)
  @contents_info.key?(checksum)
end
|
151
154
|
|
152
|
-
key = instance.global_path
|
153
155
|
|
154
|
-
|
155
|
-
|
156
|
+
# TODO (genadyp) consider about using hash for optional defining of parameters
|
157
|
+
# Tells whether an instance is registered at [server, path].
#
# @param path path part of the location
# @param server server part of the location
# @param checksum optional content key; when given only that content is
#   checked, otherwise every content is searched
# @return [Boolean] false also when +checksum+ is given but unknown
def instance_exists(path, server, checksum=nil)
  location = [server, path]
  if checksum.nil?
    return @contents_info.values.any? { |content_db| content_db[1].has_key?(location) }
  end
  content_info = @contents_info[checksum]
  content_info.nil? ? false : content_info[1].has_key?(location)
end
|
157
169
|
|
158
|
-
def
|
159
|
-
@
|
170
|
+
# Finds the first content (in hash order) holding an instance at +location+.
#
# @param location [Array] the [server, path] key
# @return [Array, nil] [content size, instance modification time], or nil
#   when no content has that location
def stats_by_location(location)
  match = @contents_info.values.find { |content_db| content_db[1].has_key?(location) }
  return nil if match.nil?
  [match[0], match[1][location]]
end
|
161
178
|
|
162
|
-
|
163
|
-
|
164
|
-
|
179
|
+
|
180
|
+
# removes an instance from known content (faster than unknown content)
|
181
|
+
# remove also the content, if content becomes empty
|
182
|
+
# @param location [Array] the [server, path] key to remove
# @param checksum optional content key; when nil every content is scanned
#   for the location, otherwise only that content is touched (an unknown
#   checksum is a no-op)
def remove_instance(location, checksum=nil)
  unless checksum.nil?
    content_info = @contents_info[checksum]
    return nil if content_info.nil?
    content_info[1].delete(location)
    return nil unless content_info[1].empty?
    # last instance gone: drop the content record as well
    return @contents_info.delete(checksum)
  end

  @contents_info.keys.each do |content_checksum|
    locations = @contents_info[content_checksum][1]
    locations.delete(location)
    @contents_info.delete(content_checksum) if locations.empty?
  end
end
|
166
198
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
199
|
+
# Deletes, in place, every instance on +server+ whose path matches
# +dir_to_remove+; contents left without instances are deleted too.
# NOTE(review): the match succeeds when dir_to_remove occurs anywhere in
# the path, not only as a leading directory - confirm this is intended.
#
# @param dir_to_remove pattern looked up inside each instance path
# @param server server whose instances are candidates for removal
def remove_directory(dir_to_remove, server)
  @contents_info.keys.each do |checksum|
    instances = @contents_info[checksum][1]
    instances.delete_if do |location, _|
      location[0] == server && !location[1].index(dir_to_remove).nil?
    end
    @contents_info.delete(checksum) if instances.empty?
  end
end
|
176
208
|
|
209
|
+
|
177
210
|
# Two content data objects are equal when they hold the same contents
# (checksum, size, content time) and, per content, the same instances
# (location and instance time).
#
# @param other object answering contents_size, each_instance and instances_size
# @return [Boolean]
def ==(other)
  return false if other.nil? || @contents_info.size != other.contents_size
  other.each_instance do |checksum, size, content_mod_time, instance_mod_time, server, path|
    local = @contents_info[checksum]
    return false if local.nil?
    return false if local[0] != size || local[2] != content_mod_time
    # check instances
    return false if other.instances_size(checksum) != local[1].size
    local_time = local[1][[server, path]]
    return false if local_time.nil? || local_time != instance_mod_time
  end
  true
end
|
228
|
+
|
229
|
+
# Drops the content stored under +checksum+ together with all its instances.
#
# @return [Array, nil] the removed [size, instances, content time] record,
#   or nil when the checksum was not present
def remove_content(checksum)
  removed = @contents_info.delete(checksum)
  removed
end
|
196
232
|
|
197
233
|
# Serializes the data base as text:
#   <number of contents>
#   one "checksum,size,content_time" line per content
#   <number of instances>
#   one "checksum,size,server,path,instance_time" line per instance
#
# @return [String]
def to_s
  contents_str = ""
  each_content do |checksum, size, content_mod_time|
    contents_str << "%s,%d,%d\n" % [checksum, size, content_mod_time]
  end

  instances_counter = 0
  instances_str = ""
  each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
    instances_counter += 1
    instances_str << "%s,%d,%s,%s,%d\n" % [checksum, size, server, path, instance_mod_time]
  end

  return_str = ""
  return_str << "%d\n" % [@contents_info.size]
  return_str << contents_str
  return_str << "%d\n" % [instances_counter]
  return_str << instances_str
  return_str
end
|
209
252
|
|
210
253
|
def to_file(filename)
|
@@ -216,192 +259,61 @@ module ContentData
|
|
216
259
|
# TODO validation that file indeed contains ContentData missing
|
217
260
|
# Loads instances from a file in the layout written by to_s:
#   <number of contents>, the content lines, <number of instances>,
#   then one "checksum,size,server,path,mtime" line per instance.
# The content lines are skipped; every instance line is passed to
# add_instance, which recreates the content records.
#
# If the file has fewer instance lines than announced, a warning is logged,
# the file is copied to "<filename>.bad" and loading stops. The earlier code
# called join on the missing (nil) line and raised NoMethodError.
#
# @param filename [String] path of the file to read
def from_file(filename)
  lines = IO.readlines(filename)
  number_of_contents = lines[0].to_i
  # jump over the contents section straight to the instances counter
  i = 1 + number_of_contents
  number_of_instances = lines[i].to_i
  i += 1
  number_of_instances.times {
    if lines[i].nil?
      Log.warning "line ##{i} is nil !!!, Backing filename: #{filename} to #{filename}.bad"
      FileUtils.cp(filename, "#{filename}.bad")
      # lines keep their own newline terminators, so a plain join reproduces the file
      Log.warning("Lines:\n#{lines.join}")
      # a nil line means we ran past the end of the file; all later lines are nil too
      break
    end

    parameters = lines[i].split(',')
    # a path containing commas is split into several fields: glue fields
    # 3..second-to-last back into the path, keep the last field as the time
    if parameters.size > 5
      parameters = parameters[0..2] + [parameters[3..-2].join(','), parameters[-1]]
    end

    add_instance(parameters[0],
                 parameters[1].to_i,
                 parameters[2],
                 parameters[3],
                 parameters[4].to_i)
    i += 1
  }
end
|
258
292
|
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
# merges content data a and content data b to a new content data and returns it.
|
272
|
-
def self.merge(a, b)
|
273
|
-
return b unless not a.nil?
|
274
|
-
return a unless not b.nil?
|
275
|
-
|
276
|
-
return nil unless a.instance_of?ContentData
|
277
|
-
return nil unless b.instance_of?ContentData
|
278
|
-
|
279
|
-
ret = ContentData.new
|
280
|
-
ret.merge(a)
|
281
|
-
ret.merge(b)
|
282
|
-
|
283
|
-
return ret
|
284
|
-
end
|
285
|
-
|
286
|
-
# removed content data a from content data b and returns the new content data.
|
287
|
-
def self.remove(a, b)
|
288
|
-
return nil unless a.instance_of?ContentData
|
289
|
-
return nil unless b.instance_of?ContentData
|
290
|
-
|
291
|
-
ret = ContentData.new
|
292
|
-
|
293
|
-
b.contents.values.each { |content|
|
294
|
-
#print "%s - %s\n" % [content.checksum, a.content_exists(content.checksum).to_s]
|
295
|
-
ret.add_content(content) unless a.content_exists(content.checksum)
|
296
|
-
}
|
297
|
-
|
298
|
-
#Log.debug1 "kaka"
|
299
|
-
|
300
|
-
b.instances.values.each { |instance|
|
301
|
-
#print "%s - %s\n" % [instance.checksum, a.content_exists(instance.checksum).to_s]
|
302
|
-
ret.add_instance(instance) unless a.content_exists(instance.checksum)
|
303
|
-
}
|
304
|
-
|
305
|
-
#print "kuku %s" % ret.contents.size.to_s
|
306
|
-
#print "kuku %s" % ret.instances.size.to_s
|
307
|
-
return ret
|
308
|
-
end
|
309
|
-
|
310
|
-
def self.remove_instances(a, b)
|
311
|
-
return nil unless a.instance_of?ContentData
|
312
|
-
return nil unless b.instance_of?ContentData
|
313
|
-
|
314
|
-
ret = ContentData.new
|
315
|
-
b.instances.values.each do |instance|
|
316
|
-
if !a.instances.key?(instance.global_path)
|
317
|
-
ret.add_content(b.contents[instance.checksum])
|
318
|
-
ret.add_instance(instance)
|
319
|
-
end
|
320
|
-
end
|
321
|
-
return ret
|
322
|
-
end
|
323
|
-
|
324
|
-
def self.remove_directory(cd, global_dir_path)
|
325
|
-
return nil unless cd.instance_of?ContentData
|
326
|
-
|
327
|
-
ret = ContentData.new
|
328
|
-
cd.instances.values.each do |instance|
|
329
|
-
if instance.global_path.scan(global_dir_path).size == 0
|
330
|
-
ret.add_content(cd.contents[instance.checksum])
|
331
|
-
ret.add_instance(instance)
|
332
|
-
end
|
333
|
-
end
|
334
|
-
return ret
|
335
|
-
end
|
336
|
-
|
337
|
-
# returns the common content in both a and b
|
338
|
-
def self.intersect(a, b)
|
339
|
-
b_minus_a = ContentData.remove(a, b)
|
340
|
-
return ContentData.remove(b_minus_a, b)
|
341
|
-
end
|
342
|
-
|
343
|
-
# unify time for all entries with same content to minimal time
|
344
|
-
def self.unify_time(db)
|
345
|
-
mod_db = ContentData.new # resulting ContentData that will consists objects with unified time
|
346
|
-
checksum2time = Hash.new # key=checksum value=min_time_for_this_checksum
|
347
|
-
checksum2instances = Hash.new # key=checksum value=array_of_instances_with_this_checksum (Will be replaced with ContentData method)
|
348
|
-
|
349
|
-
# populate tables with given ContentData entries
|
350
|
-
db.instances.each_value do |instance|
|
351
|
-
checksum = instance.checksum
|
352
|
-
time = instance.modification_time
|
353
|
-
|
354
|
-
unless (checksum2instances.has_key? checksum)
|
355
|
-
checksum2instances[checksum] = []
|
356
|
-
end
|
357
|
-
checksum2instances[checksum] << instance
|
358
|
-
|
359
|
-
if (not checksum2time.has_key? checksum)
|
360
|
-
checksum2time[checksum] = time
|
361
|
-
elsif ((checksum2time[checksum] <=> time) > 0)
|
362
|
-
checksum2time[checksum] = time
|
363
|
-
end
|
364
|
-
end
|
365
|
-
|
366
|
-
# update min time table with time information from contents
|
367
|
-
db.contents.each do |checksum, content|
|
368
|
-
time = content.first_appearance_time
|
369
|
-
if (not checksum2time.has_key? checksum)
|
370
|
-
checksum2time[checksum] = time
|
371
|
-
elsif ((checksum2time[checksum] <=> time) > 0)
|
372
|
-
checksum2time[checksum] = time
|
373
|
-
end
|
374
|
-
end
|
375
|
-
|
376
|
-
# add content entries to the output table. in need of case update time field with found min time
|
377
|
-
db.contents.each do |checksum, content|
|
378
|
-
time = checksum2time[checksum]
|
379
|
-
if ((content.first_appearance_time <=> time) == 0)
|
380
|
-
mod_db.add_content(content)
|
381
|
-
else
|
382
|
-
mod_db.add_content(Content.new(checksum, content.size, time))
|
383
|
-
end
|
384
|
-
end
|
385
|
-
|
386
|
-
# add instance entries to the output table. in need of case update time field with found min time
|
387
|
-
checksum2instances.each do |checksum, instances|
|
388
|
-
time = checksum2time[checksum]
|
389
|
-
instances.each do |instance|
|
390
|
-
if ((instance.modification_time <=> time) == 0)
|
391
|
-
mod_db.add_instance(instance)
|
392
|
-
else # must be bigger then found min time
|
393
|
-
mod_instance = ContentInstance.new(instance.checksum, instance.size,
|
394
|
-
instance.server_name, instance.device,
|
395
|
-
instance.full_path, time)
|
396
|
-
mod_db.add_instance(mod_instance)
|
293
|
+
# for each content, all time fields (content and instances) are replaced with the
|
294
|
+
# min time found, while going through all time fields.
|
295
|
+
# In place: for every content, finds the smallest time among the content
# time and all of its instance times, then writes that value into every
# instance and into the content time.
def unify_time()
  @contents_info.keys.each do |checksum|
    content_info = @contents_info[checksum]
    instances = content_info[1]
    # start from the content time and keep whichever instance time is smaller
    min_time = instances.values.inject(content_info[2]) do |lowest, instance_time|
      instance_time < lowest ? instance_time : lowest
    end
    instances.keys.each { |location| instances[location] = min_time }
    content_info[2] = min_time
  end
end
|
402
314
|
|
403
315
|
# Validates index against file system that all instances hold a correct data regarding files
|
404
|
-
# that they
|
316
|
+
# that they represent.
|
405
317
|
#
|
406
318
|
# There are two levels of validation, controlled by instance_check_level system parameter:
|
407
319
|
# * shallow - quick, tests instance for file existence and attributes.
|
@@ -411,6 +323,7 @@ module ContentData
|
|
411
323
|
# Supported key/value combinations:
|
412
324
|
# * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
|
413
325
|
# @return [Boolean] true when index is correct, false otherwise
|
326
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
414
327
|
def validate(params = nil)
|
415
328
|
# used to answer whether specific param was set
|
416
329
|
param_exists = Proc.new do |param|
|
@@ -419,47 +332,65 @@ module ContentData
|
|
419
332
|
|
420
333
|
# used to process method parameters centrally
|
421
334
|
process_params = Proc.new do |values|
|
422
|
-
|
423
|
-
|
424
|
-
unless
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
335
|
+
if param_exists.call(:failed)
|
336
|
+
info = values[:details]
|
337
|
+
unless info.nil?
|
338
|
+
checksum = info[0]
|
339
|
+
content_mtime = info[1]
|
340
|
+
size = info[2]
|
341
|
+
inst_mtime = info[3]
|
342
|
+
server = info[4]
|
343
|
+
file_path = info[5]
|
344
|
+
params[:failed].add_instance(checksum, size, server, file_path, inst_mtime)
|
430
345
|
end
|
431
346
|
end
|
432
347
|
end
|
433
348
|
|
434
349
|
is_valid = true
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
350
|
+
@contents_info.keys.each { |checksum|
|
351
|
+
instances = @contents_info[checksum]
|
352
|
+
content_size = instances[0]
|
353
|
+
content_mtime = instances[2]
|
354
|
+
instances[1].keys.each { |unique_path|
|
355
|
+
instance_mtime = instances[1][unique_path]
|
356
|
+
instance_info = [checksum, content_mtime, content_size, instance_mtime]
|
357
|
+
instance_info.concat(unique_path)
|
358
|
+
unless check_instance(instance_info)
|
359
|
+
is_valid = false
|
360
|
+
|
361
|
+
unless params.nil? || params.empty?
|
362
|
+
process_params.call({:details => instance_info})
|
363
|
+
end
|
441
364
|
end
|
442
|
-
|
443
|
-
|
444
|
-
|
365
|
+
}
|
366
|
+
}
|
445
367
|
is_valid
|
446
368
|
end
|
447
369
|
|
448
|
-
|
449
|
-
|
370
|
+
# instance_info is an array:
|
371
|
+
# [0] - checksum
|
372
|
+
# [1] - content time
|
373
|
+
# [2] - content size
|
374
|
+
# [3] - instance mtime
|
375
|
+
# [4] - server name
|
376
|
+
# [5] - file path
|
377
|
+
def shallow_check(instance_info)
|
378
|
+
path = instance_info[5]
|
379
|
+
size = instance_info[2]
|
380
|
+
instance_mtime = instance_info[3]
|
450
381
|
is_valid = true
|
451
382
|
|
452
383
|
if (File.exists?(path))
|
453
|
-
if File.size(path) !=
|
384
|
+
if File.size(path) != size
|
454
385
|
is_valid = false
|
455
|
-
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{
|
386
|
+
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{size}"
|
456
387
|
Log.warning err_msg
|
457
388
|
end
|
458
389
|
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
459
|
-
if File.mtime(path).to_i !=
|
390
|
+
if File.mtime(path).to_i != instance_mtime
|
460
391
|
is_valid = false
|
461
|
-
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
462
|
-
|
392
|
+
err_msg = "#{path} modification time #{File.mtime(path).to_i} differs from " \
|
393
|
+
+ "indexed #{instance_mtime}"
|
463
394
|
Log.warning err_msg
|
464
395
|
end
|
465
396
|
else
|
@@ -470,14 +401,22 @@ module ContentData
|
|
470
401
|
is_valid
|
471
402
|
end
|
472
403
|
|
473
|
-
|
474
|
-
|
475
|
-
|
404
|
+
# instance_info is an array:
|
405
|
+
# [0] - checksum
|
406
|
+
# [1] - content time
|
407
|
+
# [2] - content size
|
408
|
+
# [3] - instance mtime
|
409
|
+
# [4] - server name
|
410
|
+
# [5] - file path
|
411
|
+
def deep_check(instance_info)
|
412
|
+
if shallow_check(instance_info)
|
413
|
+
instance_checksum = instance_info[0]
|
414
|
+
path = instance_info[5]
|
476
415
|
current_checksum = FileIndexing::IndexAgent.get_checksum(path)
|
477
|
-
if
|
416
|
+
if instance_checksum == current_checksum
|
478
417
|
true
|
479
418
|
else
|
480
|
-
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{
|
419
|
+
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance_checksum}"
|
481
420
|
Log.warning err_msg
|
482
421
|
false
|
483
422
|
end
|
@@ -486,6 +425,7 @@ module ContentData
|
|
486
425
|
end
|
487
426
|
end
|
488
427
|
|
428
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
489
429
|
def check_instance(instance)
|
490
430
|
case Params['instance_check_level']
|
491
431
|
when 'deep'
|
@@ -558,163 +498,115 @@ module ContentData
|
|
558
498
|
private :shallow_check, :deep_check, :check_instance
|
559
499
|
end
|
560
500
|
|
561
|
-
#
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
# @return [Boolean] true when index is correct, false otherwise
|
572
|
-
# @raise [ArgumentError] when instance_check_level is incorrect
|
573
|
-
def validate(params = nil)
|
574
|
-
# used to answer whether specific param was set
|
575
|
-
param_exists = Proc.new do |param|
|
576
|
-
!(params.nil? || params[param].nil?)
|
577
|
-
end
|
578
|
-
|
579
|
-
# used to process method parameters centrally
|
580
|
-
process_params = Proc.new do |values|
|
581
|
-
# values is a Hash with keys: :content, :instance and value appropriate to key
|
582
|
-
if param_exists.call :failed
|
583
|
-
unless values[:content].nil?
|
584
|
-
params[:failed].add_content values[:content]
|
585
|
-
end
|
586
|
-
unless values[:instance].nil?
|
587
|
-
# appropriate content should be already added
|
588
|
-
params[:failed].add_instance values[:instance]
|
589
|
-
end
|
590
|
-
end
|
591
|
-
end
|
592
|
-
|
593
|
-
is_valid = true
|
594
|
-
instances.each_value do |instance|
|
595
|
-
unless check_instance instance
|
596
|
-
is_valid = false
|
597
|
-
|
598
|
-
unless params.nil? || params.empty?
|
599
|
-
process_params.call :content => contents[instance.checksum], :instance => instance
|
600
|
-
end
|
601
|
-
end
|
602
|
-
end
|
603
|
-
|
604
|
-
is_valid
|
501
|
+
# merges content data a and content data b to a new content data and returns it.
|
502
|
+
# Neither argument is modified: the result starts as a copy of +b+ and
# receives every instance of +a+ through add_instance. When one side is
# nil the result is a copy of the other side.
#
# @param a [ContentData, nil]
# @param b [ContentData, nil]
# @return [ContentData] a new object
def self.merge(a, b)
  return ContentData.new(a.nil? ? b : a) if a.nil? || b.nil?
  merged = ContentData.new(b)
  # add A instances on top of the copy of B
  a.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
    merged.add_instance(checksum, size, server, path, instance_mod_time)
  end
  merged
end
|
606
512
|
|
607
|
-
def
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
Log.warning err_msg
|
616
|
-
end
|
617
|
-
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
618
|
-
if File.mtime(path).to_i != instance.modification_time.to_i
|
619
|
-
is_valid = false
|
620
|
-
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
621
|
-
+ "indexed #{instance.modification_time}"
|
622
|
-
Log.warning err_msg
|
623
|
-
end
|
624
|
-
else
|
625
|
-
is_valid = false
|
626
|
-
err_msg = "Indexed file #{path} doesn't exist"
|
627
|
-
Log.warning err_msg
|
628
|
-
end
|
629
|
-
is_valid
|
513
|
+
# Adds every instance of +a+ directly into +b+ and returns +b+ itself, so
# the caller's +b+ is modified. When one side is nil a new copy of the
# other side is returned instead.
#
# @param a [ContentData, nil]
# @param b [ContentData, nil] updated in place when both are given
# @return [ContentData]
def self.merge_override_b(a, b)
  return ContentData.new(a.nil? ? b : a) if a.nil? || b.nil?
  # add A instances to content data B
  a.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
    b.add_instance(checksum, size, server, path, instance_mod_time)
  end
  b
end
|
631
522
|
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
523
|
+
# B - A : Remove contents of A from B and return the new content data.
|
524
|
+
# instances are ignored
|
525
|
+
# e.g
|
526
|
+
# A db:
|
527
|
+
# Content_1 ->
|
528
|
+
# Instance_1
|
529
|
+
# Instance_2
|
530
|
+
#
|
531
|
+
# Content_2 ->
|
532
|
+
# Instance_3
|
533
|
+
#
|
534
|
+
# B db:
|
535
|
+
# Content_1 ->
|
536
|
+
# Instance_1
|
537
|
+
# Instance_2
|
538
|
+
#
|
539
|
+
# Content_2 ->
|
540
|
+
# Instance_3
|
541
|
+
# Instance_4
|
542
|
+
# Content_3 ->
|
543
|
+
# Instance_5
|
544
|
+
# B-A db:
|
545
|
+
# Content_3 ->
|
546
|
+
# Instance_5
|
547
|
+
# @param a [ContentData, nil] contents to subtract
# @param b [ContentData, nil] source; it is copied, not modified
# @return [ContentData, nil] nil when +b+ is nil, otherwise a new object
def self.remove(a, b)
  return nil if b.nil?
  result = ContentData.new(b) # new cloned content data built from B
  return result if a.nil?
  # remove contents of A from the newly cloned copy of B
  a.each_content { |checksum, _size, _content_mod_time| result.remove_content(checksum) }
  result
end
|
647
557
|
|
648
|
-
#
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
558
|
+
# B - A : Remove instances of content data A from content data B and return the new content data.
|
559
|
+
# If all instances are removed then the content record itself will be removed
|
560
|
+
# e.g
|
561
|
+
# A db:
|
562
|
+
# Content_1 ->
|
563
|
+
# Instance_1
|
564
|
+
# Instance_2
|
565
|
+
#
|
566
|
+
# Content_2 ->
|
567
|
+
# Instance_3
|
568
|
+
#
|
569
|
+
# B db:
|
570
|
+
# Content_1 ->
|
571
|
+
# Instance_1
|
572
|
+
# Instance_2
|
573
|
+
#
|
574
|
+
# Content_2 ->
|
575
|
+
# Instance_3
|
576
|
+
# Instance_4
|
577
|
+
# B-A db:
|
578
|
+
# Content_2 ->
|
579
|
+
# Instance_4
|
580
|
+
# @param a [ContentData, nil] instances to subtract
# @param b [ContentData, nil] source; it is copied, not modified
# @return [ContentData, nil] nil when +b+ is nil, otherwise a new object
def self.remove_instances(a, b)
  return nil if b.nil?
  result = ContentData.new(b) # new cloned content data built from B
  return result if a.nil?
  # remove instances of A from the newly cloned copy of B
  a.each_instance do |checksum, _size, _content_mod_time, _instance_mod_time, server, path|
    result.remove_instance([server, path], checksum)
  end
  result
end
|
660
591
|
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
def get_query(variable, params)
|
669
|
-
raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
|
670
|
-
|
671
|
-
exact = params['exact'].nil? ? Array.new : params['exact']
|
672
|
-
from = params['from']
|
673
|
-
to = params ['to']
|
674
|
-
is_inside = params['is_inside']
|
675
|
-
|
676
|
-
unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
|
677
|
-
raise ArgumentError "#{variable} isn't a ContentInstance variable"
|
678
|
-
end
|
679
|
-
|
680
|
-
if (exact.nil? && from.nil? && to.nil?)
|
681
|
-
raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
|
682
|
-
end
|
683
|
-
|
684
|
-
if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
|
685
|
-
raise ArgumentError 'to and from arguments should be comparable one with another'
|
686
|
-
end
|
687
|
-
|
688
|
-
# FIXME add support for from/to for Strings
|
689
|
-
if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
|
690
|
-
|| (!to.nil? && to.kind_of?(Numeric.new.class)))
|
691
|
-
raise ArgumentError 'from and to options supported only for numeric values'
|
692
|
-
end
|
693
|
-
|
694
|
-
if (!exact.empty? && (!from.nil? || !to.nil?))
|
695
|
-
raise ArgumentError 'exact and from/to options are mutually exclusive'
|
696
|
-
end
|
697
|
-
|
698
|
-
result_index = ContentData.new
|
699
|
-
instances.each_value do |instance|
|
700
|
-
is_match = false
|
701
|
-
var_value = instance.instance_variable_get("@#{variable}")
|
702
|
-
|
703
|
-
if exact.include? var_value
|
704
|
-
is_match = true
|
705
|
-
elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
|
706
|
-
is_match = true
|
707
|
-
end
|
708
|
-
|
709
|
-
if (is_match && is_inside) || (!is_match && !is_inside)
|
710
|
-
checksum = instance.checksum
|
711
|
-
result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
|
712
|
-
result_index.add_instance instance
|
592
|
+
# Builds a new content data holding every instance of +content_data+
# except those on +server_to_remove+ whose path matches +dir_to_remove+.
# The input object is not modified.
# NOTE(review): the match succeeds when dir_to_remove occurs anywhere in
# the path, not only as a leading directory - confirm this is intended.
#
# @return [ContentData, nil] nil when +content_data+ is nil
def self.remove_directory(content_data, dir_to_remove, server_to_remove)
  return nil if content_data.nil?
  kept = ContentData.new()
  content_data.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
    # keep the instance if it is on another server or its path does not include the dir
    if server_to_remove != server || path.index(dir_to_remove).nil?
      kept.add_instance(checksum.clone, size, server, path.clone, instance_mod_time)
    end
  end
  kept
end
|
717
603
|
|
718
|
-
|
604
|
+
# returns the common content in both a and b
|
605
|
+
# Computed as B - (B - A) using remove, so the result holds the contents
# of +b+ whose checksums also appear in +a+.
#
# @return [ContentData, nil] nil when either argument is nil
def self.intersect(a, b)
  return nil if a.nil? || b.nil?
  remove(remove(a, b), b)
end
|
719
611
|
end
|
720
612
|
|