content_data 0.0.9 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/content_data/content_data.rb +637 -312
- data/lib/content_data/dynamic_content_data.rb +42 -43
- data/lib/content_data/version.rb +2 -4
- data/lib/content_data.rb +1 -3
- data/test/content_data/content_data_test.rb +137 -138
- metadata +5 -5
@@ -2,397 +2,722 @@ require 'log'
|
|
2
2
|
require 'params'
|
3
3
|
require 'time'
|
4
4
|
|
5
|
-
module
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
if (content_serializer.size == nil)
|
19
|
-
raise ArgumentError.new("size have to be defined")
|
20
|
-
else
|
21
|
-
@size = content_serializer.size
|
22
|
-
end
|
23
|
-
if (content_serializer.first_appearance_time == nil)
|
24
|
-
raise ArgumentError.new("first_appearance_time have to be defined")
|
25
|
-
else
|
26
|
-
@first_appearance_time = ContentData.parse_time(content_serializer.first_appearance_time)
|
27
|
-
end
|
28
|
-
|
5
|
+
module ContentData
|
6
|
+
Params.string('instance_check_level', 'shallow', 'Defines check level. Supported levels are: ' \
|
7
|
+
'shallow - quick, tests instance for file existence and attributes. ' \
|
8
|
+
'deep - can take more time, in addition to shallow recalculates hash sum.')
|
9
|
+
|
10
|
+
class Content
|
11
|
+
attr_reader :checksum, :size, :first_appearance_time
|
12
|
+
|
13
|
+
def initialize(checksum, size, first_appearance_time, content_serializer = nil)
|
14
|
+
if content_serializer != nil
|
15
|
+
if (content_serializer.checksum == nil)
|
16
|
+
raise ArgumentError.new("checksum have to be defined")
|
29
17
|
else
|
30
|
-
@checksum = checksum
|
31
|
-
|
32
|
-
|
18
|
+
@checksum = content_serializer.checksum
|
19
|
+
end
|
20
|
+
if (content_serializer.size == nil)
|
21
|
+
raise ArgumentError.new("size have to be defined")
|
22
|
+
else
|
23
|
+
@size = content_serializer.size
|
24
|
+
end
|
25
|
+
if (content_serializer.first_appearance_time == nil)
|
26
|
+
raise ArgumentError.new("first_appearance_time have to be defined")
|
27
|
+
else
|
28
|
+
@first_appearance_time = ContentData.parse_time(content_serializer.first_appearance_time)
|
33
29
|
end
|
34
|
-
end
|
35
30
|
|
36
|
-
|
37
|
-
|
31
|
+
else
|
32
|
+
@checksum = checksum
|
33
|
+
@size = size
|
34
|
+
@first_appearance_time = first_appearance_time
|
38
35
|
end
|
36
|
+
end
|
39
37
|
|
40
|
-
|
41
|
-
|
42
|
-
self.size.eql? other.size and
|
43
|
-
self.first_appearance_time.to_i.eql? other.first_appearance_time.to_i)
|
44
|
-
end
|
38
|
+
def to_s
|
39
|
+
"%s,%d,%s" % [@checksum, @size, ContentData.format_time(@first_appearance_time)]
|
45
40
|
end
|
46
41
|
|
47
|
-
|
48
|
-
|
42
|
+
def ==(other)
|
43
|
+
return (self.checksum.eql? other.checksum and
|
44
|
+
self.size.eql? other.size and
|
45
|
+
self.first_appearance_time.to_i.eql? other.first_appearance_time.to_i)
|
46
|
+
end
|
47
|
+
end
|
49
48
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
if (content_instance_serializer.modification_time == nil)
|
63
|
-
raise ArgumentError.new("modification_time have to be defined")
|
64
|
-
else
|
65
|
-
@modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
|
66
|
-
end
|
67
|
-
if (content_instance_serializer.server_name == nil)
|
68
|
-
raise ArgumentError.new("server_name have to be defined")
|
69
|
-
else
|
70
|
-
@server_name = content_instance_serializer.server_name
|
71
|
-
end
|
72
|
-
if (content_instance_serializer.device == nil)
|
73
|
-
raise ArgumentError.new("device have to be defined")
|
74
|
-
else
|
75
|
-
@device = content_instance_serializer.device
|
76
|
-
end
|
77
|
-
if (content_instance_serializer.full_path == nil)
|
78
|
-
raise ArgumentError.new("full_path have to be defined")
|
79
|
-
else
|
80
|
-
@full_path = content_instance_serializer.full_path
|
81
|
-
end
|
49
|
+
class ContentInstance
|
50
|
+
attr_reader :checksum, :size, :server_name, :device, :full_path, :modification_time
|
51
|
+
|
52
|
+
def initialize(checksum, size, server_name, device, full_path, modification_time, content_instance_serializer = nil)
|
53
|
+
if content_instance_serializer != nil
|
54
|
+
if (content_instance_serializer.checksum == nil)
|
55
|
+
raise ArgumentError.new("checksum have to be defined")
|
56
|
+
else
|
57
|
+
@checksum = content_instance_serializer.checksum
|
58
|
+
end
|
59
|
+
if (content_instance_serializer.size == nil)
|
60
|
+
raise ArgumentError.new("size have to be defined")
|
82
61
|
else
|
83
|
-
@
|
84
|
-
@size = size
|
85
|
-
@server_name = server_name
|
86
|
-
@device = device
|
87
|
-
@full_path = full_path
|
88
|
-
@modification_time = modification_time
|
62
|
+
@size = content_instance_serializer.size
|
89
63
|
end
|
64
|
+
if (content_instance_serializer.modification_time == nil)
|
65
|
+
raise ArgumentError.new("modification_time have to be defined")
|
66
|
+
else
|
67
|
+
@modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
|
68
|
+
end
|
69
|
+
if (content_instance_serializer.server_name == nil)
|
70
|
+
raise ArgumentError.new("server_name have to be defined")
|
71
|
+
else
|
72
|
+
@server_name = content_instance_serializer.server_name
|
73
|
+
end
|
74
|
+
if (content_instance_serializer.device == nil)
|
75
|
+
raise ArgumentError.new("device have to be defined")
|
76
|
+
else
|
77
|
+
@device = content_instance_serializer.device
|
78
|
+
end
|
79
|
+
if (content_instance_serializer.full_path == nil)
|
80
|
+
raise ArgumentError.new("full_path have to be defined")
|
81
|
+
else
|
82
|
+
@full_path = content_instance_serializer.full_path
|
83
|
+
end
|
84
|
+
else
|
85
|
+
@checksum = checksum
|
86
|
+
@size = size
|
87
|
+
@server_name = server_name
|
88
|
+
@device = device
|
89
|
+
@full_path = full_path
|
90
|
+
@modification_time = modification_time
|
90
91
|
end
|
92
|
+
end
|
91
93
|
|
92
|
-
|
93
|
-
|
94
|
-
|
94
|
+
def global_path
|
95
|
+
ContentInstance.instance_global_path(@server_name, @full_path)
|
96
|
+
end
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
98
|
+
def ContentInstance.instance_global_path(server_name, full_path)
|
99
|
+
"%s:%s" % [server_name, full_path]
|
100
|
+
end
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
102
|
+
def to_s
|
103
|
+
"%s,%d,%s,%s,%s,%s" % [@checksum, @size, @server_name,
|
104
|
+
@device, @full_path, ContentData.format_time(@modification_time)]
|
105
|
+
end
|
104
106
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
107
|
+
def ==(other)
|
108
|
+
return (self.checksum.eql? other.checksum and
|
109
|
+
self.size.eql? other.size and
|
110
|
+
self.server_name.eql? other.server_name and
|
111
|
+
self.device.eql? other.device and
|
112
|
+
self.full_path.eql? other.full_path and
|
113
|
+
self.modification_time.to_i.eql? other.modification_time.to_i)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# Unfortunately this class is used as mutable for now, so be careful.
|
118
|
+
# TODO(kolman): Make this class immutable, but add an indexing structure to it.
|
119
|
+
# TODO(kolman): Add wrapper to the class to enable dynamic content data
|
120
|
+
# (with easy access indexes)
|
121
|
+
class ContentData
|
122
|
+
attr_reader :contents, :instances
|
123
|
+
|
124
|
+
# @param copy [ContentData, nil] optional instance whose contents and instances hashes are cloned
|
125
|
+
def initialize(copy = nil)
|
126
|
+
if copy.nil?
|
127
|
+
@contents = Hash.new # key is a checksum , value is a refernce to the Content object
|
128
|
+
@instances = Hash.new # key is an instance global path , value is a reference to the ContentInstance object
|
129
|
+
else
|
130
|
+
# Regenerate only the hashes, the values are immutable.
|
131
|
+
@contents = copy.contents.clone
|
132
|
+
@instances = copy.instances.clone
|
112
133
|
end
|
113
134
|
end
|
114
135
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
# (with easy access indexes)
|
119
|
-
class ContentData
|
120
|
-
attr_reader :contents, :instances
|
136
|
+
def add_content(content)
|
137
|
+
@contents[content.checksum] = content
|
138
|
+
end
|
121
139
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
140
|
+
def add_instance(instance)
|
141
|
+
if (not @contents.key?(instance.checksum))
|
142
|
+
Log.warning sprintf("Adding instance while it's" +
|
143
|
+
" checksum %s does not exists.\n", instance.checksum)
|
144
|
+
Log.warning sprintf("%s\n", instance.to_s)
|
145
|
+
return false
|
146
|
+
elsif (@contents[instance.checksum].size != instance.size)
|
147
|
+
Log.warning 'File size different from content size while same checksum'
|
148
|
+
Log.warning instance.to_s
|
149
|
+
return false
|
132
150
|
end
|
133
151
|
|
134
|
-
|
135
|
-
@contents[content.checksum] = content
|
136
|
-
end
|
152
|
+
key = instance.global_path
|
137
153
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
154
|
+
#override file if needed
|
155
|
+
@instances[key] = instance
|
156
|
+
end
|
157
|
+
|
158
|
+
def empty?
|
159
|
+
@contents.empty?
|
160
|
+
end
|
161
|
+
|
162
|
+
# TODO rename method with finishing '?', cause it returns a boolean
|
163
|
+
def content_exists(checksum)
|
164
|
+
@contents.key? checksum
|
165
|
+
end
|
166
|
+
|
167
|
+
# TODO(kolman): The semantics of this merge are those of merge! (it modifies the receiver); change it throughout the file.
|
168
|
+
def merge(content_data)
|
169
|
+
content_data.contents.values.each { |content|
|
170
|
+
add_content(content)
|
171
|
+
}
|
172
|
+
content_data.instances.values.each { |instance|
|
173
|
+
add_instance(instance)
|
174
|
+
}
|
175
|
+
end
|
176
|
+
|
177
|
+
def ==(other)
|
178
|
+
return false if other == nil
|
179
|
+
return false unless @contents.size == other.contents.size
|
180
|
+
return false unless @instances.size == other.instances.size
|
181
|
+
|
182
|
+
@contents.keys.each { |key|
|
183
|
+
if (@contents[key] != other.contents[key])
|
184
|
+
Log.info @contents[key].first_appearance_time.to_i
|
185
|
+
Log.info other.contents[key].first_appearance_time.to_i
|
143
186
|
return false
|
144
|
-
|
145
|
-
|
146
|
-
|
187
|
+
end
|
188
|
+
}
|
189
|
+
@instances.keys.each { |key|
|
190
|
+
if (@instances[key] != other.instances[key])
|
147
191
|
return false
|
148
192
|
end
|
193
|
+
}
|
194
|
+
return true
|
195
|
+
end
|
149
196
|
|
150
|
-
|
197
|
+
def to_s
|
198
|
+
ret = ""
|
199
|
+
ret << @contents.length.to_s << "\n"
|
200
|
+
@contents.each_value { |content|
|
201
|
+
ret << content.to_s << "\n"
|
202
|
+
}
|
203
|
+
ret << @instances.length.to_s << "\n"
|
204
|
+
@instances.each_value { |instance|
|
205
|
+
ret << instance.to_s << "\n"
|
206
|
+
}
|
207
|
+
return ret
|
208
|
+
end
|
209
|
+
|
210
|
+
def to_file(filename)
|
211
|
+
content_data_dir = File.dirname(filename)
|
212
|
+
FileUtils.makedirs(content_data_dir) unless File.directory?(content_data_dir)
|
213
|
+
File.open(filename, 'w') {|f| f.write(to_s) }
|
214
|
+
end
|
151
215
|
|
152
|
-
|
153
|
-
|
216
|
+
# TODO: validation that the file indeed contains ContentData is missing
|
217
|
+
def from_file(filename)
|
218
|
+
lines = IO.readlines(filename)
|
219
|
+
i = 0
|
220
|
+
number_of_contents = lines[i].to_i
|
221
|
+
i += 1
|
222
|
+
number_of_contents.times {
|
223
|
+
parameters = lines[i].split(",")
|
224
|
+
add_content(Content.new(parameters[0],
|
225
|
+
parameters[1].to_i,
|
226
|
+
ContentData.parse_time(parameters[2])))
|
227
|
+
i += 1
|
228
|
+
}
|
229
|
+
|
230
|
+
number_of_instances = lines[i].to_i
|
231
|
+
i += 1
|
232
|
+
number_of_instances.times {
|
233
|
+
if lines[i].nil?
|
234
|
+
Log.info "lines[i] if nil !!!, Backing filename: #{filename} to #{filename}.bad"
|
235
|
+
FileUtils.cp(filename, "#{filename}.bad")
|
236
|
+
Log.info lines[i].join("\n")
|
237
|
+
end
|
238
|
+
parameters = lines[i].split(',')
|
239
|
+
# bugfix: if the file name contains a comma, parsing based on comma separation fails
|
240
|
+
if (parameters.size > 6)
|
241
|
+
(5..parameters.size-2).each do |i|
|
242
|
+
parameters[4] = [parameters[4], parameters[i]].join(",")
|
243
|
+
end
|
244
|
+
(5..parameters.size-2).each do |i|
|
245
|
+
parameters.delete_at(5)
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
add_instance(ContentInstance.new(parameters[0],
|
250
|
+
parameters[1].to_i,
|
251
|
+
parameters[2],
|
252
|
+
parameters[3],
|
253
|
+
parameters[4],
|
254
|
+
ContentData.parse_time(parameters[5])))
|
255
|
+
i += 1
|
256
|
+
}
|
257
|
+
end
|
258
|
+
|
259
|
+
def self.parse_time time_str
|
260
|
+
return nil unless time_str.instance_of? String
|
261
|
+
seconds_from_epoch = Integer time_str # Not using to_i here because it does not check string is integer.
|
262
|
+
time = Time.at seconds_from_epoch
|
263
|
+
end
|
264
|
+
|
265
|
+
def self.format_time(time)
|
266
|
+
return nil unless time.instance_of?Time
|
267
|
+
str = time.to_i.to_s
|
268
|
+
return str
|
269
|
+
end
|
270
|
+
|
271
|
+
# merges content data a and content data b to a new content data and returns it.
|
272
|
+
def self.merge(a, b)
|
273
|
+
return b unless not a.nil?
|
274
|
+
return a unless not b.nil?
|
275
|
+
|
276
|
+
return nil unless a.instance_of?ContentData
|
277
|
+
return nil unless b.instance_of?ContentData
|
278
|
+
|
279
|
+
ret = ContentData.new
|
280
|
+
ret.merge(a)
|
281
|
+
ret.merge(b)
|
282
|
+
|
283
|
+
return ret
|
284
|
+
end
|
285
|
+
|
286
|
+
# removes content data a from content data b and returns the new content data.
|
287
|
+
def self.remove(a, b)
|
288
|
+
return nil unless a.instance_of?ContentData
|
289
|
+
return nil unless b.instance_of?ContentData
|
290
|
+
|
291
|
+
ret = ContentData.new
|
292
|
+
|
293
|
+
b.contents.values.each { |content|
|
294
|
+
#print "%s - %s\n" % [content.checksum, a.content_exists(content.checksum).to_s]
|
295
|
+
ret.add_content(content) unless a.content_exists(content.checksum)
|
296
|
+
}
|
297
|
+
|
298
|
+
#Log.info "kaka"
|
299
|
+
|
300
|
+
b.instances.values.each { |instance|
|
301
|
+
#print "%s - %s\n" % [instance.checksum, a.content_exists(instance.checksum).to_s]
|
302
|
+
ret.add_instance(instance) unless a.content_exists(instance.checksum)
|
303
|
+
}
|
304
|
+
|
305
|
+
#print "kuku %s" % ret.contents.size.to_s
|
306
|
+
#print "kuku %s" % ret.instances.size.to_s
|
307
|
+
return ret
|
308
|
+
end
|
309
|
+
|
310
|
+
def self.remove_instances(a, b)
|
311
|
+
return nil unless a.instance_of?ContentData
|
312
|
+
return nil unless b.instance_of?ContentData
|
313
|
+
|
314
|
+
ret = ContentData.new
|
315
|
+
b.instances.values.each do |instance|
|
316
|
+
if !a.instances.key?(instance.global_path)
|
317
|
+
ret.add_content(b.contents[instance.checksum])
|
318
|
+
ret.add_instance(instance)
|
319
|
+
end
|
154
320
|
end
|
321
|
+
return ret
|
322
|
+
end
|
155
323
|
|
156
|
-
|
157
|
-
|
324
|
+
def self.remove_directory(cd, global_dir_path)
|
325
|
+
return nil unless cd.instance_of?ContentData
|
326
|
+
|
327
|
+
ret = ContentData.new
|
328
|
+
cd.instances.values.each do |instance|
|
329
|
+
Log.debug3("global path to check: #{global_dir_path}")
|
330
|
+
Log.debug3("instance global path: #{instance.global_path}")
|
331
|
+
if instance.global_path.scan(global_dir_path).size == 0
|
332
|
+
Log.debug3("Adding instance.")
|
333
|
+
ret.add_content(cd.contents[instance.checksum])
|
334
|
+
ret.add_instance(instance)
|
335
|
+
end
|
158
336
|
end
|
337
|
+
return ret
|
338
|
+
end
|
339
|
+
|
340
|
+
# returns the common content in both a and b
|
341
|
+
def self.intersect(a, b)
|
342
|
+
b_minus_a = ContentData.remove(a, b)
|
343
|
+
return ContentData.remove(b_minus_a, b)
|
344
|
+
end
|
159
345
|
|
160
|
-
|
161
|
-
|
346
|
+
# unify time for all entries with same content to minimal time
|
347
|
+
def self.unify_time(db)
|
348
|
+
mod_db = ContentData.new # resulting ContentData that will consists objects with unified time
|
349
|
+
checksum2time = Hash.new # key=checksum value=min_time_for_this_checksum
|
350
|
+
checksum2instances = Hash.new # key=checksum value=array_of_instances_with_this_checksum (Will be replaced with ContentData method)
|
351
|
+
|
352
|
+
# populate tables with given ContentData entries
|
353
|
+
db.instances.each_value do |instance|
|
354
|
+
checksum = instance.checksum
|
355
|
+
time = instance.modification_time
|
356
|
+
|
357
|
+
unless (checksum2instances.has_key? checksum)
|
358
|
+
checksum2instances[checksum] = []
|
359
|
+
end
|
360
|
+
checksum2instances[checksum] << instance
|
361
|
+
|
362
|
+
if (not checksum2time.has_key? checksum)
|
363
|
+
checksum2time[checksum] = time
|
364
|
+
elsif ((checksum2time[checksum] <=> time) > 0)
|
365
|
+
checksum2time[checksum] = time
|
366
|
+
end
|
162
367
|
end
|
163
368
|
|
164
|
-
#
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
369
|
+
# update min time table with time information from contents
|
370
|
+
db.contents.each do |checksum, content|
|
371
|
+
time = content.first_appearance_time
|
372
|
+
if (not checksum2time.has_key? checksum)
|
373
|
+
checksum2time[checksum] = time
|
374
|
+
elsif ((checksum2time[checksum] <=> time) > 0)
|
375
|
+
checksum2time[checksum] = time
|
376
|
+
end
|
172
377
|
end
|
173
378
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
379
|
+
# add content entries to the output table; where needed, update the time field with the found min time
|
380
|
+
db.contents.each do |checksum, content|
|
381
|
+
time = checksum2time[checksum]
|
382
|
+
if ((content.first_appearance_time <=> time) == 0)
|
383
|
+
mod_db.add_content(content)
|
384
|
+
else
|
385
|
+
mod_db.add_content(Content.new(checksum, content.size, time))
|
386
|
+
end
|
387
|
+
end
|
178
388
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
389
|
+
# add instance entries to the output table; where needed, update the time field with the found min time
|
390
|
+
checksum2instances.each do |checksum, instances|
|
391
|
+
time = checksum2time[checksum]
|
392
|
+
instances.each do |instance|
|
393
|
+
if ((instance.modification_time <=> time) == 0)
|
394
|
+
mod_db.add_instance(instance)
|
395
|
+
else # must be bigger then found min time
|
396
|
+
mod_instance = ContentInstance.new(instance.checksum, instance.size,
|
397
|
+
instance.server_name, instance.device,
|
398
|
+
instance.full_path, time)
|
399
|
+
mod_db.add_instance(mod_instance)
|
189
400
|
end
|
190
|
-
|
191
|
-
return true
|
401
|
+
end
|
192
402
|
end
|
403
|
+
mod_db
|
404
|
+
end
|
193
405
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
406
|
+
# Validates index against file system that all instances hold a correct data regarding files
|
407
|
+
# that they represent.
|
408
|
+
#
|
409
|
+
# There are two levels of validation, controlled by instance_check_level system parameter:
|
410
|
+
# * shallow - quick, tests instance for file existence and attributes.
|
411
|
+
# * deep - can take more time, in addition to shallow recalculates hash sum.
|
412
|
+
# @param [Hash] params hash of parameters of validation, can be used to return additional data.
|
413
|
+
#
|
414
|
+
# Supported key/value combinations:
|
415
|
+
# * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
|
416
|
+
# @return [Boolean] true when index is correct, false otherwise
|
417
|
+
def validate(params = nil)
|
418
|
+
# used to answer whether specific param was set
|
419
|
+
param_exists = Proc.new do |param|
|
420
|
+
!(params.nil? || params[param].nil?)
|
205
421
|
end
|
206
422
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
423
|
+
# used to process method parameters centrally
|
424
|
+
process_params = Proc.new do |values|
|
425
|
+
# values is a Hash with keys: :content, :instance and value appropriate to key
|
426
|
+
if param_exists.call :failed
|
427
|
+
unless values[:content].nil?
|
428
|
+
params[:failed].add_content values[:content]
|
429
|
+
end
|
430
|
+
unless values[:instance].nil?
|
431
|
+
# appropriate content should be already added
|
432
|
+
params[:failed].add_instance values[:instance]
|
433
|
+
end
|
434
|
+
end
|
211
435
|
end
|
212
436
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
add_content(Content.new(parameters[0],
|
221
|
-
parameters[1].to_i,
|
222
|
-
ContentData.parse_time(parameters[2])))
|
223
|
-
i += 1
|
224
|
-
}
|
225
|
-
|
226
|
-
number_of_instances = lines[i].to_i
|
227
|
-
i += 1
|
228
|
-
number_of_instances.times {
|
229
|
-
parameters = lines[i].split(',')
|
230
|
-
# bugfix: if file name consist a comma then parsing based on comma separating fails
|
231
|
-
if (parameters.size > 6)
|
232
|
-
(5..parameters.size-2).each do |i|
|
233
|
-
parameters[4] = [parameters[4], parameters[i]].join(",")
|
234
|
-
end
|
235
|
-
(5..parameters.size-2).each do |i|
|
236
|
-
parameters.delete_at(5)
|
237
|
-
end
|
437
|
+
is_valid = true
|
438
|
+
instances.each_value do |instance|
|
439
|
+
unless check_instance instance
|
440
|
+
is_valid = false
|
441
|
+
|
442
|
+
unless params.nil? || params.empty?
|
443
|
+
process_params.call :content => contents[instance.checksum], :instance => instance
|
238
444
|
end
|
445
|
+
end
|
446
|
+
end
|
447
|
+
|
448
|
+
is_valid
|
449
|
+
end
|
239
450
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
451
|
+
def shallow_check(instance)
|
452
|
+
path = instance.full_path
|
453
|
+
is_valid = true
|
454
|
+
|
455
|
+
if (File.exists?(path))
|
456
|
+
if File.size(path) != instance.size
|
457
|
+
is_valid = false
|
458
|
+
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{instance.size}"
|
459
|
+
Log.warning err_msg
|
460
|
+
end
|
461
|
+
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
462
|
+
if File.mtime(path).to_i != instance.modification_time.to_i
|
463
|
+
is_valid = false
|
464
|
+
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
465
|
+
+ "indexed #{instance.modification_time}"
|
466
|
+
Log.warning err_msg
|
467
|
+
end
|
468
|
+
else
|
469
|
+
is_valid = false
|
470
|
+
err_msg = "Indexed file #{path} doesn't exist"
|
471
|
+
Log.warning err_msg
|
248
472
|
end
|
473
|
+
is_valid
|
474
|
+
end
|
249
475
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
476
|
+
def deep_check(instance)
|
477
|
+
if shallow_check(instance)
|
478
|
+
path = instance.full_path
|
479
|
+
current_checksum = FileIndexing::IndexAgent.get_checksum(path)
|
480
|
+
if instance.checksum == current_checksum
|
481
|
+
true
|
482
|
+
else
|
483
|
+
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance.checksum}"
|
484
|
+
Log.warning err_msg
|
485
|
+
false
|
486
|
+
end
|
487
|
+
else
|
488
|
+
false
|
254
489
|
end
|
490
|
+
end
|
255
491
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
492
|
+
def check_instance(instance)
|
493
|
+
case Params['instance_check_level']
|
494
|
+
when 'deep'
|
495
|
+
deep_check instance
|
496
|
+
when 'shallow'
|
497
|
+
shallow_check instance
|
498
|
+
else
|
499
|
+
# TODO remove it when params will support set of values
|
500
|
+
throw ArgumentError.new "Unsupported check level #{Params['instance_check_level']}"
|
260
501
|
end
|
502
|
+
end
|
261
503
|
|
262
|
-
# merges content data a and content data b to a new content data and returns it.
|
263
|
-
def self.merge(a, b)
|
264
|
-
return b unless not a.nil?
|
265
|
-
return a unless not b.nil?
|
266
504
|
|
267
|
-
|
268
|
-
|
505
|
+
# TODO simplify conditions
|
506
|
+
# This method is experimental and shouldn't be used
|
507
|
+
# nil is used to define +/- infinity for to/from method arguments
|
508
|
+
# from/to values are exclusive in the condition's calculations
|
509
|
+
# Need to take care about '==' operation that is used for object's comparison.
|
510
|
+
# If needed, the user should define their own '==' implementation.
|
511
|
+
def get_query(variable, params)
|
512
|
+
raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
|
269
513
|
|
270
|
-
|
271
|
-
|
272
|
-
|
514
|
+
exact = params['exact'].nil? ? Array.new : params['exact']
|
515
|
+
from = params['from']
|
516
|
+
to = params ['to']
|
517
|
+
is_inside = params['is_inside']
|
273
518
|
|
274
|
-
|
519
|
+
unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
|
520
|
+
raise ArgumentError "#{variable} isn't a ContentInstance variable"
|
275
521
|
end
|
276
522
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
return nil unless b.instance_of?ContentData
|
523
|
+
if (exact.nil? && from.nil? && to.nil?)
|
524
|
+
raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
|
525
|
+
end
|
281
526
|
|
282
|
-
|
527
|
+
if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
|
528
|
+
raise ArgumentError 'to and from arguments should be comparable one with another'
|
529
|
+
end
|
283
530
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
531
|
+
# FIXME add support for from/to for Strings
|
532
|
+
if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
|
533
|
+
|| (!to.nil? && to.kind_of?(Numeric.new.class)))
|
534
|
+
raise ArgumentError 'from and to options supported only for numeric values'
|
535
|
+
end
|
288
536
|
|
289
|
-
|
537
|
+
if (!exact.empty? && (!from.nil? || !to.nil?))
|
538
|
+
raise ArgumentError 'exact and from/to options are mutually exclusive'
|
539
|
+
end
|
290
540
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
}
|
541
|
+
result_index = ContentData.new
|
542
|
+
instances.each_value do |instance|
|
543
|
+
is_match = false
|
544
|
+
var_value = instance.instance_variable_get("@#{variable}")
|
295
545
|
|
296
|
-
|
297
|
-
|
298
|
-
|
546
|
+
if exact.include? var_value
|
547
|
+
is_match = true
|
548
|
+
elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
|
549
|
+
is_match = true
|
550
|
+
end
|
551
|
+
|
552
|
+
if (is_match && is_inside) || (!is_match && !is_inside)
|
553
|
+
checksum = instance.checksum
|
554
|
+
result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
|
555
|
+
result_index.add_instance instance
|
556
|
+
end
|
299
557
|
end
|
558
|
+
result_index
|
559
|
+
end
|
300
560
|
|
301
|
-
|
302
|
-
|
303
|
-
return nil unless b.instance_of?ContentData
|
561
|
+
private :shallow_check, :deep_check, :check_instance
|
562
|
+
end
|
304
563
|
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
564
|
+
# Validates index against file system that all instances hold a correct data regarding files
|
565
|
+
# that they represent.
|
566
|
+
#
|
567
|
+
# There are two levels of validation, controlled by instance_check_level system parameter:
|
568
|
+
# * shallow - quick, tests instance for file existence and attributes.
|
569
|
+
# * deep - can take more time, in addition to shallow recalculates hash sum.
|
570
|
+
# @param [Hash] params hash of parameters of validation, can be used to return additional data.
|
571
|
+
#
|
572
|
+
# Supported key/value combinations:
|
573
|
+
# * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
|
574
|
+
# @return [Boolean] true when index is correct, false otherwise
|
575
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
576
|
+
def validate(params = nil)
|
577
|
+
# used to answer whether specific param was set
|
578
|
+
param_exists = Proc.new do |param|
|
579
|
+
!(params.nil? || params[param].nil?)
|
580
|
+
end
|
581
|
+
|
582
|
+
# used to process method parameters centrally
|
583
|
+
process_params = Proc.new do |values|
|
584
|
+
# values is a Hash with keys: :content, :instance and value appropriate to key
|
585
|
+
if param_exists.call :failed
|
586
|
+
unless values[:content].nil?
|
587
|
+
params[:failed].add_content values[:content]
|
588
|
+
end
|
589
|
+
unless values[:instance].nil?
|
590
|
+
# appropriate content should be already added
|
591
|
+
params[:failed].add_instance values[:instance]
|
311
592
|
end
|
312
|
-
return ret
|
313
593
|
end
|
594
|
+
end
|
314
595
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
if instance.global_path.scan(global_dir_path).size == 0
|
323
|
-
Log.debug3("Adding instance.")
|
324
|
-
ret.add_content(cd.contents[instance.checksum])
|
325
|
-
ret.add_instance(instance)
|
326
|
-
end
|
596
|
+
is_valid = true
|
597
|
+
instances.each_value do |instance|
|
598
|
+
unless check_instance instance
|
599
|
+
is_valid = false
|
600
|
+
|
601
|
+
unless params.nil? || params.empty?
|
602
|
+
process_params.call :content => contents[instance.checksum], :instance => instance
|
327
603
|
end
|
328
|
-
return ret
|
329
604
|
end
|
605
|
+
end
|
606
|
+
|
607
|
+
is_valid
|
608
|
+
end
|
609
|
+
|
610
|
+
def shallow_check(instance)
|
611
|
+
path = instance.full_path
|
612
|
+
is_valid = true
|
330
613
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
614
|
+
if (File.exists?(path))
|
615
|
+
if File.size(path) != instance.size
|
616
|
+
is_valid = false
|
617
|
+
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{instance.size}"
|
618
|
+
Log.warning err_msg
|
335
619
|
end
|
620
|
+
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
621
|
+
if File.mtime(path).to_i != instance.modification_time.to_i
|
622
|
+
is_valid = false
|
623
|
+
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
624
|
+
+ "indexed #{instance.modification_time}"
|
625
|
+
Log.warning err_msg
|
626
|
+
end
|
627
|
+
else
|
628
|
+
is_valid = false
|
629
|
+
err_msg = "Indexed file #{path} doesn't exist"
|
630
|
+
Log.warning err_msg
|
631
|
+
end
|
632
|
+
is_valid
|
633
|
+
end
|
336
634
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
635
|
+
def deep_check(instance)
|
636
|
+
if shallow_check(instance)
|
637
|
+
path = instance.full_path
|
638
|
+
current_checksum = FileIndexing::IndexAgent.get_checksum(path)
|
639
|
+
if instance.checksum == current_checksum
|
640
|
+
true
|
641
|
+
else
|
642
|
+
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance.checksum}"
|
643
|
+
Log.warning err_msg
|
644
|
+
false
|
645
|
+
end
|
646
|
+
else
|
647
|
+
false
|
648
|
+
end
|
649
|
+
end
|
342
650
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
651
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
652
|
+
def check_instance(instance)
|
653
|
+
case Params['instance_check_level']
|
654
|
+
when 'deep'
|
655
|
+
deep_check instance
|
656
|
+
when 'shallow'
|
657
|
+
shallow_check instance
|
658
|
+
else
|
659
|
+
# TODO remove it when params will support set of values
|
660
|
+
throw ArgumentError.new "Unsupported check level #{Params['instance_check_level']}"
|
661
|
+
end
|
662
|
+
end
|
347
663
|
|
348
|
-
unless (checksum2instances.has_key? checksum)
|
349
|
-
checksum2instances[checksum] = []
|
350
|
-
end
|
351
|
-
checksum2instances[checksum] << instance
|
352
664
|
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
665
|
+
# TODO simplify conditions
|
666
|
+
# This method is experimental and shouldn't be used
|
667
|
+
# nil is used to define +/- infinity for to/from method arguments
|
668
|
+
# from/to values are exclusive in the condition's calculations
|
669
|
+
# Need to take care about '==' operation that is used for object's comparison.
|
670
|
+
# If needed, the user should define their own '==' implementation.
|
671
|
+
def get_query(variable, params)
|
672
|
+
raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
|
359
673
|
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
checksum2time[checksum] = time
|
365
|
-
elsif ((checksum2time[checksum] <=> time) > 0)
|
366
|
-
checksum2time[checksum] = time
|
367
|
-
end
|
368
|
-
end
|
674
|
+
exact = params['exact'].nil? ? Array.new : params['exact']
|
675
|
+
from = params['from']
|
676
|
+
to = params ['to']
|
677
|
+
is_inside = params['is_inside']
|
369
678
|
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
if ((content.first_appearance_time <=> time) == 0)
|
374
|
-
mod_db.add_content(content)
|
375
|
-
else
|
376
|
-
mod_db.add_content(Content.new(checksum, content.size, time))
|
377
|
-
end
|
378
|
-
end
|
679
|
+
unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
|
680
|
+
raise ArgumentError "#{variable} isn't a ContentInstance variable"
|
681
|
+
end
|
379
682
|
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
683
|
+
if (exact.nil? && from.nil? && to.nil?)
|
684
|
+
raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
|
685
|
+
end
|
686
|
+
|
687
|
+
if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
|
688
|
+
raise ArgumentError 'to and from arguments should be comparable one with another'
|
689
|
+
end
|
690
|
+
|
691
|
+
# FIXME add support for from/to for Strings
|
692
|
+
if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
|
693
|
+
|| (!to.nil? && to.kind_of?(Numeric.new.class)))
|
694
|
+
raise ArgumentError 'from and to options supported only for numeric values'
|
695
|
+
end
|
696
|
+
|
697
|
+
if (!exact.empty? && (!from.nil? || !to.nil?))
|
698
|
+
raise ArgumentError 'exact and from/to options are mutually exclusive'
|
699
|
+
end
|
700
|
+
|
701
|
+
result_index = ContentData.new
|
702
|
+
instances.each_value do |instance|
|
703
|
+
is_match = false
|
704
|
+
var_value = instance.instance_variable_get("@#{variable}")
|
705
|
+
|
706
|
+
if exact.include? var_value
|
707
|
+
is_match = true
|
708
|
+
elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
|
709
|
+
is_match = true
|
710
|
+
end
|
711
|
+
|
712
|
+
if (is_match && is_inside) || (!is_match && !is_inside)
|
713
|
+
checksum = instance.checksum
|
714
|
+
result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
|
715
|
+
result_index.add_instance instance
|
395
716
|
end
|
396
717
|
end
|
718
|
+
result_index
|
397
719
|
end
|
720
|
+
|
721
|
+
private :shallow_check, :deep_check, :check_instance, :get_query
|
398
722
|
end
|
723
|
+
|