content_data 0.0.9 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/content_data/content_data.rb +637 -312
- data/lib/content_data/dynamic_content_data.rb +42 -43
- data/lib/content_data/version.rb +2 -4
- data/lib/content_data.rb +1 -3
- data/test/content_data/content_data_test.rb +137 -138
- metadata +5 -5
@@ -2,397 +2,722 @@ require 'log'
|
|
2
2
|
require 'params'
|
3
3
|
require 'time'
|
4
4
|
|
5
|
-
module
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
if (content_serializer.size == nil)
|
19
|
-
raise ArgumentError.new("size have to be defined")
|
20
|
-
else
|
21
|
-
@size = content_serializer.size
|
22
|
-
end
|
23
|
-
if (content_serializer.first_appearance_time == nil)
|
24
|
-
raise ArgumentError.new("first_appearance_time have to be defined")
|
25
|
-
else
|
26
|
-
@first_appearance_time = ContentData.parse_time(content_serializer.first_appearance_time)
|
27
|
-
end
|
28
|
-
|
5
|
+
module ContentData
|
6
|
+
Params.string('instance_check_level', 'shallow', 'Defines check level. Supported levels are: ' \
|
7
|
+
'shallow - quick, tests instance for file existence and attributes. ' \
|
8
|
+
'deep - can take more time, in addition to shallow recalculates hash sum.')
|
9
|
+
|
10
|
+
class Content
|
11
|
+
attr_reader :checksum, :size, :first_appearance_time
|
12
|
+
|
13
|
+
def initialize(checksum, size, first_appearance_time, content_serializer = nil)
|
14
|
+
if content_serializer != nil
|
15
|
+
if (content_serializer.checksum == nil)
|
16
|
+
raise ArgumentError.new("checksum have to be defined")
|
29
17
|
else
|
30
|
-
@checksum = checksum
|
31
|
-
|
32
|
-
|
18
|
+
@checksum = content_serializer.checksum
|
19
|
+
end
|
20
|
+
if (content_serializer.size == nil)
|
21
|
+
raise ArgumentError.new("size have to be defined")
|
22
|
+
else
|
23
|
+
@size = content_serializer.size
|
24
|
+
end
|
25
|
+
if (content_serializer.first_appearance_time == nil)
|
26
|
+
raise ArgumentError.new("first_appearance_time have to be defined")
|
27
|
+
else
|
28
|
+
@first_appearance_time = ContentData.parse_time(content_serializer.first_appearance_time)
|
33
29
|
end
|
34
|
-
end
|
35
30
|
|
36
|
-
|
37
|
-
|
31
|
+
else
|
32
|
+
@checksum = checksum
|
33
|
+
@size = size
|
34
|
+
@first_appearance_time = first_appearance_time
|
38
35
|
end
|
36
|
+
end
|
39
37
|
|
40
|
-
|
41
|
-
|
42
|
-
self.size.eql? other.size and
|
43
|
-
self.first_appearance_time.to_i.eql? other.first_appearance_time.to_i)
|
44
|
-
end
|
38
|
+
def to_s
|
39
|
+
"%s,%d,%s" % [@checksum, @size, ContentData.format_time(@first_appearance_time)]
|
45
40
|
end
|
46
41
|
|
47
|
-
|
48
|
-
|
42
|
+
def ==(other)
|
43
|
+
return (self.checksum.eql? other.checksum and
|
44
|
+
self.size.eql? other.size and
|
45
|
+
self.first_appearance_time.to_i.eql? other.first_appearance_time.to_i)
|
46
|
+
end
|
47
|
+
end
|
49
48
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
if (content_instance_serializer.modification_time == nil)
|
63
|
-
raise ArgumentError.new("modification_time have to be defined")
|
64
|
-
else
|
65
|
-
@modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
|
66
|
-
end
|
67
|
-
if (content_instance_serializer.server_name == nil)
|
68
|
-
raise ArgumentError.new("server_name have to be defined")
|
69
|
-
else
|
70
|
-
@server_name = content_instance_serializer.server_name
|
71
|
-
end
|
72
|
-
if (content_instance_serializer.device == nil)
|
73
|
-
raise ArgumentError.new("device have to be defined")
|
74
|
-
else
|
75
|
-
@device = content_instance_serializer.device
|
76
|
-
end
|
77
|
-
if (content_instance_serializer.full_path == nil)
|
78
|
-
raise ArgumentError.new("full_path have to be defined")
|
79
|
-
else
|
80
|
-
@full_path = content_instance_serializer.full_path
|
81
|
-
end
|
49
|
+
class ContentInstance
|
50
|
+
attr_reader :checksum, :size, :server_name, :device, :full_path, :modification_time
|
51
|
+
|
52
|
+
def initialize(checksum, size, server_name, device, full_path, modification_time, content_instance_serializer = nil)
|
53
|
+
if content_instance_serializer != nil
|
54
|
+
if (content_instance_serializer.checksum == nil)
|
55
|
+
raise ArgumentError.new("checksum have to be defined")
|
56
|
+
else
|
57
|
+
@checksum = content_instance_serializer.checksum
|
58
|
+
end
|
59
|
+
if (content_instance_serializer.size == nil)
|
60
|
+
raise ArgumentError.new("size have to be defined")
|
82
61
|
else
|
83
|
-
@
|
84
|
-
@size = size
|
85
|
-
@server_name = server_name
|
86
|
-
@device = device
|
87
|
-
@full_path = full_path
|
88
|
-
@modification_time = modification_time
|
62
|
+
@size = content_instance_serializer.size
|
89
63
|
end
|
64
|
+
if (content_instance_serializer.modification_time == nil)
|
65
|
+
raise ArgumentError.new("modification_time have to be defined")
|
66
|
+
else
|
67
|
+
@modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
|
68
|
+
end
|
69
|
+
if (content_instance_serializer.server_name == nil)
|
70
|
+
raise ArgumentError.new("server_name have to be defined")
|
71
|
+
else
|
72
|
+
@server_name = content_instance_serializer.server_name
|
73
|
+
end
|
74
|
+
if (content_instance_serializer.device == nil)
|
75
|
+
raise ArgumentError.new("device have to be defined")
|
76
|
+
else
|
77
|
+
@device = content_instance_serializer.device
|
78
|
+
end
|
79
|
+
if (content_instance_serializer.full_path == nil)
|
80
|
+
raise ArgumentError.new("full_path have to be defined")
|
81
|
+
else
|
82
|
+
@full_path = content_instance_serializer.full_path
|
83
|
+
end
|
84
|
+
else
|
85
|
+
@checksum = checksum
|
86
|
+
@size = size
|
87
|
+
@server_name = server_name
|
88
|
+
@device = device
|
89
|
+
@full_path = full_path
|
90
|
+
@modification_time = modification_time
|
90
91
|
end
|
92
|
+
end
|
91
93
|
|
92
|
-
|
93
|
-
|
94
|
-
|
94
|
+
def global_path
|
95
|
+
ContentInstance.instance_global_path(@server_name, @full_path)
|
96
|
+
end
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
98
|
+
def ContentInstance.instance_global_path(server_name, full_path)
|
99
|
+
"%s:%s" % [server_name, full_path]
|
100
|
+
end
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
102
|
+
def to_s
|
103
|
+
"%s,%d,%s,%s,%s,%s" % [@checksum, @size, @server_name,
|
104
|
+
@device, @full_path, ContentData.format_time(@modification_time)]
|
105
|
+
end
|
104
106
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
107
|
+
def ==(other)
|
108
|
+
return (self.checksum.eql? other.checksum and
|
109
|
+
self.size.eql? other.size and
|
110
|
+
self.server_name.eql? other.server_name and
|
111
|
+
self.device.eql? other.device and
|
112
|
+
self.full_path.eql? other.full_path and
|
113
|
+
self.modification_time.to_i.eql? other.modification_time.to_i)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# Unfortunately, this class is used as mutable for now, so we need to be careful.
|
118
|
+
# TODO(kolman): Make this class immutable, but add an indexing structure to it.
|
119
|
+
# TODO(kolman): Add wrapper to the class to enable dynamic content data
|
120
|
+
# (with easy access indexes)
|
121
|
+
class ContentData
|
122
|
+
attr_reader :contents, :instances
|
123
|
+
|
124
|
+
# @param copy [ContentData, nil] optional instance whose contents and instances hashes are cloned
|
125
|
+
def initialize(copy = nil)
|
126
|
+
if copy.nil?
|
127
|
+
@contents = Hash.new # key is a checksum , value is a refernce to the Content object
|
128
|
+
@instances = Hash.new # key is an instance global path , value is a reference to the ContentInstance object
|
129
|
+
else
|
130
|
+
# Regenerate only the hashes, the values are immutable.
|
131
|
+
@contents = copy.contents.clone
|
132
|
+
@instances = copy.instances.clone
|
112
133
|
end
|
113
134
|
end
|
114
135
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
# (with easy access indexes)
|
119
|
-
class ContentData
|
120
|
-
attr_reader :contents, :instances
|
136
|
+
def add_content(content)
|
137
|
+
@contents[content.checksum] = content
|
138
|
+
end
|
121
139
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
140
|
+
def add_instance(instance)
|
141
|
+
if (not @contents.key?(instance.checksum))
|
142
|
+
Log.warning sprintf("Adding instance while it's" +
|
143
|
+
" checksum %s does not exists.\n", instance.checksum)
|
144
|
+
Log.warning sprintf("%s\n", instance.to_s)
|
145
|
+
return false
|
146
|
+
elsif (@contents[instance.checksum].size != instance.size)
|
147
|
+
Log.warning 'File size different from content size while same checksum'
|
148
|
+
Log.warning instance.to_s
|
149
|
+
return false
|
132
150
|
end
|
133
151
|
|
134
|
-
|
135
|
-
@contents[content.checksum] = content
|
136
|
-
end
|
152
|
+
key = instance.global_path
|
137
153
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
154
|
+
#override file if needed
|
155
|
+
@instances[key] = instance
|
156
|
+
end
|
157
|
+
|
158
|
+
def empty?
|
159
|
+
@contents.empty?
|
160
|
+
end
|
161
|
+
|
162
|
+
# TODO rename method with finishing '?', cause it returns a boolean
|
163
|
+
def content_exists(checksum)
|
164
|
+
@contents.key? checksum
|
165
|
+
end
|
166
|
+
|
167
|
+
# TODO(kolman): The semantics of this merge are those of merge!; change it throughout the file.
|
168
|
+
def merge(content_data)
|
169
|
+
content_data.contents.values.each { |content|
|
170
|
+
add_content(content)
|
171
|
+
}
|
172
|
+
content_data.instances.values.each { |instance|
|
173
|
+
add_instance(instance)
|
174
|
+
}
|
175
|
+
end
|
176
|
+
|
177
|
+
def ==(other)
|
178
|
+
return false if other == nil
|
179
|
+
return false unless @contents.size == other.contents.size
|
180
|
+
return false unless @instances.size == other.instances.size
|
181
|
+
|
182
|
+
@contents.keys.each { |key|
|
183
|
+
if (@contents[key] != other.contents[key])
|
184
|
+
Log.info @contents[key].first_appearance_time.to_i
|
185
|
+
Log.info other.contents[key].first_appearance_time.to_i
|
143
186
|
return false
|
144
|
-
|
145
|
-
|
146
|
-
|
187
|
+
end
|
188
|
+
}
|
189
|
+
@instances.keys.each { |key|
|
190
|
+
if (@instances[key] != other.instances[key])
|
147
191
|
return false
|
148
192
|
end
|
193
|
+
}
|
194
|
+
return true
|
195
|
+
end
|
149
196
|
|
150
|
-
|
197
|
+
def to_s
|
198
|
+
ret = ""
|
199
|
+
ret << @contents.length.to_s << "\n"
|
200
|
+
@contents.each_value { |content|
|
201
|
+
ret << content.to_s << "\n"
|
202
|
+
}
|
203
|
+
ret << @instances.length.to_s << "\n"
|
204
|
+
@instances.each_value { |instance|
|
205
|
+
ret << instance.to_s << "\n"
|
206
|
+
}
|
207
|
+
return ret
|
208
|
+
end
|
209
|
+
|
210
|
+
def to_file(filename)
|
211
|
+
content_data_dir = File.dirname(filename)
|
212
|
+
FileUtils.makedirs(content_data_dir) unless File.directory?(content_data_dir)
|
213
|
+
File.open(filename, 'w') {|f| f.write(to_s) }
|
214
|
+
end
|
151
215
|
|
152
|
-
|
153
|
-
|
216
|
+
# TODO validation that file indeed contains ContentData missing
|
217
|
+
def from_file(filename)
|
218
|
+
lines = IO.readlines(filename)
|
219
|
+
i = 0
|
220
|
+
number_of_contents = lines[i].to_i
|
221
|
+
i += 1
|
222
|
+
number_of_contents.times {
|
223
|
+
parameters = lines[i].split(",")
|
224
|
+
add_content(Content.new(parameters[0],
|
225
|
+
parameters[1].to_i,
|
226
|
+
ContentData.parse_time(parameters[2])))
|
227
|
+
i += 1
|
228
|
+
}
|
229
|
+
|
230
|
+
number_of_instances = lines[i].to_i
|
231
|
+
i += 1
|
232
|
+
number_of_instances.times {
|
233
|
+
if lines[i].nil?
|
234
|
+
Log.info "lines[i] if nil !!!, Backing filename: #{filename} to #{filename}.bad"
|
235
|
+
FileUtils.cp(filename, "#{filename}.bad")
|
236
|
+
Log.info lines[i].join("\n")
|
237
|
+
end
|
238
|
+
parameters = lines[i].split(',')
|
239
|
+
# bugfix: if the file name contains a comma, parsing based on comma separation fails
|
240
|
+
if (parameters.size > 6)
|
241
|
+
(5..parameters.size-2).each do |i|
|
242
|
+
parameters[4] = [parameters[4], parameters[i]].join(",")
|
243
|
+
end
|
244
|
+
(5..parameters.size-2).each do |i|
|
245
|
+
parameters.delete_at(5)
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
add_instance(ContentInstance.new(parameters[0],
|
250
|
+
parameters[1].to_i,
|
251
|
+
parameters[2],
|
252
|
+
parameters[3],
|
253
|
+
parameters[4],
|
254
|
+
ContentData.parse_time(parameters[5])))
|
255
|
+
i += 1
|
256
|
+
}
|
257
|
+
end
|
258
|
+
|
259
|
+
def self.parse_time time_str
|
260
|
+
return nil unless time_str.instance_of? String
|
261
|
+
seconds_from_epoch = Integer time_str # Not using to_i here because it does not check string is integer.
|
262
|
+
time = Time.at seconds_from_epoch
|
263
|
+
end
|
264
|
+
|
265
|
+
def self.format_time(time)
|
266
|
+
return nil unless time.instance_of?Time
|
267
|
+
str = time.to_i.to_s
|
268
|
+
return str
|
269
|
+
end
|
270
|
+
|
271
|
+
# merges content data a and content data b to a new content data and returns it.
|
272
|
+
def self.merge(a, b)
|
273
|
+
return b unless not a.nil?
|
274
|
+
return a unless not b.nil?
|
275
|
+
|
276
|
+
return nil unless a.instance_of?ContentData
|
277
|
+
return nil unless b.instance_of?ContentData
|
278
|
+
|
279
|
+
ret = ContentData.new
|
280
|
+
ret.merge(a)
|
281
|
+
ret.merge(b)
|
282
|
+
|
283
|
+
return ret
|
284
|
+
end
|
285
|
+
|
286
|
+
# removes content data a from content data b and returns the new content data.
|
287
|
+
def self.remove(a, b)
|
288
|
+
return nil unless a.instance_of?ContentData
|
289
|
+
return nil unless b.instance_of?ContentData
|
290
|
+
|
291
|
+
ret = ContentData.new
|
292
|
+
|
293
|
+
b.contents.values.each { |content|
|
294
|
+
#print "%s - %s\n" % [content.checksum, a.content_exists(content.checksum).to_s]
|
295
|
+
ret.add_content(content) unless a.content_exists(content.checksum)
|
296
|
+
}
|
297
|
+
|
298
|
+
#Log.info "kaka"
|
299
|
+
|
300
|
+
b.instances.values.each { |instance|
|
301
|
+
#print "%s - %s\n" % [instance.checksum, a.content_exists(instance.checksum).to_s]
|
302
|
+
ret.add_instance(instance) unless a.content_exists(instance.checksum)
|
303
|
+
}
|
304
|
+
|
305
|
+
#print "kuku %s" % ret.contents.size.to_s
|
306
|
+
#print "kuku %s" % ret.instances.size.to_s
|
307
|
+
return ret
|
308
|
+
end
|
309
|
+
|
310
|
+
def self.remove_instances(a, b)
|
311
|
+
return nil unless a.instance_of?ContentData
|
312
|
+
return nil unless b.instance_of?ContentData
|
313
|
+
|
314
|
+
ret = ContentData.new
|
315
|
+
b.instances.values.each do |instance|
|
316
|
+
if !a.instances.key?(instance.global_path)
|
317
|
+
ret.add_content(b.contents[instance.checksum])
|
318
|
+
ret.add_instance(instance)
|
319
|
+
end
|
154
320
|
end
|
321
|
+
return ret
|
322
|
+
end
|
155
323
|
|
156
|
-
|
157
|
-
|
324
|
+
def self.remove_directory(cd, global_dir_path)
|
325
|
+
return nil unless cd.instance_of?ContentData
|
326
|
+
|
327
|
+
ret = ContentData.new
|
328
|
+
cd.instances.values.each do |instance|
|
329
|
+
Log.debug3("global path to check: #{global_dir_path}")
|
330
|
+
Log.debug3("instance global path: #{instance.global_path}")
|
331
|
+
if instance.global_path.scan(global_dir_path).size == 0
|
332
|
+
Log.debug3("Adding instance.")
|
333
|
+
ret.add_content(cd.contents[instance.checksum])
|
334
|
+
ret.add_instance(instance)
|
335
|
+
end
|
158
336
|
end
|
337
|
+
return ret
|
338
|
+
end
|
339
|
+
|
340
|
+
# returns the common content in both a and b
|
341
|
+
def self.intersect(a, b)
|
342
|
+
b_minus_a = ContentData.remove(a, b)
|
343
|
+
return ContentData.remove(b_minus_a, b)
|
344
|
+
end
|
159
345
|
|
160
|
-
|
161
|
-
|
346
|
+
# unify time for all entries with same content to minimal time
|
347
|
+
def self.unify_time(db)
|
348
|
+
mod_db = ContentData.new # resulting ContentData that will consists objects with unified time
|
349
|
+
checksum2time = Hash.new # key=checksum value=min_time_for_this_checksum
|
350
|
+
checksum2instances = Hash.new # key=checksum value=array_of_instances_with_this_checksum (Will be replaced with ContentData method)
|
351
|
+
|
352
|
+
# populate tables with given ContentData entries
|
353
|
+
db.instances.each_value do |instance|
|
354
|
+
checksum = instance.checksum
|
355
|
+
time = instance.modification_time
|
356
|
+
|
357
|
+
unless (checksum2instances.has_key? checksum)
|
358
|
+
checksum2instances[checksum] = []
|
359
|
+
end
|
360
|
+
checksum2instances[checksum] << instance
|
361
|
+
|
362
|
+
if (not checksum2time.has_key? checksum)
|
363
|
+
checksum2time[checksum] = time
|
364
|
+
elsif ((checksum2time[checksum] <=> time) > 0)
|
365
|
+
checksum2time[checksum] = time
|
366
|
+
end
|
162
367
|
end
|
163
368
|
|
164
|
-
#
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
369
|
+
# update min time table with time information from contents
|
370
|
+
db.contents.each do |checksum, content|
|
371
|
+
time = content.first_appearance_time
|
372
|
+
if (not checksum2time.has_key? checksum)
|
373
|
+
checksum2time[checksum] = time
|
374
|
+
elsif ((checksum2time[checksum] <=> time) > 0)
|
375
|
+
checksum2time[checksum] = time
|
376
|
+
end
|
172
377
|
end
|
173
378
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
379
|
+
# add content entries to the output table; if needed, update the time field with the found min time
|
380
|
+
db.contents.each do |checksum, content|
|
381
|
+
time = checksum2time[checksum]
|
382
|
+
if ((content.first_appearance_time <=> time) == 0)
|
383
|
+
mod_db.add_content(content)
|
384
|
+
else
|
385
|
+
mod_db.add_content(Content.new(checksum, content.size, time))
|
386
|
+
end
|
387
|
+
end
|
178
388
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
389
|
+
# add instance entries to the output table; if needed, update the time field with the found min time
|
390
|
+
checksum2instances.each do |checksum, instances|
|
391
|
+
time = checksum2time[checksum]
|
392
|
+
instances.each do |instance|
|
393
|
+
if ((instance.modification_time <=> time) == 0)
|
394
|
+
mod_db.add_instance(instance)
|
395
|
+
else # must be bigger then found min time
|
396
|
+
mod_instance = ContentInstance.new(instance.checksum, instance.size,
|
397
|
+
instance.server_name, instance.device,
|
398
|
+
instance.full_path, time)
|
399
|
+
mod_db.add_instance(mod_instance)
|
189
400
|
end
|
190
|
-
|
191
|
-
return true
|
401
|
+
end
|
192
402
|
end
|
403
|
+
mod_db
|
404
|
+
end
|
193
405
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
406
|
+
# Validates index against file system that all instances hold a correct data regarding files
|
407
|
+
# that they represent.
|
408
|
+
#
|
409
|
+
# There are two levels of validation, controlled by instance_check_level system parameter:
|
410
|
+
# * shallow - quick, tests instance for file existence and attributes.
|
411
|
+
# * deep - can take more time, in addition to shallow recalculates hash sum.
|
412
|
+
# @param [Hash] params hash of parameters of validation, can be used to return additional data.
|
413
|
+
#
|
414
|
+
# Supported key/value combinations:
|
415
|
+
# * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
|
416
|
+
# @return [Boolean] true when index is correct, false otherwise
|
417
|
+
def validate(params = nil)
|
418
|
+
# used to answer whether specific param was set
|
419
|
+
param_exists = Proc.new do |param|
|
420
|
+
!(params.nil? || params[param].nil?)
|
205
421
|
end
|
206
422
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
423
|
+
# used to process method parameters centrally
|
424
|
+
process_params = Proc.new do |values|
|
425
|
+
# values is a Hash with keys: :content, :instance and value appropriate to key
|
426
|
+
if param_exists.call :failed
|
427
|
+
unless values[:content].nil?
|
428
|
+
params[:failed].add_content values[:content]
|
429
|
+
end
|
430
|
+
unless values[:instance].nil?
|
431
|
+
# appropriate content should be already added
|
432
|
+
params[:failed].add_instance values[:instance]
|
433
|
+
end
|
434
|
+
end
|
211
435
|
end
|
212
436
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
add_content(Content.new(parameters[0],
|
221
|
-
parameters[1].to_i,
|
222
|
-
ContentData.parse_time(parameters[2])))
|
223
|
-
i += 1
|
224
|
-
}
|
225
|
-
|
226
|
-
number_of_instances = lines[i].to_i
|
227
|
-
i += 1
|
228
|
-
number_of_instances.times {
|
229
|
-
parameters = lines[i].split(',')
|
230
|
-
# bugfix: if file name consist a comma then parsing based on comma separating fails
|
231
|
-
if (parameters.size > 6)
|
232
|
-
(5..parameters.size-2).each do |i|
|
233
|
-
parameters[4] = [parameters[4], parameters[i]].join(",")
|
234
|
-
end
|
235
|
-
(5..parameters.size-2).each do |i|
|
236
|
-
parameters.delete_at(5)
|
237
|
-
end
|
437
|
+
is_valid = true
|
438
|
+
instances.each_value do |instance|
|
439
|
+
unless check_instance instance
|
440
|
+
is_valid = false
|
441
|
+
|
442
|
+
unless params.nil? || params.empty?
|
443
|
+
process_params.call :content => contents[instance.checksum], :instance => instance
|
238
444
|
end
|
445
|
+
end
|
446
|
+
end
|
447
|
+
|
448
|
+
is_valid
|
449
|
+
end
|
239
450
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
451
|
+
def shallow_check(instance)
|
452
|
+
path = instance.full_path
|
453
|
+
is_valid = true
|
454
|
+
|
455
|
+
if (File.exists?(path))
|
456
|
+
if File.size(path) != instance.size
|
457
|
+
is_valid = false
|
458
|
+
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{instance.size}"
|
459
|
+
Log.warning err_msg
|
460
|
+
end
|
461
|
+
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
462
|
+
if File.mtime(path).to_i != instance.modification_time.to_i
|
463
|
+
is_valid = false
|
464
|
+
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
465
|
+
+ "indexed #{instance.modification_time}"
|
466
|
+
Log.warning err_msg
|
467
|
+
end
|
468
|
+
else
|
469
|
+
is_valid = false
|
470
|
+
err_msg = "Indexed file #{path} doesn't exist"
|
471
|
+
Log.warning err_msg
|
248
472
|
end
|
473
|
+
is_valid
|
474
|
+
end
|
249
475
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
476
|
+
def deep_check(instance)
|
477
|
+
if shallow_check(instance)
|
478
|
+
path = instance.full_path
|
479
|
+
current_checksum = FileIndexing::IndexAgent.get_checksum(path)
|
480
|
+
if instance.checksum == current_checksum
|
481
|
+
true
|
482
|
+
else
|
483
|
+
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance.checksum}"
|
484
|
+
Log.warning err_msg
|
485
|
+
false
|
486
|
+
end
|
487
|
+
else
|
488
|
+
false
|
254
489
|
end
|
490
|
+
end
|
255
491
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
492
|
+
def check_instance(instance)
|
493
|
+
case Params['instance_check_level']
|
494
|
+
when 'deep'
|
495
|
+
deep_check instance
|
496
|
+
when 'shallow'
|
497
|
+
shallow_check instance
|
498
|
+
else
|
499
|
+
# TODO remove it when params will support set of values
|
500
|
+
throw ArgumentError.new "Unsupported check level #{Params['instance_check_level']}"
|
260
501
|
end
|
502
|
+
end
|
261
503
|
|
262
|
-
# merges content data a and content data b to a new content data and returns it.
|
263
|
-
def self.merge(a, b)
|
264
|
-
return b unless not a.nil?
|
265
|
-
return a unless not b.nil?
|
266
504
|
|
267
|
-
|
268
|
-
|
505
|
+
# TODO simplify conditions
|
506
|
+
# This method is experimental and shouldn't be used
|
507
|
+
# nil is used to define +/- infinity for to/from method arguments
|
508
|
+
# from/to values are exclusive in the condition's calculations
|
509
|
+
# Need to take care about '==' operation that is used for object's comparison.
|
510
|
+
# If needed, the user should define their own '==' implementation.
|
511
|
+
def get_query(variable, params)
|
512
|
+
raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
|
269
513
|
|
270
|
-
|
271
|
-
|
272
|
-
|
514
|
+
exact = params['exact'].nil? ? Array.new : params['exact']
|
515
|
+
from = params['from']
|
516
|
+
to = params ['to']
|
517
|
+
is_inside = params['is_inside']
|
273
518
|
|
274
|
-
|
519
|
+
unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
|
520
|
+
raise ArgumentError "#{variable} isn't a ContentInstance variable"
|
275
521
|
end
|
276
522
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
return nil unless b.instance_of?ContentData
|
523
|
+
if (exact.nil? && from.nil? && to.nil?)
|
524
|
+
raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
|
525
|
+
end
|
281
526
|
|
282
|
-
|
527
|
+
if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
|
528
|
+
raise ArgumentError 'to and from arguments should be comparable one with another'
|
529
|
+
end
|
283
530
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
531
|
+
# FIXME add support for from/to for Strings
|
532
|
+
if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
|
533
|
+
|| (!to.nil? && to.kind_of?(Numeric.new.class)))
|
534
|
+
raise ArgumentError 'from and to options supported only for numeric values'
|
535
|
+
end
|
288
536
|
|
289
|
-
|
537
|
+
if (!exact.empty? && (!from.nil? || !to.nil?))
|
538
|
+
raise ArgumentError 'exact and from/to options are mutually exclusive'
|
539
|
+
end
|
290
540
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
}
|
541
|
+
result_index = ContentData.new
|
542
|
+
instances.each_value do |instance|
|
543
|
+
is_match = false
|
544
|
+
var_value = instance.instance_variable_get("@#{variable}")
|
295
545
|
|
296
|
-
|
297
|
-
|
298
|
-
|
546
|
+
if exact.include? var_value
|
547
|
+
is_match = true
|
548
|
+
elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
|
549
|
+
is_match = true
|
550
|
+
end
|
551
|
+
|
552
|
+
if (is_match && is_inside) || (!is_match && !is_inside)
|
553
|
+
checksum = instance.checksum
|
554
|
+
result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
|
555
|
+
result_index.add_instance instance
|
556
|
+
end
|
299
557
|
end
|
558
|
+
result_index
|
559
|
+
end
|
300
560
|
|
301
|
-
|
302
|
-
|
303
|
-
return nil unless b.instance_of?ContentData
|
561
|
+
private :shallow_check, :deep_check, :check_instance
|
562
|
+
end
|
304
563
|
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
564
|
+
# Validates index against file system that all instances hold a correct data regarding files
|
565
|
+
# that they represent.
|
566
|
+
#
|
567
|
+
# There are two levels of validation, controlled by instance_check_level system parameter:
|
568
|
+
# * shallow - quick, tests instance for file existence and attributes.
|
569
|
+
# * deep - can take more time, in addition to shallow recalculates hash sum.
|
570
|
+
# @param [Hash] params hash of parameters of validation, can be used to return additional data.
|
571
|
+
#
|
572
|
+
# Supported key/value combinations:
|
573
|
+
# * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
|
574
|
+
# @return [Boolean] true when index is correct, false otherwise
|
575
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
576
|
+
def validate(params = nil)
|
577
|
+
# used to answer whether specific param was set
|
578
|
+
param_exists = Proc.new do |param|
|
579
|
+
!(params.nil? || params[param].nil?)
|
580
|
+
end
|
581
|
+
|
582
|
+
# used to process method parameters centrally
|
583
|
+
process_params = Proc.new do |values|
|
584
|
+
# values is a Hash with keys: :content, :instance and value appropriate to key
|
585
|
+
if param_exists.call :failed
|
586
|
+
unless values[:content].nil?
|
587
|
+
params[:failed].add_content values[:content]
|
588
|
+
end
|
589
|
+
unless values[:instance].nil?
|
590
|
+
# appropriate content should be already added
|
591
|
+
params[:failed].add_instance values[:instance]
|
311
592
|
end
|
312
|
-
return ret
|
313
593
|
end
|
594
|
+
end
|
314
595
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
if instance.global_path.scan(global_dir_path).size == 0
|
323
|
-
Log.debug3("Adding instance.")
|
324
|
-
ret.add_content(cd.contents[instance.checksum])
|
325
|
-
ret.add_instance(instance)
|
326
|
-
end
|
596
|
+
is_valid = true
|
597
|
+
instances.each_value do |instance|
|
598
|
+
unless check_instance instance
|
599
|
+
is_valid = false
|
600
|
+
|
601
|
+
unless params.nil? || params.empty?
|
602
|
+
process_params.call :content => contents[instance.checksum], :instance => instance
|
327
603
|
end
|
328
|
-
return ret
|
329
604
|
end
|
605
|
+
end
|
606
|
+
|
607
|
+
is_valid
|
608
|
+
end
|
609
|
+
|
610
|
+
def shallow_check(instance)
|
611
|
+
path = instance.full_path
|
612
|
+
is_valid = true
|
330
613
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
614
|
+
if (File.exists?(path))
|
615
|
+
if File.size(path) != instance.size
|
616
|
+
is_valid = false
|
617
|
+
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{instance.size}"
|
618
|
+
Log.warning err_msg
|
335
619
|
end
|
620
|
+
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
621
|
+
if File.mtime(path).to_i != instance.modification_time.to_i
|
622
|
+
is_valid = false
|
623
|
+
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
624
|
+
+ "indexed #{instance.modification_time}"
|
625
|
+
Log.warning err_msg
|
626
|
+
end
|
627
|
+
else
|
628
|
+
is_valid = false
|
629
|
+
err_msg = "Indexed file #{path} doesn't exist"
|
630
|
+
Log.warning err_msg
|
631
|
+
end
|
632
|
+
is_valid
|
633
|
+
end
|
336
634
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
635
|
+
def deep_check(instance)
|
636
|
+
if shallow_check(instance)
|
637
|
+
path = instance.full_path
|
638
|
+
current_checksum = FileIndexing::IndexAgent.get_checksum(path)
|
639
|
+
if instance.checksum == current_checksum
|
640
|
+
true
|
641
|
+
else
|
642
|
+
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance.checksum}"
|
643
|
+
Log.warning err_msg
|
644
|
+
false
|
645
|
+
end
|
646
|
+
else
|
647
|
+
false
|
648
|
+
end
|
649
|
+
end
|
342
650
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
651
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
652
|
+
def check_instance(instance)
|
653
|
+
case Params['instance_check_level']
|
654
|
+
when 'deep'
|
655
|
+
deep_check instance
|
656
|
+
when 'shallow'
|
657
|
+
shallow_check instance
|
658
|
+
else
|
659
|
+
# TODO remove it when params will support set of values
|
660
|
+
throw ArgumentError.new "Unsupported check level #{Params['instance_check_level']}"
|
661
|
+
end
|
662
|
+
end
|
347
663
|
|
348
|
-
unless (checksum2instances.has_key? checksum)
|
349
|
-
checksum2instances[checksum] = []
|
350
|
-
end
|
351
|
-
checksum2instances[checksum] << instance
|
352
664
|
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
665
|
+
# TODO simplify conditions
|
666
|
+
# This method is experimental and shouldn't be used
|
667
|
+
# nil is used to define +/- infinity for to/from method arguments
|
668
|
+
# from/to values are exclusive in the condition's calculations
|
669
|
+
# Need to take care about '==' operation that is used for object's comparison.
|
670
|
+
# If needed, the user should define their own '==' implementation.
|
671
|
+
def get_query(variable, params)
|
672
|
+
raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
|
359
673
|
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
checksum2time[checksum] = time
|
365
|
-
elsif ((checksum2time[checksum] <=> time) > 0)
|
366
|
-
checksum2time[checksum] = time
|
367
|
-
end
|
368
|
-
end
|
674
|
+
exact = params['exact'].nil? ? Array.new : params['exact']
|
675
|
+
from = params['from']
|
676
|
+
to = params ['to']
|
677
|
+
is_inside = params['is_inside']
|
369
678
|
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
if ((content.first_appearance_time <=> time) == 0)
|
374
|
-
mod_db.add_content(content)
|
375
|
-
else
|
376
|
-
mod_db.add_content(Content.new(checksum, content.size, time))
|
377
|
-
end
|
378
|
-
end
|
679
|
+
unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
|
680
|
+
raise ArgumentError "#{variable} isn't a ContentInstance variable"
|
681
|
+
end
|
379
682
|
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
683
|
+
if (exact.nil? && from.nil? && to.nil?)
|
684
|
+
raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
|
685
|
+
end
|
686
|
+
|
687
|
+
if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
|
688
|
+
raise ArgumentError 'to and from arguments should be comparable one with another'
|
689
|
+
end
|
690
|
+
|
691
|
+
# FIXME add support for from/to for Strings
|
692
|
+
if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
|
693
|
+
|| (!to.nil? && to.kind_of?(Numeric.new.class)))
|
694
|
+
raise ArgumentError 'from and to options supported only for numeric values'
|
695
|
+
end
|
696
|
+
|
697
|
+
if (!exact.empty? && (!from.nil? || !to.nil?))
|
698
|
+
raise ArgumentError 'exact and from/to options are mutually exclusive'
|
699
|
+
end
|
700
|
+
|
701
|
+
result_index = ContentData.new
|
702
|
+
instances.each_value do |instance|
|
703
|
+
is_match = false
|
704
|
+
var_value = instance.instance_variable_get("@#{variable}")
|
705
|
+
|
706
|
+
if exact.include? var_value
|
707
|
+
is_match = true
|
708
|
+
elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
|
709
|
+
is_match = true
|
710
|
+
end
|
711
|
+
|
712
|
+
if (is_match && is_inside) || (!is_match && !is_inside)
|
713
|
+
checksum = instance.checksum
|
714
|
+
result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
|
715
|
+
result_index.add_instance instance
|
395
716
|
end
|
396
717
|
end
|
718
|
+
result_index
|
397
719
|
end
|
720
|
+
|
721
|
+
private :shallow_check, :deep_check, :check_instance, :get_query
|
398
722
|
end
|
723
|
+
|