content_data 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/content_data/content_data.rb +394 -502
- data/lib/content_data/dynamic_content_data.rb +44 -3
- data/lib/content_data/version.rb +1 -1
- data/test/content_data/content_data_test.rb +281 -135
- metadata +2 -2
|
@@ -1,210 +1,253 @@
|
|
|
1
|
+
require 'content_server/globals'
|
|
1
2
|
require 'log'
|
|
2
3
|
require 'params'
|
|
3
|
-
require 'time'
|
|
4
4
|
|
|
5
5
|
module ContentData
|
|
6
6
|
Params.string('instance_check_level', 'shallow', 'Defines check level. Supported levels are: ' \
|
|
7
7
|
'shallow - quick, tests instance for file existence and attributes. ' \
|
|
8
8
|
'deep - can take more time, in addition to shallow recalculates hash sum.')
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
10
|
+
# Content Data(CD) object holds files information as contents and instances
|
|
11
|
+
# Files info retrieved from hardware: checksum, size, time modification, server, device and path
|
|
12
|
+
# Those attributes are divided into content and instance attributes:
|
|
13
|
+
# unique checksum, size are content attributes
|
|
14
|
+
# time modification, server, device and path are instance attributes
|
|
15
|
+
# The relationship between content and instances is 1:many meaning that
|
|
16
|
+
# a content can have instances in many servers.
|
|
17
|
+
# content also has time attribute, which has the value of the time of the first instance.
|
|
18
|
+
# This can be changed by using unify_time method which sets all time attributes for a content and its
|
|
19
|
+
# instances to the min time of all.
|
|
20
|
+
# Different files(instances) with same content(checksum), are grouped together under that content.
|
|
21
|
+
# Interface methods include:
|
|
22
|
+
# iterate over contents and instances info,
|
|
23
|
+
# unify time, add/remove instance, queries, merge, remove directory and more.
|
|
24
|
+
# Content info data structure:
|
|
25
|
+
# @contents_info = { Checksum -> [size, *instances*, content_modification_time] }
|
|
26
|
+
# *instances* = {[server,path] -> instance_modification_time }
|
|
27
|
+
# Notes:
|
|
28
|
+
# 1. content_modification_time is the instance_modification_time of the first
|
|
29
|
+
# instance which was added to @contents_info
|
|
30
|
+
class ContentData
|
|
30
31
|
|
|
32
|
+
def initialize(other = nil)
|
|
33
|
+
ObjectSpace.define_finalizer(self,
|
|
34
|
+
self.class.method(:finalize).to_proc)
|
|
35
|
+
if Params['enable_monitoring']
|
|
36
|
+
::ContentServer::Globals.process_vars.inc('obj add ContentData')
|
|
37
|
+
end
|
|
38
|
+
if other.nil?
|
|
39
|
+
@contents_info = {} # Checksum --> [size, paths-->time(instance), time(content)]
|
|
31
40
|
else
|
|
32
|
-
@
|
|
33
|
-
@size = size
|
|
34
|
-
@first_appearance_time = first_appearance_time
|
|
41
|
+
@contents_info = other.clone_contents_info
|
|
35
42
|
end
|
|
36
43
|
end
|
|
37
44
|
|
|
38
|
-
def
|
|
39
|
-
|
|
45
|
+
def self.finalize(id)
|
|
46
|
+
if Params['enable_monitoring']
|
|
47
|
+
::ContentServer::Globals.process_vars.inc('obj rem ContentData')
|
|
48
|
+
end
|
|
40
49
|
end
|
|
41
50
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
51
|
+
# getting a cloned data base
|
|
52
|
+
def clone_contents_info
|
|
53
|
+
@contents_info.keys.inject({}) { |clone_contents_info, checksum|
|
|
54
|
+
instances = @contents_info[checksum]
|
|
55
|
+
size = instances[0]
|
|
56
|
+
content_time = instances[2]
|
|
57
|
+
instances_db = instances[1]
|
|
58
|
+
instances_db_cloned = {}
|
|
59
|
+
instances_db.keys.each { |location|
|
|
60
|
+
instance_mtime = instances_db[location]
|
|
61
|
+
instances_db_cloned[[location[0].clone,location[1].clone]]=instance_mtime
|
|
62
|
+
}
|
|
63
|
+
clone_contents_info[checksum] = [size,
|
|
64
|
+
instances_db_cloned,
|
|
65
|
+
content_time]
|
|
66
|
+
clone_contents_info
|
|
67
|
+
}
|
|
46
68
|
end
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
class ContentInstance
|
|
50
|
-
attr_reader :checksum, :size, :server_name, :device, :full_path, :modification_time
|
|
51
69
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
raise ArgumentError.new("size have to be defined")
|
|
61
|
-
else
|
|
62
|
-
@size = content_instance_serializer.size
|
|
63
|
-
end
|
|
64
|
-
if (content_instance_serializer.modification_time == nil)
|
|
65
|
-
raise ArgumentError.new("modification_time have to be defined")
|
|
66
|
-
else
|
|
67
|
-
@modification_time = ContentData.parse_time(content_instance_serializer.modification_time)
|
|
68
|
-
end
|
|
69
|
-
if (content_instance_serializer.server_name == nil)
|
|
70
|
-
raise ArgumentError.new("server_name have to be defined")
|
|
71
|
-
else
|
|
72
|
-
@server_name = content_instance_serializer.server_name
|
|
73
|
-
end
|
|
74
|
-
if (content_instance_serializer.device == nil)
|
|
75
|
-
raise ArgumentError.new("device have to be defined")
|
|
76
|
-
else
|
|
77
|
-
@device = content_instance_serializer.device
|
|
78
|
-
end
|
|
79
|
-
if (content_instance_serializer.full_path == nil)
|
|
80
|
-
raise ArgumentError.new("full_path have to be defined")
|
|
81
|
-
else
|
|
82
|
-
@full_path = content_instance_serializer.full_path
|
|
83
|
-
end
|
|
84
|
-
else
|
|
85
|
-
@checksum = checksum
|
|
86
|
-
@size = size
|
|
87
|
-
@server_name = server_name
|
|
88
|
-
@device = device
|
|
89
|
-
@full_path = full_path
|
|
90
|
-
@modification_time = modification_time
|
|
91
|
-
end
|
|
70
|
+
# iterator over @contents_info data structure (not including instances)
|
|
71
|
+
# block is provided with: checksum, size and content modification time
|
|
72
|
+
def each_content(&block)
|
|
73
|
+
@contents_info.keys.each { |checksum|
|
|
74
|
+
content_val = @contents_info[checksum]
|
|
75
|
+
# provide checksum, size and content modification time to the block
|
|
76
|
+
block.call(checksum,content_val[0], content_val[2])
|
|
77
|
+
}
|
|
92
78
|
end
|
|
93
79
|
|
|
94
|
-
|
|
95
|
-
|
|
80
|
+
# iterator over @contents_info data structure (including instances)
|
|
81
|
+
# block is provided with: checksum, size, content modification time,
|
|
82
|
+
# instance modification time, server and file path
|
|
83
|
+
def each_instance(&block)
|
|
84
|
+
@contents_info.keys.each { |checksum|
|
|
85
|
+
content_info = @contents_info[checksum]
|
|
86
|
+
content_info[1].keys.each {|location|
|
|
87
|
+
# provide the block with: checksum, size, content modification time,instance modification time,
|
|
88
|
+
# server and path.
|
|
89
|
+
instance_modification_time = content_info[1][location]
|
|
90
|
+
block.call(checksum,content_info[0], content_info[2], instance_modification_time,
|
|
91
|
+
location[0], location[1])
|
|
92
|
+
}
|
|
93
|
+
}
|
|
96
94
|
end
|
|
97
95
|
|
|
98
|
-
|
|
99
|
-
|
|
96
|
+
# iterator of instances over specific content
|
|
97
|
+
# block is provided with: checksum, size, content modification time,
|
|
98
|
+
# instance modification time, server and file path
|
|
99
|
+
def content_each_instance(checksum, &block)
|
|
100
|
+
content_info = @contents_info[checksum]
|
|
101
|
+
content_info[1].keys.each {|location|
|
|
102
|
+
# provide the block with: checksum, size, content modification time,instance modification time,
|
|
103
|
+
# server and path.
|
|
104
|
+
instance_modification_time = content_info[1][location]
|
|
105
|
+
block.call(checksum,content_info[0], content_info[2], instance_modification_time,
|
|
106
|
+
location[0], location[1])
|
|
107
|
+
}
|
|
100
108
|
end
|
|
101
109
|
|
|
102
|
-
def
|
|
103
|
-
|
|
104
|
-
@device, @full_path, ContentData.format_time(@modification_time)]
|
|
110
|
+
def contents_size()
|
|
111
|
+
@contents_info.size
|
|
105
112
|
end
|
|
106
113
|
|
|
107
|
-
def
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
self.device.eql? other.device and
|
|
112
|
-
self.full_path.eql? other.full_path and
|
|
113
|
-
self.modification_time.to_i.eql? other.modification_time.to_i)
|
|
114
|
+
def instances_size(checksum)
|
|
115
|
+
content_info = @contents_info[checksum]
|
|
116
|
+
return 0 if content_info.nil?
|
|
117
|
+
content_info[1].size
|
|
114
118
|
end
|
|
115
|
-
end
|
|
116
119
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
120
|
+
def get_instance_mod_time(checksum, location)
|
|
121
|
+
content_info = @contents_info[checksum]
|
|
122
|
+
return nil if content_info.nil?
|
|
123
|
+
instances = content_info[1]
|
|
124
|
+
instance_time = instances[location]
|
|
125
|
+
end
|
|
123
126
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
@
|
|
127
|
+
def add_instance(checksum, size, server, path, modification_time)
|
|
128
|
+
location = [server, path]
|
|
129
|
+
content_info = @contents_info[checksum]
|
|
130
|
+
if content_info.nil?
|
|
131
|
+
@contents_info[checksum] = [size,
|
|
132
|
+
{location => modification_time},
|
|
133
|
+
modification_time]
|
|
129
134
|
else
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
135
|
+
if size != content_info[0]
|
|
136
|
+
Log.warning 'File size different from content size while same checksum'
|
|
137
|
+
Log.warning("instance location:server:'#{location[0]}' path:'#{location[1]}'")
|
|
138
|
+
Log.warning("instance mod time:'#{modification_time}'")
|
|
139
|
+
end
|
|
140
|
+
#override file if needed
|
|
141
|
+
content_info[0] = size
|
|
142
|
+
instances = content_info[1]
|
|
143
|
+
instances[location] = modification_time
|
|
133
144
|
end
|
|
134
145
|
end
|
|
135
146
|
|
|
136
|
-
def
|
|
137
|
-
@
|
|
147
|
+
def empty?
|
|
148
|
+
@contents_info.empty?
|
|
138
149
|
end
|
|
139
150
|
|
|
140
|
-
def
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
" checksum %s does not exists.\n", instance.checksum)
|
|
144
|
-
Log.warning sprintf("%s\n", instance.to_s)
|
|
145
|
-
return false
|
|
146
|
-
elsif (@contents[instance.checksum].size != instance.size)
|
|
147
|
-
Log.warning 'File size different from content size while same checksum'
|
|
148
|
-
Log.warning instance.to_s
|
|
149
|
-
return false
|
|
150
|
-
end
|
|
151
|
+
def content_exists(checksum)
|
|
152
|
+
@contents_info.has_key?(checksum)
|
|
153
|
+
end
|
|
151
154
|
|
|
152
|
-
key = instance.global_path
|
|
153
155
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
+
# TODO (genadyp) consider about using hash for optional defining of parameters
|
|
157
|
+
def instance_exists(path, server, checksum=nil)
|
|
158
|
+
location = [server, path]
|
|
159
|
+
if checksum.nil?
|
|
160
|
+
@contents_info.values.any? { |content_db|
|
|
161
|
+
content_db[1].has_key?(location)
|
|
162
|
+
}
|
|
163
|
+
else
|
|
164
|
+
content_info = @contents_info[checksum]
|
|
165
|
+
return false if content_info.nil?
|
|
166
|
+
content_info[1].has_key?(location)
|
|
167
|
+
end
|
|
156
168
|
end
|
|
157
169
|
|
|
158
|
-
def
|
|
159
|
-
@
|
|
170
|
+
def stats_by_location(location)
|
|
171
|
+
@contents_info.each_value { |content_db|
|
|
172
|
+
if content_db[1].has_key?(location)
|
|
173
|
+
return [content_db[0], content_db[1][location]]
|
|
174
|
+
end
|
|
175
|
+
}
|
|
176
|
+
return nil
|
|
160
177
|
end
|
|
161
178
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
179
|
+
|
|
180
|
+
# removes an instance from known content (faster than unknown content)
|
|
181
|
+
# remove also the content, if content becomes empty
|
|
182
|
+
def remove_instance(location, checksum=nil)
|
|
183
|
+
if checksum.nil?
|
|
184
|
+
@contents_info.keys.each { |checksum|
|
|
185
|
+
instances = @contents_info[checksum][1]
|
|
186
|
+
instances.delete(location)
|
|
187
|
+
@contents_info.delete(checksum) if instances.empty?
|
|
188
|
+
}
|
|
189
|
+
else
|
|
190
|
+
content_info = @contents_info[checksum]
|
|
191
|
+
unless content_info.nil?
|
|
192
|
+
instances = content_info[1]
|
|
193
|
+
instances.delete(location)
|
|
194
|
+
@contents_info.delete(checksum) if instances.empty?
|
|
195
|
+
end
|
|
196
|
+
end
|
|
165
197
|
end
|
|
166
198
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
199
|
+
def remove_directory(dir_to_remove, server)
|
|
200
|
+
@contents_info.keys.each { |checksum|
|
|
201
|
+
instances = @contents_info[checksum][1]
|
|
202
|
+
instances.delete_if { |location, _|
|
|
203
|
+
location[0] == server and location[1].scan(dir_to_remove).size > 0
|
|
204
|
+
}
|
|
205
|
+
@contents_info.delete(checksum) if instances.empty?
|
|
174
206
|
}
|
|
175
207
|
end
|
|
176
208
|
|
|
209
|
+
|
|
177
210
|
def ==(other)
|
|
178
|
-
return false if other
|
|
179
|
-
return false
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
if
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
end
|
|
211
|
+
return false if other.nil?
|
|
212
|
+
return false if @contents_info.size != other.contents_size
|
|
213
|
+
other.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
|
214
|
+
local_content_info = @contents_info[checksum]
|
|
215
|
+
return false if local_content_info.nil?
|
|
216
|
+
return false if local_content_info[0] != size
|
|
217
|
+
return false if local_content_info[2] != content_mod_time
|
|
218
|
+
#check instances
|
|
219
|
+
local_instances = local_content_info[1]
|
|
220
|
+
return false if other.instances_size(checksum) != local_instances.size
|
|
221
|
+
location = [server, path]
|
|
222
|
+
local_instance_mod_time = local_instances[location]
|
|
223
|
+
return false if local_instance_mod_time.nil?
|
|
224
|
+
return false if local_instance_mod_time != instance_mod_time
|
|
193
225
|
}
|
|
194
|
-
|
|
226
|
+
true
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def remove_content(checksum)
|
|
230
|
+
@contents_info.delete(checksum)
|
|
195
231
|
end
|
|
196
232
|
|
|
197
233
|
def to_s
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
234
|
+
return_str = ""
|
|
235
|
+
contents_str = ""
|
|
236
|
+
instances_str = ""
|
|
237
|
+
instances_counter = 0
|
|
238
|
+
each_content { |checksum, size, content_mod_time|
|
|
239
|
+
contents_str << "%s,%d,%d\n" % [checksum, size, content_mod_time]
|
|
202
240
|
}
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
241
|
+
instances_counter = 0
|
|
242
|
+
each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
|
243
|
+
instances_counter += 1
|
|
244
|
+
instances_str << "%s,%d,%s,%s,%d\n" % [checksum, size, server, path, instance_mod_time]
|
|
206
245
|
}
|
|
207
|
-
|
|
246
|
+
return_str << "%d\n" % [@contents_info.size]
|
|
247
|
+
return_str << contents_str
|
|
248
|
+
return_str << "%d\n" % [instances_counter]
|
|
249
|
+
return_str << instances_str
|
|
250
|
+
return_str
|
|
208
251
|
end
|
|
209
252
|
|
|
210
253
|
def to_file(filename)
|
|
@@ -216,192 +259,61 @@ module ContentData
|
|
|
216
259
|
# TODO validation that file indeed contains ContentData missing
|
|
217
260
|
def from_file(filename)
|
|
218
261
|
lines = IO.readlines(filename)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
i += 1
|
|
222
|
-
number_of_contents.times {
|
|
223
|
-
parameters = lines[i].split(",")
|
|
224
|
-
add_content(Content.new(parameters[0],
|
|
225
|
-
parameters[1].to_i,
|
|
226
|
-
ContentData.parse_time(parameters[2])))
|
|
227
|
-
i += 1
|
|
228
|
-
}
|
|
229
|
-
|
|
262
|
+
number_of_contents = lines[0].to_i
|
|
263
|
+
i = 1 + number_of_contents
|
|
230
264
|
number_of_instances = lines[i].to_i
|
|
231
265
|
i += 1
|
|
232
266
|
number_of_instances.times {
|
|
233
267
|
if lines[i].nil?
|
|
234
|
-
Log.
|
|
268
|
+
Log.warning "line ##{i} is nil !!!, Backing filename: #{filename} to #{filename}.bad"
|
|
235
269
|
FileUtils.cp(filename, "#{filename}.bad")
|
|
236
|
-
Log.
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
270
|
+
Log.warning("Lines:\n#{lines[i].join("\n")}")
|
|
271
|
+
else
|
|
272
|
+
parameters = lines[i].split(',')
|
|
273
|
+
# bugfix: if a file name contains a comma then parsing based on comma separation fails
|
|
274
|
+
if (parameters.size > 5)
|
|
275
|
+
(4..parameters.size-2).each do |i|
|
|
276
|
+
parameters[3] = [parameters[3], parameters[i]].join(",")
|
|
277
|
+
end
|
|
278
|
+
(4..parameters.size-2).each do |i|
|
|
279
|
+
parameters.delete_at(4)
|
|
280
|
+
end
|
|
246
281
|
end
|
|
247
|
-
end
|
|
248
282
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
283
|
+
add_instance(parameters[0],
|
|
284
|
+
parameters[1].to_i,
|
|
285
|
+
parameters[2],
|
|
286
|
+
parameters[3],
|
|
287
|
+
parameters[4].to_i)
|
|
288
|
+
end
|
|
255
289
|
i += 1
|
|
256
290
|
}
|
|
257
291
|
end
|
|
258
292
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
# merges content data a and content data b to a new content data and returns it.
|
|
272
|
-
def self.merge(a, b)
|
|
273
|
-
return b unless not a.nil?
|
|
274
|
-
return a unless not b.nil?
|
|
275
|
-
|
|
276
|
-
return nil unless a.instance_of?ContentData
|
|
277
|
-
return nil unless b.instance_of?ContentData
|
|
278
|
-
|
|
279
|
-
ret = ContentData.new
|
|
280
|
-
ret.merge(a)
|
|
281
|
-
ret.merge(b)
|
|
282
|
-
|
|
283
|
-
return ret
|
|
284
|
-
end
|
|
285
|
-
|
|
286
|
-
# removed content data a from content data b and returns the new content data.
|
|
287
|
-
def self.remove(a, b)
|
|
288
|
-
return nil unless a.instance_of?ContentData
|
|
289
|
-
return nil unless b.instance_of?ContentData
|
|
290
|
-
|
|
291
|
-
ret = ContentData.new
|
|
292
|
-
|
|
293
|
-
b.contents.values.each { |content|
|
|
294
|
-
#print "%s - %s\n" % [content.checksum, a.content_exists(content.checksum).to_s]
|
|
295
|
-
ret.add_content(content) unless a.content_exists(content.checksum)
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
#Log.debug1 "kaka"
|
|
299
|
-
|
|
300
|
-
b.instances.values.each { |instance|
|
|
301
|
-
#print "%s - %s\n" % [instance.checksum, a.content_exists(instance.checksum).to_s]
|
|
302
|
-
ret.add_instance(instance) unless a.content_exists(instance.checksum)
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
#print "kuku %s" % ret.contents.size.to_s
|
|
306
|
-
#print "kuku %s" % ret.instances.size.to_s
|
|
307
|
-
return ret
|
|
308
|
-
end
|
|
309
|
-
|
|
310
|
-
def self.remove_instances(a, b)
|
|
311
|
-
return nil unless a.instance_of?ContentData
|
|
312
|
-
return nil unless b.instance_of?ContentData
|
|
313
|
-
|
|
314
|
-
ret = ContentData.new
|
|
315
|
-
b.instances.values.each do |instance|
|
|
316
|
-
if !a.instances.key?(instance.global_path)
|
|
317
|
-
ret.add_content(b.contents[instance.checksum])
|
|
318
|
-
ret.add_instance(instance)
|
|
319
|
-
end
|
|
320
|
-
end
|
|
321
|
-
return ret
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
def self.remove_directory(cd, global_dir_path)
|
|
325
|
-
return nil unless cd.instance_of?ContentData
|
|
326
|
-
|
|
327
|
-
ret = ContentData.new
|
|
328
|
-
cd.instances.values.each do |instance|
|
|
329
|
-
if instance.global_path.scan(global_dir_path).size == 0
|
|
330
|
-
ret.add_content(cd.contents[instance.checksum])
|
|
331
|
-
ret.add_instance(instance)
|
|
332
|
-
end
|
|
333
|
-
end
|
|
334
|
-
return ret
|
|
335
|
-
end
|
|
336
|
-
|
|
337
|
-
# returns the common content in both a and b
|
|
338
|
-
def self.intersect(a, b)
|
|
339
|
-
b_minus_a = ContentData.remove(a, b)
|
|
340
|
-
return ContentData.remove(b_minus_a, b)
|
|
341
|
-
end
|
|
342
|
-
|
|
343
|
-
# unify time for all entries with same content to minimal time
|
|
344
|
-
def self.unify_time(db)
|
|
345
|
-
mod_db = ContentData.new # resulting ContentData that will consists objects with unified time
|
|
346
|
-
checksum2time = Hash.new # key=checksum value=min_time_for_this_checksum
|
|
347
|
-
checksum2instances = Hash.new # key=checksum value=array_of_instances_with_this_checksum (Will be replaced with ContentData method)
|
|
348
|
-
|
|
349
|
-
# populate tables with given ContentData entries
|
|
350
|
-
db.instances.each_value do |instance|
|
|
351
|
-
checksum = instance.checksum
|
|
352
|
-
time = instance.modification_time
|
|
353
|
-
|
|
354
|
-
unless (checksum2instances.has_key? checksum)
|
|
355
|
-
checksum2instances[checksum] = []
|
|
356
|
-
end
|
|
357
|
-
checksum2instances[checksum] << instance
|
|
358
|
-
|
|
359
|
-
if (not checksum2time.has_key? checksum)
|
|
360
|
-
checksum2time[checksum] = time
|
|
361
|
-
elsif ((checksum2time[checksum] <=> time) > 0)
|
|
362
|
-
checksum2time[checksum] = time
|
|
363
|
-
end
|
|
364
|
-
end
|
|
365
|
-
|
|
366
|
-
# update min time table with time information from contents
|
|
367
|
-
db.contents.each do |checksum, content|
|
|
368
|
-
time = content.first_appearance_time
|
|
369
|
-
if (not checksum2time.has_key? checksum)
|
|
370
|
-
checksum2time[checksum] = time
|
|
371
|
-
elsif ((checksum2time[checksum] <=> time) > 0)
|
|
372
|
-
checksum2time[checksum] = time
|
|
373
|
-
end
|
|
374
|
-
end
|
|
375
|
-
|
|
376
|
-
# add content entries to the output table. in need of case update time field with found min time
|
|
377
|
-
db.contents.each do |checksum, content|
|
|
378
|
-
time = checksum2time[checksum]
|
|
379
|
-
if ((content.first_appearance_time <=> time) == 0)
|
|
380
|
-
mod_db.add_content(content)
|
|
381
|
-
else
|
|
382
|
-
mod_db.add_content(Content.new(checksum, content.size, time))
|
|
383
|
-
end
|
|
384
|
-
end
|
|
385
|
-
|
|
386
|
-
# add instance entries to the output table. in need of case update time field with found min time
|
|
387
|
-
checksum2instances.each do |checksum, instances|
|
|
388
|
-
time = checksum2time[checksum]
|
|
389
|
-
instances.each do |instance|
|
|
390
|
-
if ((instance.modification_time <=> time) == 0)
|
|
391
|
-
mod_db.add_instance(instance)
|
|
392
|
-
else # must be bigger then found min time
|
|
393
|
-
mod_instance = ContentInstance.new(instance.checksum, instance.size,
|
|
394
|
-
instance.server_name, instance.device,
|
|
395
|
-
instance.full_path, time)
|
|
396
|
-
mod_db.add_instance(mod_instance)
|
|
293
|
+
# for each content, all time fields (content and instances) are replaced with the
|
|
294
|
+
# min time found, while going through all time fields.
|
|
295
|
+
def unify_time()
|
|
296
|
+
@contents_info.keys.each { |checksum|
|
|
297
|
+
content_info = @contents_info[checksum]
|
|
298
|
+
min_time_per_checksum = content_info[2]
|
|
299
|
+
instances = content_info[1]
|
|
300
|
+
instances.keys.each { |location|
|
|
301
|
+
instance_mod_time = instances[location]
|
|
302
|
+
if instance_mod_time < min_time_per_checksum
|
|
303
|
+
min_time_per_checksum = instance_mod_time
|
|
397
304
|
end
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
305
|
+
}
|
|
306
|
+
# update all instances with min time
|
|
307
|
+
instances.keys.each { |location|
|
|
308
|
+
instances[location] = min_time_per_checksum
|
|
309
|
+
}
|
|
310
|
+
# update content time with min time
|
|
311
|
+
content_info[2] = min_time_per_checksum
|
|
312
|
+
}
|
|
401
313
|
end
|
|
402
314
|
|
|
403
315
|
# Validates index against file system that all instances hold a correct data regarding files
|
|
404
|
-
# that they
|
|
316
|
+
# that they represent.
|
|
405
317
|
#
|
|
406
318
|
# There are two levels of validation, controlled by instance_check_level system parameter:
|
|
407
319
|
# * shallow - quick, tests instance for file existence and attributes.
|
|
@@ -411,6 +323,7 @@ module ContentData
|
|
|
411
323
|
# Supported key/value combinations:
|
|
412
324
|
# * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
|
|
413
325
|
# @return [Boolean] true when index is correct, false otherwise
|
|
326
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
|
414
327
|
def validate(params = nil)
|
|
415
328
|
# used to answer whether specific param was set
|
|
416
329
|
param_exists = Proc.new do |param|
|
|
@@ -419,47 +332,65 @@ module ContentData
|
|
|
419
332
|
|
|
420
333
|
# used to process method parameters centrally
|
|
421
334
|
process_params = Proc.new do |values|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
unless
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
335
|
+
if param_exists.call(:failed)
|
|
336
|
+
info = values[:details]
|
|
337
|
+
unless info.nil?
|
|
338
|
+
checksum = info[0]
|
|
339
|
+
content_mtime = info[1]
|
|
340
|
+
size = info[2]
|
|
341
|
+
inst_mtime = info[3]
|
|
342
|
+
server = info[4]
|
|
343
|
+
file_path = info[5]
|
|
344
|
+
params[:failed].add_instance(checksum, size, server, file_path, inst_mtime)
|
|
430
345
|
end
|
|
431
346
|
end
|
|
432
347
|
end
|
|
433
348
|
|
|
434
349
|
is_valid = true
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
350
|
+
@contents_info.keys.each { |checksum|
|
|
351
|
+
instances = @contents_info[checksum]
|
|
352
|
+
content_size = instances[0]
|
|
353
|
+
content_mtime = instances[2]
|
|
354
|
+
instances[1].keys.each { |unique_path|
|
|
355
|
+
instance_mtime = instances[1][unique_path]
|
|
356
|
+
instance_info = [checksum, content_mtime, content_size, instance_mtime]
|
|
357
|
+
instance_info.concat(unique_path)
|
|
358
|
+
unless check_instance(instance_info)
|
|
359
|
+
is_valid = false
|
|
360
|
+
|
|
361
|
+
unless params.nil? || params.empty?
|
|
362
|
+
process_params.call({:details => instance_info})
|
|
363
|
+
end
|
|
441
364
|
end
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
365
|
+
}
|
|
366
|
+
}
|
|
445
367
|
is_valid
|
|
446
368
|
end
|
|
447
369
|
|
|
448
|
-
|
|
449
|
-
|
|
370
|
+
# instance_info is an array:
|
|
371
|
+
# [0] - checksum
|
|
372
|
+
# [1] - content time
|
|
373
|
+
# [2] - content size
|
|
374
|
+
# [3] - instance mtime
|
|
375
|
+
# [4] - server name
|
|
376
|
+
# [5] - file path
|
|
377
|
+
def shallow_check(instance_info)
|
|
378
|
+
path = instance_info[5]
|
|
379
|
+
size = instance_info[2]
|
|
380
|
+
instance_mtime = instance_info[3]
|
|
450
381
|
is_valid = true
|
|
451
382
|
|
|
452
383
|
if (File.exists?(path))
|
|
453
|
-
if File.size(path) !=
|
|
384
|
+
if File.size(path) != size
|
|
454
385
|
is_valid = false
|
|
455
|
-
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{
|
|
386
|
+
err_msg = "#{path} size #{File.size(path)} differs from indexed size #{size}"
|
|
456
387
|
Log.warning err_msg
|
|
457
388
|
end
|
|
458
389
|
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
|
459
|
-
if File.mtime(path).to_i !=
|
|
390
|
+
if File.mtime(path).to_i != instance_mtime
|
|
460
391
|
is_valid = false
|
|
461
|
-
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
|
462
|
-
|
|
392
|
+
err_msg = "#{path} modification time #{File.mtime(path).to_i} differs from " \
|
|
393
|
+
+ "indexed #{instance_mtime}"
|
|
463
394
|
Log.warning err_msg
|
|
464
395
|
end
|
|
465
396
|
else
|
|
@@ -470,14 +401,22 @@ module ContentData
|
|
|
470
401
|
is_valid
|
|
471
402
|
end
|
|
472
403
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
404
|
+
# instance_info is an array:
|
|
405
|
+
# [0] - checksum
|
|
406
|
+
# [1] - content time
|
|
407
|
+
# [2] - content size
|
|
408
|
+
# [3] - instance mtime
|
|
409
|
+
# [4] - server name
|
|
410
|
+
# [5] - file path
|
|
411
|
+
def deep_check(instance_info)
|
|
412
|
+
if shallow_check(instance_info)
|
|
413
|
+
instance_checksum = instance_info[0]
|
|
414
|
+
path = instance_info[5]
|
|
476
415
|
current_checksum = FileIndexing::IndexAgent.get_checksum(path)
|
|
477
|
-
if
|
|
416
|
+
if instance_checksum == current_checksum
|
|
478
417
|
true
|
|
479
418
|
else
|
|
480
|
-
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{
|
|
419
|
+
err_msg = "#{path} checksum #{current_checksum} differs from indexed #{instance_checksum}"
|
|
481
420
|
Log.warning err_msg
|
|
482
421
|
false
|
|
483
422
|
end
|
|
@@ -486,6 +425,7 @@ module ContentData
|
|
|
486
425
|
end
|
|
487
426
|
end
|
|
488
427
|
|
|
428
|
+
# @raise [ArgumentError] when instance_check_level is incorrect
|
|
489
429
|
def check_instance(instance)
|
|
490
430
|
case Params['instance_check_level']
|
|
491
431
|
when 'deep'
|
|
@@ -558,163 +498,115 @@ module ContentData
|
|
|
558
498
|
private :shallow_check, :deep_check, :check_instance
|
|
559
499
|
end
|
|
560
500
|
|
|
561
|
-
#
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
# @return [Boolean] true when index is correct, false otherwise
|
|
572
|
-
# @raise [ArgumentError] when instance_check_level is incorrect
|
|
573
|
-
def validate(params = nil)
|
|
574
|
-
# used to answer whether specific param was set
|
|
575
|
-
param_exists = Proc.new do |param|
|
|
576
|
-
!(params.nil? || params[param].nil?)
|
|
577
|
-
end
|
|
578
|
-
|
|
579
|
-
# used to process method parameters centrally
|
|
580
|
-
process_params = Proc.new do |values|
|
|
581
|
-
# values is a Hash with keys: :content, :instance and value appropriate to key
|
|
582
|
-
if param_exists.call :failed
|
|
583
|
-
unless values[:content].nil?
|
|
584
|
-
params[:failed].add_content values[:content]
|
|
585
|
-
end
|
|
586
|
-
unless values[:instance].nil?
|
|
587
|
-
# appropriate content should be already added
|
|
588
|
-
params[:failed].add_instance values[:instance]
|
|
589
|
-
end
|
|
590
|
-
end
|
|
591
|
-
end
|
|
592
|
-
|
|
593
|
-
is_valid = true
|
|
594
|
-
instances.each_value do |instance|
|
|
595
|
-
unless check_instance instance
|
|
596
|
-
is_valid = false
|
|
597
|
-
|
|
598
|
-
unless params.nil? || params.empty?
|
|
599
|
-
process_params.call :content => contents[instance.checksum], :instance => instance
|
|
600
|
-
end
|
|
601
|
-
end
|
|
602
|
-
end
|
|
603
|
-
|
|
604
|
-
is_valid
|
|
501
|
+
# merges content data a and content data b to a new content data and returns it.
|
|
502
|
+
def self.merge(a, b)
|
|
503
|
+
return ContentData.new(a) if b.nil?
|
|
504
|
+
return ContentData.new(b) if a.nil?
|
|
505
|
+
c = ContentData.new(b)
|
|
506
|
+
# Add A instances to content data c
|
|
507
|
+
a.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
|
508
|
+
c.add_instance(checksum, size, server, path, instance_mod_time)
|
|
509
|
+
}
|
|
510
|
+
c
|
|
605
511
|
end
|
|
606
512
|
|
|
607
|
-
def
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
Log.warning err_msg
|
|
616
|
-
end
|
|
617
|
-
#if ContentData.format_time(File.mtime(path)) != instance.modification_time
|
|
618
|
-
if File.mtime(path).to_i != instance.modification_time.to_i
|
|
619
|
-
is_valid = false
|
|
620
|
-
err_msg = "#{path} modification time #{File.mtime(path)} differs from " \
|
|
621
|
-
+ "indexed #{instance.modification_time}"
|
|
622
|
-
Log.warning err_msg
|
|
623
|
-
end
|
|
624
|
-
else
|
|
625
|
-
is_valid = false
|
|
626
|
-
err_msg = "Indexed file #{path} doesn't exist"
|
|
627
|
-
Log.warning err_msg
|
|
628
|
-
end
|
|
629
|
-
is_valid
|
|
513
|
+
def self.merge_override_b(a, b)
|
|
514
|
+
return ContentData.new(a) if b.nil?
|
|
515
|
+
return ContentData.new(b) if a.nil?
|
|
516
|
+
# Add A instances to content data B
|
|
517
|
+
a.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
|
518
|
+
b.add_instance(checksum, size, server, path, instance_mod_time)
|
|
519
|
+
}
|
|
520
|
+
b
|
|
630
521
|
end
|
|
631
522
|
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
523
|
+
# B - A : Remove contents of A from B and return the new content data.
|
|
524
|
+
# instances are ignored
|
|
525
|
+
# e.g
|
|
526
|
+
# A db:
|
|
527
|
+
# Content_1 ->
|
|
528
|
+
# Instance_1
|
|
529
|
+
# Instance_2
|
|
530
|
+
#
|
|
531
|
+
# Content_2 ->
|
|
532
|
+
# Instance_3
|
|
533
|
+
#
|
|
534
|
+
# B db:
|
|
535
|
+
# Content_1 ->
|
|
536
|
+
# Instance_1
|
|
537
|
+
# Instance_2
|
|
538
|
+
#
|
|
539
|
+
# Content_2 ->
|
|
540
|
+
# Instance_3
|
|
541
|
+
# Instance_4
|
|
542
|
+
# Content_3 ->
|
|
543
|
+
# Instance_5
|
|
544
|
+
# B-A db:
|
|
545
|
+
# Content_3 ->
|
|
546
|
+
# Instance_5
|
|
547
|
+
# Content-level difference B - A.
#
# Returns nil when B is nil, and a new ContentData built from B when A is nil.
# Otherwise builds a new ContentData C from B, calls remove_content on C for
# every checksum yielded by A.each_content, and returns C.
def self.remove(a, b)
  if b.nil?
    nil
  elsif a.nil?
    ContentData.new(b)
  else
    result = ContentData.new(b) # new content data C created from B
    # remove the contents of A from the newly created C
    a.each_content do |checksum, _size, _content_mod_time|
      result.remove_content(checksum)
    end
    result
  end
end
|
|
647
557
|
|
|
648
|
-
#
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
558
|
+
# B - A : Remove the instances of A from content data B and return the new content data.
|
|
559
|
+
# If all instances are removed then the content record itself will be removed
|
|
560
|
+
# e.g
|
|
561
|
+
# A db:
|
|
562
|
+
# Content_1 ->
|
|
563
|
+
# Instance_1
|
|
564
|
+
# Instance_2
|
|
565
|
+
#
|
|
566
|
+
# Content_2 ->
|
|
567
|
+
# Instance_3
|
|
568
|
+
#
|
|
569
|
+
# B db:
|
|
570
|
+
# Content_1 ->
|
|
571
|
+
# Instance_1
|
|
572
|
+
# Instance_2
|
|
573
|
+
#
|
|
574
|
+
# Content_2 ->
|
|
575
|
+
# Instance_3
|
|
576
|
+
# Instance_4
|
|
577
|
+
# B-A db:
|
|
578
|
+
# Content_2 ->
|
|
579
|
+
# Instance_4
|
|
580
|
+
# Instance-level difference B - A.
#
# Returns nil when B is nil. Otherwise a new ContentData C is built from B;
# when A is nil, C is returned as is. For every instance yielded by
# A.each_instance, remove_instance is called on C with the [server, path]
# location and the checksum, and C is returned.
def self.remove_instances(a, b)
  return nil if b.nil?

  remaining = ContentData.new(b) # new content data C created from B
  return remaining if a.nil?

  # remove the instances of A from the newly created C
  a.each_instance do |checksum, _size, _content_mod_time, _instance_mod_time, server, path|
    remaining.remove_instance([server, path], checksum)
  end
  remaining
end
|
|
660
591
|
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
def get_query(variable, params)
|
|
669
|
-
raise RuntimeError.new 'This method is experimental and shouldn\'t be used'
|
|
670
|
-
|
|
671
|
-
exact = params['exact'].nil? ? Array.new : params['exact']
|
|
672
|
-
from = params['from']
|
|
673
|
-
to = params ['to']
|
|
674
|
-
is_inside = params['is_inside']
|
|
675
|
-
|
|
676
|
-
unless ContentInstance.new.instance_variable_defined?("@#{attribute}")
|
|
677
|
-
raise ArgumentError "#{variable} isn't a ContentInstance variable"
|
|
678
|
-
end
|
|
679
|
-
|
|
680
|
-
if (exact.nil? && from.nil? && to.nil?)
|
|
681
|
-
raise ArgumentError 'At least one of the argiments {exact, from, to} must be defined'
|
|
682
|
-
end
|
|
683
|
-
|
|
684
|
-
if (!(from.nil? || to.nil?) && from.kind_of?(to.class))
|
|
685
|
-
raise ArgumentError 'to and from arguments should be comparable one with another'
|
|
686
|
-
end
|
|
687
|
-
|
|
688
|
-
# FIXME add support for from/to for Strings
|
|
689
|
-
if ((!from.nil? && !from.kind_of?(Numeric.new.class))\
|
|
690
|
-
|| (!to.nil? && to.kind_of?(Numeric.new.class)))
|
|
691
|
-
raise ArgumentError 'from and to options supported only for numeric values'
|
|
692
|
-
end
|
|
693
|
-
|
|
694
|
-
if (!exact.empty? && (!from.nil? || !to.nil?))
|
|
695
|
-
raise ArgumentError 'exact and from/to options are mutually exclusive'
|
|
696
|
-
end
|
|
697
|
-
|
|
698
|
-
result_index = ContentData.new
|
|
699
|
-
instances.each_value do |instance|
|
|
700
|
-
is_match = false
|
|
701
|
-
var_value = instance.instance_variable_get("@#{variable}")
|
|
702
|
-
|
|
703
|
-
if exact.include? var_value
|
|
704
|
-
is_match = true
|
|
705
|
-
elsif (from.nil? || var_value > from) && (to.nil? || var_value < to)
|
|
706
|
-
is_match = true
|
|
707
|
-
end
|
|
708
|
-
|
|
709
|
-
if (is_match && is_inside) || (!is_match && !is_inside)
|
|
710
|
-
checksum = instance.checksum
|
|
711
|
-
result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
|
|
712
|
-
result_index.add_instance instance
|
|
592
|
+
# Builds a new ContentData holding every instance of content_data except
# those that are on server_to_remove and whose path contains dir_to_remove
# (i.e. path.scan(dir_to_remove) finds at least one match).
#
# Returns nil when content_data is nil. The checksum and path of each kept
# instance are cloned before being handed to add_instance.
def self.remove_directory(content_data, dir_to_remove, server_to_remove)
  return nil if content_data.nil?

  kept = ContentData.new
  content_data.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
    # Skip the instance only when it is on the server to remove AND its path includes the dir to remove
    next if server_to_remove == server && !path.scan(dir_to_remove).empty?

    kept.add_instance(checksum.clone, size, server, path.clone, instance_mod_time)
  end
  kept
end
|
|
717
603
|
|
|
718
|
-
|
|
604
|
+
# returns the common content in both a and b
|
|
605
|
+
# Returns nil when either argument is nil; otherwise returns
# remove(remove(a, b), b), i.e. B - (B - A) computed with the remove method.
def self.intersect(a, b)
  return nil if a.nil? || b.nil?

  # First B - A, then that difference is itself removed from B
  remove(remove(a, b), b)
end
|
|
719
611
|
end
|
|
720
612
|
|