content_server 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/bin/file_utils +118 -0
- data/lib/content_data/content_data.rb +114 -48
- data/lib/content_server/version.rb +1 -1
- data/lib/file_monitoring/file_monitoring.rb +94 -50
- data/lib/file_monitoring/monitor_path.rb +196 -113
- data/lib/file_utils/file_utils.rb +10 -49
- data/lib/networking/tcp.rb +4 -4
- data/spec/content_data/content_data_spec.rb +331 -0
- data/spec/content_data/validations_spec.rb +5 -0
- data/spec/content_server/content_server_spec.rb +5 -0
- data/spec/content_server/file_streamer_spec.rb +5 -0
- data/spec/file_copy/copy_spec.rb +5 -0
- data/spec/file_indexing/index_agent_spec.rb +5 -0
- data/spec/networking/tcp_spec.rb +5 -0
- data/spec/validations/index_validations_spec.rb +5 -0
- metadata +9 -89
- data/test/content_data/content_data_test.rb +0 -291
- data/test/file_generator/file_generator_spec.rb +0 -85
- data/test/file_monitoring/monitor_path_test.rb +0 -189
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.0 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.1 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500 +0 -1500
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.0 +0 -1500
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.1 +0 -1500
- data/test/file_monitoring/monitor_path_test/test_file.500 +0 -500
- data/test/file_monitoring/monitor_path_test/test_file.500.0 +0 -500
- data/test/file_monitoring/monitor_path_test/test_file.500.1 +0 -500
- data/test/file_utils/fileutil_mksymlink_test.rb +0 -134
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.0 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.1 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.0 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.1 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/test_file.500 +0 -500
- data/test/file_utils/fileutil_mksymlink_test/test_file.500.0 +0 -500
- data/test/file_utils/fileutil_mksymlink_test/test_file.500.1 +0 -500
- data/test/file_utils/time_modification_test.rb +0 -136
- data/test/params/params_spec.rb +0 -280
- data/test/params/params_test.rb +0 -43
- data/test/run_in_background/run_in_background_test.rb +0 -122
- data/test/run_in_background/test_app +0 -59
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ODk2ZWZlZGIwODU5Y2I1YTg3YzIyZWZmNDQyOGRiOTJhMWMwODJiZg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NWQxY2E1NGE2ZmQ3YzRlYzFjN2QwNTRlMWMxYzFjMmZhODlhNDVlNA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MmViNDAxZDU0NTk4ZDY0YzNiYTkxZWVkZjAxYzMyNjZhM2Y0ZDE2MmE3NDNl
|
10
|
+
YTE3MTJlZGU0NzBjZjJjYzNmM2Q5YWM1NzEyNzgxZDM4MmRmZDIyOWUxMGY3
|
11
|
+
MjU4YjMxMDFlOThkNDM1M2ZiZTJhN2RjNzFkZmZhYWZkZWE4MTM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YmQxOWYzZGFhZDY5YjNmOTBkN2VhMDBiOTQwZTdjMmQxNzhmZDgxMzE3YzVl
|
14
|
+
OTg4NDRmYmE5ZDU3ZWJhMGU2YzNiYTQ2Y2M4OTEyMGQzYzNmOWJkYzc0YmMw
|
15
|
+
OTJhNmQ3YTY3MDIzZDI5NmYzOThhNjA0MjQ4MTI0ZGY5OGI1Nzc=
|
data/bin/file_utils
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# This utility executable is used to easily execute BBFS operations on files.
|
4
|
+
|
5
|
+
require 'content_data'
|
6
|
+
require 'log'
|
7
|
+
require 'params'
|
8
|
+
|
9
|
+
#############################################
|
10
|
+
# Definitions
|
11
|
+
#############################################
|
12
|
+
Params.string 'command', nil ,'supported commands are: merge, intersect, minus'
|
13
|
+
Params.string 'dest', nil ,'destination path'
|
14
|
+
Params.string 'cd_a', nil ,'a path'
|
15
|
+
Params.string 'cd_b', nil ,'b path'
|
16
|
+
|
17
|
+
HELP_MSG = <<EOF
|
18
|
+
This is an utility functionality for BBFS,
|
19
|
+
such as algebra of sets operations on ContentData files.
|
20
|
+
Usage:
|
21
|
+
To get merge of two ContentData files,
|
22
|
+
i.e. contents and appropreate instances that exist in at least one ContentData file:
|
23
|
+
file_utils --command=merge --cd_a=<content_data_file1> --cd_b=<content_data_file_2> [--dest=<result_content_data_file>]
|
24
|
+
To get intersection of two ContentData files,
|
25
|
+
i.e. contents and appropreate instances that exist in the both ContentData files:
|
26
|
+
file_utils --command=intersect --cd_a=<content_data_file1> --cd_b=<content_data_file_2> [--dest=<result_content_data_file>]
|
27
|
+
To get content_data_2 minus content_data_1,
|
28
|
+
i.e. contents and appropreate instances that exist in content_data_file2 and absent in content_data_file1:
|
29
|
+
file_utils --command=minus --cd_a=<content_data_file1> --cd_b=<content_data_file_2> [--dest=<result_content_data_file>]
|
30
|
+
EOF
|
31
|
+
#############################################
|
32
|
+
# Init
|
33
|
+
#############################################
|
34
|
+
Params.init ARGV
|
35
|
+
Log.init
|
36
|
+
|
37
|
+
#############################################
|
38
|
+
# Methods
|
39
|
+
#############################################
|
40
|
+
|
41
|
+
# Algebra of sets operations (merge, intersect, minus) on ContentData files
|
42
|
+
# Runs the operation selected by Params['command'] on two ContentData files.
#
# Both Params['cd_a'] and Params['cd_b'] must be set; each names a file that is
# loaded into a ContentData object. The result of merge / intersect / minus
# (minus is cd_b - cd_a) is printed after a one-line preface and, when
# Params['dest'] is set, also written to that file.
# A missing parameter or a file that fails to load is printed and logged, and
# the method returns without producing a result. Any other command value
# produces no output.
def content_data_command
  missing_param = %w[cd_a cd_b].find { |param| Params[param].nil? }
  if missing_param
    err_msg = "--#{missing_param} is not set"
    puts err_msg
    Log.error(err_msg)
    return
  end

  cd_a = load_content_data_param('cd_a')
  return if cd_a.nil?

  cd_b = load_content_data_param('cd_b')
  return if cd_b.nil?

  case Params['command']
  when 'merge'
    res = ContentData.merge(cd_a, cd_b)
    preface = "# Merge of #{Params['cd_a']} and #{Params['cd_b']}"
  when 'intersect'
    res = ContentData.intersect(cd_a, cd_b)
    preface = "# Intersection of #{Params['cd_a']} and #{Params['cd_b']}"
  when 'minus'
    res = ContentData.remove(cd_a, cd_b) # cd_b - cd_a
    preface = "# Contents and appropreate instances that exist in #{Params['cd_b']}" \
              " and absent in #{Params['cd_a']}"
  end

  return if res.nil?

  puts preface
  puts res.to_s
  res.to_file(Params['dest']) unless Params['dest'].nil?
end

# Loads the ContentData file named by Params[param_name].
#
# Returns the loaded object, or nil after printing and logging the failure.
# Only StandardError is rescued so that Interrupt, SystemExit and similar
# process-level exceptions are not reported as a load error.
def load_content_data_param(param_name)
  content_data = ContentData::ContentData.new
  content_data.from_file(Params[param_name])
  content_data
rescue StandardError => e
  err_msg = "Error loading content data %s = %s: %s" % [param_name, Params[param_name], e.message]
  puts err_msg
  Log.error(err_msg)
  Log.flush
  nil
end
|
95
|
+
|
96
|
+
# Main method
|
97
|
+
# Script entry point.
# With no command-line arguments the usage text is printed and the process exits.
# Otherwise Params['command'] selects the operation; any value other than
# merge, intersect or minus is reported on stdout and in the log, together
# with the usage text.
def run
  if ARGV.empty?
    puts HELP_MSG
    exit
  end

  requested = Params['command']
  case requested
  when 'merge', 'intersect', 'minus'
    content_data_command
  else
    unsupported = "Unsupported command: #{requested}"
    puts unsupported
    puts HELP_MSG
    Log.error(unsupported)
  end
end
|
114
|
+
|
115
|
+
#############################################
|
116
|
+
# Main
|
117
|
+
#############################################
|
118
|
+
run
|
@@ -35,16 +35,18 @@ module ContentData
|
|
35
35
|
if other.nil?
|
36
36
|
@contents_info = {} # Checksum --> [size, paths-->time(instance), time(content)]
|
37
37
|
@instances_info = {} # location --> checksum to optimize instances query
|
38
|
+
@symlinks_info = {} # [server,symlink path] -> target
|
38
39
|
else
|
39
40
|
@contents_info = other.clone_contents_info
|
40
41
|
@instances_info = other.clone_instances_info # location --> checksum to optimize instances query
|
42
|
+
@symlinks_info = other.clone_symlinks_info
|
41
43
|
end
|
42
44
|
end
|
43
45
|
|
44
46
|
# Content Data unique identification
|
45
47
|
# @return [ID] hash identification
|
46
48
|
def unique_id
|
47
|
-
@
|
49
|
+
[@contents_info.hash,@symlinks_info.hash]
|
48
50
|
end
|
49
51
|
|
50
52
|
def clone_instances_info
|
@@ -83,6 +85,16 @@ module ContentData
|
|
83
85
|
clone_contents_info
|
84
86
|
end
|
85
87
|
|
88
|
+
# Builds a copy of @symlinks_info that shares no key or value objects with it.
#
# Keys are [server, symlink path] pairs and values are the stored targets; the
# server, the path and the target are each cloned.
#
# @return [Hash] cloned [server, path] => target mapping
def clone_symlinks_info
  @symlinks_info.each_with_object({}) do |((server, path), target), cloned_symlinks|
    # The key must carry both elements: server AND path.
    cloned_symlinks[[server.clone, path.clone]] = target.clone
  end
end
|
97
|
+
|
86
98
|
# iterator over @contents_info data structure (not including instances)
|
87
99
|
# block is provided with: checksum, size and content modification time
|
88
100
|
def each_content(&block)
|
@@ -131,6 +143,17 @@ module ContentData
|
|
131
143
|
}
|
132
144
|
end
|
133
145
|
|
146
|
+
# iterator over @symlinks_info data structure
|
147
|
+
# block is provided with: server, file path and target
|
148
|
+
# Calls the block once per record of @symlinks_info.
#
# @yield [server, path, target] the two elements of the record key, followed
#   by the value stored under that key
# @return [nil] when a block is given
# @return [Enumerator] over [server, path, target] when called without a block
def each_symlink(&block)
  return enum_for(:each_symlink) unless block

  @symlinks_info.each do |(server, path), target|
    block.call(server, path, target)
  end
  nil
end
|
156
|
+
|
134
157
|
def contents_size()
|
135
158
|
@contents_info.length
|
136
159
|
end
|
@@ -139,6 +162,10 @@ module ContentData
|
|
139
162
|
@instances_info.length
|
140
163
|
end
|
141
164
|
|
165
|
+
# Returns the number of records currently held in @symlinks_info.
def symlinks_size()
|
166
|
+
@symlinks_info.length
|
167
|
+
end
|
168
|
+
|
142
169
|
def checksum_instances_size(checksum)
|
143
170
|
content_info = @contents_info[checksum]
|
144
171
|
return 0 if content_info.nil?
|
@@ -181,8 +208,16 @@ module ContentData
|
|
181
208
|
@instances_info[location] = checksum
|
182
209
|
end
|
183
210
|
|
211
|
+
# Stores target in @symlinks_info under the key [server, path];
# a value already stored under that key is replaced.
def add_symlink(server, path, target)
|
212
|
+
@symlinks_info[[server,path]] = target
|
213
|
+
end
|
214
|
+
|
215
|
+
# Deletes the record keyed by [server, path] from @symlinks_info.
# Returns the deleted value, or nil when no such key was present.
def remove_symlink(server, path)
|
216
|
+
@symlinks_info.delete([server,path])
|
217
|
+
end
|
218
|
+
|
184
219
|
def empty?
|
185
|
-
@contents_info.empty?
|
220
|
+
@contents_info.empty? and @symlinks_info.empty?
|
186
221
|
end
|
187
222
|
|
188
223
|
def content_exists(checksum)
|
@@ -193,6 +228,11 @@ module ContentData
|
|
193
228
|
@instances_info.has_key?([server, path])
|
194
229
|
end
|
195
230
|
|
231
|
+
# Returns true when @symlinks_info has a record keyed by [server, path].
# Note the argument order: path comes first, server second.
def symlink_exists(path, server)
|
232
|
+
@symlinks_info.has_key?([server, path])
|
233
|
+
end
|
234
|
+
|
235
|
+
|
196
236
|
# removes an instance record both in @contents_info and @instances_info.
|
197
237
|
# input params: server & path - are the instance unique key (called location)
|
198
238
|
# removes also the content, if content becomes empty after removing the instance
|
@@ -208,7 +248,7 @@ module ContentData
|
|
208
248
|
end
|
209
249
|
|
210
250
|
# removes all instances records which are located under input param: dir_to_remove.
|
211
|
-
# found records are removed from
|
251
|
+
# found records are removed from @contents_info , @instances_info and @symlinks_info
|
212
252
|
# input params: server & dir_to_remove - are used to check each instance unique key (called location)
|
213
253
|
# also removes any content that becomes empty after its instances are removed
|
214
254
|
def remove_directory(dir_to_remove, server)
|
@@ -216,7 +256,9 @@ module ContentData
|
|
216
256
|
loop {
|
217
257
|
checksum = contents_enum.next rescue break
|
218
258
|
instances = @contents_info[checksum][1]
|
219
|
-
instances.each_key
|
259
|
+
instances_enum = instances.each_key
|
260
|
+
loop {
|
261
|
+
location = instances_enum.next rescue break
|
220
262
|
if location[0] == server and location[1].scan(dir_to_remove).size > 0
|
221
263
|
instances.delete(location)
|
222
264
|
@instances_info.delete(location)
|
@@ -224,27 +266,20 @@ module ContentData
|
|
224
266
|
}
|
225
267
|
@contents_info.delete(checksum) if instances.empty?
|
226
268
|
}
|
227
|
-
end
|
228
269
|
|
270
|
+
# handle symlinks
|
271
|
+
symlinks_enum = @symlinks_info.each_key
|
272
|
+
loop {
|
273
|
+
symlink_key = symlinks_enum.next rescue break
|
274
|
+
if symlink_key[0] == server and symlink_key[1].scan(dir_to_remove).size > 0
|
275
|
+
@symlinks_info.delete(symlink_key)
|
276
|
+
end
|
277
|
+
}
|
278
|
+
end
|
229
279
|
|
230
280
|
def ==(other)
|
231
|
-
return
|
232
|
-
|
233
|
-
other.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
234
|
-
return false if instance_exists(path, server) != other.instance_exists(path, server)
|
235
|
-
local_content_info = @contents_info[checksum]
|
236
|
-
return false if local_content_info.nil?
|
237
|
-
return false if local_content_info[0] != size
|
238
|
-
return false if local_content_info[2] != content_mod_time
|
239
|
-
#check instances
|
240
|
-
local_instances = local_content_info[1]
|
241
|
-
return false if other.checksum_instances_size(checksum) != local_instances.length
|
242
|
-
location = [server, path]
|
243
|
-
local_instance_mod_time, _ = local_instances[location]
|
244
|
-
return false if local_instance_mod_time.nil?
|
245
|
-
return false if local_instance_mod_time != instance_mod_time
|
246
|
-
}
|
247
|
-
true
|
281
|
+
return nil if other.nil? # for this case: content_data == nil
|
282
|
+
unique_id == other.unique_id
|
248
283
|
end
|
249
284
|
|
250
285
|
def remove_content(checksum)
|
@@ -282,6 +317,7 @@ module ContentData
|
|
282
317
|
content_data_dir = File.dirname(filename)
|
283
318
|
FileUtils.makedirs(content_data_dir) unless File.directory?(content_data_dir)
|
284
319
|
File.open(filename, 'w') { |file|
|
320
|
+
# Write contents
|
285
321
|
file.write("#{@contents_info.length}\n")
|
286
322
|
contents_enum = @contents_info.each_key
|
287
323
|
content_chunks = @contents_info.length / CHUNK_SIZE + 1
|
@@ -291,6 +327,8 @@ module ContentData
|
|
291
327
|
GC.start
|
292
328
|
chunks_counter += 1
|
293
329
|
end
|
330
|
+
|
331
|
+
# Write instances
|
294
332
|
file.write("#{@instances_info.length}\n")
|
295
333
|
contents_enum = @contents_info.each_key
|
296
334
|
chunks_counter = 0
|
@@ -299,6 +337,14 @@ module ContentData
|
|
299
337
|
GC.start
|
300
338
|
chunks_counter += 1
|
301
339
|
end
|
340
|
+
|
341
|
+
# Write symlinks
|
342
|
+
symlinks_info_enum = @symlinks_info.each_key
|
343
|
+
file.write("#{@symlinks_info.length}\n")
|
344
|
+
loop {
|
345
|
+
symlink_key = symlinks_info_enum.next rescue break
|
346
|
+
file.write("#{symlink_key[0]}<#{symlink_key[1]}<#{@symlinks_info[symlink_key]}\n")
|
347
|
+
}
|
302
348
|
}
|
303
349
|
end
|
304
350
|
|
@@ -307,7 +353,7 @@ module ContentData
|
|
307
353
|
while chunk_counter < chunk_size
|
308
354
|
checksum = contents_enum.next rescue return
|
309
355
|
content_info = @contents_info[checksum]
|
310
|
-
file.write("#{checksum}
|
356
|
+
file.write("#{checksum}<#{content_info[0]}<#{content_info[2]}\n")
|
311
357
|
chunk_counter += 1
|
312
358
|
end
|
313
359
|
end
|
@@ -323,8 +369,8 @@ module ContentData
|
|
323
369
|
# provide the block with: checksum, size, content modification time,instance modification time,
|
324
370
|
# server and path.
|
325
371
|
instance_modification_time,instance_index_time = content_info[1][location]
|
326
|
-
file.write("#{checksum}
|
327
|
-
"#{instance_modification_time}
|
372
|
+
file.write("#{checksum}<#{content_info[0]}<#{location[0]}<#{location[1]}<" +
|
373
|
+
"#{instance_modification_time}<#{instance_index_time}\n")
|
328
374
|
}
|
329
375
|
chunk_counter += 1
|
330
376
|
break if chunk_counter == chunk_size
|
@@ -332,6 +378,7 @@ module ContentData
|
|
332
378
|
end
|
333
379
|
|
334
380
|
# TODO validation that file indeed contains ContentData missing
|
381
|
+
# TODO class level method?
|
335
382
|
# Loading db from file using chunks for better memory performance
|
336
383
|
def from_file(filename)
|
337
384
|
# read first line (number of contents)
|
@@ -339,11 +386,16 @@ module ContentData
|
|
339
386
|
# read number of instances.
|
340
387
|
# loop over instances lines (using chunks) and add instances
|
341
388
|
|
389
|
+
unless File.exists? filename
|
390
|
+
raise ArgumentError.new "No such a file #{filename}"
|
391
|
+
end
|
392
|
+
|
342
393
|
File.open(filename, 'r') { |file|
|
343
394
|
# Get number of contents (at first line)
|
344
395
|
number_of_contents = file.gets # this gets the next line or return nil at EOF
|
345
396
|
unless (number_of_contents and number_of_contents.match(/^[\d]+$/)) # check that line is of Number format
|
346
|
-
|
397
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
398
|
+
"number of contents should be a number. We got:#{number_of_contents}")
|
347
399
|
end
|
348
400
|
number_of_contents = number_of_contents.to_i
|
349
401
|
# advance file lines over all contents. We need only the instances data to build the content data object
|
@@ -365,7 +417,8 @@ module ContentData
|
|
365
417
|
# get number of instances
|
366
418
|
number_of_instances = file.gets
|
367
419
|
unless (number_of_instances and number_of_instances.match(/^[\d]+$/)) # check that line is of Number format
|
368
|
-
|
420
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
421
|
+
"number of instances should be a Number. We got:#{number_of_instances}")
|
369
422
|
end
|
370
423
|
number_of_instances = number_of_instances.to_i
|
371
424
|
# read in instances chunks and GC
|
@@ -382,35 +435,56 @@ module ContentData
|
|
382
435
|
GC.start
|
383
436
|
chunk_index += 1
|
384
437
|
end
|
438
|
+
|
439
|
+
# get number of symlinks
|
440
|
+
number_of_symlinks = file.gets
|
441
|
+
unless (number_of_symlinks and number_of_symlinks.match(/^[\d]+$/)) # check that line is of Number format
|
442
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
443
|
+
"number of symlinks should be a Number. We got:#{number_of_symlinks}")
|
444
|
+
end
|
445
|
+
number_of_symlinks.to_i.times {
|
446
|
+
symlinks_line = file.gets
|
447
|
+
unless symlinks_line
|
448
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
449
|
+
"Expected to read symlink line but reached EOF")
|
450
|
+
end
|
451
|
+
parameters = symlinks_line.split('<')
|
452
|
+
if (3 != parameters.length)
|
453
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
454
|
+
"Expected to read 3 fields ('<' separated) but got #{parameters.length}.\nLine:#{symlinks_line}")
|
455
|
+
end
|
456
|
+
|
457
|
+
@symlinks_info[[parameters[0],parameters[1]]] = parameters[2]
|
458
|
+
}
|
385
459
|
}
|
386
460
|
end
|
387
461
|
|
388
462
|
def read_contents_chunk(filename, file, chunk_size)
|
389
463
|
chunk_index = 0
|
390
464
|
while chunk_index < chunk_size
|
391
|
-
|
392
|
-
|
465
|
+
unless file.gets
|
466
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
467
|
+
"Expecting content line but reached end of file")
|
468
|
+
end
|
393
469
|
chunk_index += 1
|
394
470
|
end
|
395
471
|
true
|
396
472
|
end
|
397
473
|
|
398
|
-
def
|
474
|
+
def read_instances_chunk(filename, file, chunk_size)
|
399
475
|
chunk_index = 0
|
400
476
|
while chunk_index < chunk_size
|
401
477
|
instance_line = file.gets
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
if (parameters.size > 6)
|
406
|
-
(4..parameters.size-3).each do |i|
|
407
|
-
parameters[3] = [parameters[3], parameters[i]].join(",")
|
408
|
-
end
|
409
|
-
(4..parameters.size-3).each do |i|
|
410
|
-
parameters.delete_at(4)
|
411
|
-
end
|
478
|
+
unless instance_line
|
479
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
480
|
+
"Expected to read Instance line but reached EOF")
|
412
481
|
end
|
413
482
|
|
483
|
+
parameters = instance_line.split('<')
|
484
|
+
if (6 != parameters.length)
|
485
|
+
raise("Parse error of content data file:#{filename} line ##{$.}\n" +
|
486
|
+
"Expected to read 6 fields ('<' separated) but got #{parameters.length}.\nLine:#{instance_line}")
|
487
|
+
end
|
414
488
|
add_instance(parameters[0], #checksum
|
415
489
|
parameters[1].to_i, # size
|
416
490
|
parameters[2], # server
|
@@ -422,14 +496,6 @@ module ContentData
|
|
422
496
|
true
|
423
497
|
end
|
424
498
|
|
425
|
-
def reset_load_from_file(file_name, file_io, err_msg)
|
426
|
-
Log.error("unexpected error reading file:#{file_name}\nError message:#{err_msg}")
|
427
|
-
@contents_info = {} # Checksum --> [size, paths-->time(instance), time(content)]
|
428
|
-
@instances_info = {} # location --> checksum to optimize instances query
|
429
|
-
file_io.close
|
430
|
-
nil
|
431
|
-
end
|
432
|
-
|
433
499
|
# for each content, all time fields (content and instances) are replaced with the
|
434
500
|
# min time found, while going through all time fields.
|
435
501
|
def unify_time()
|