content_server 1.5.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +15 -0
  2. data/bin/file_utils +118 -0
  3. data/lib/content_data/content_data.rb +114 -48
  4. data/lib/content_server/version.rb +1 -1
  5. data/lib/file_monitoring/file_monitoring.rb +94 -50
  6. data/lib/file_monitoring/monitor_path.rb +196 -113
  7. data/lib/file_utils/file_utils.rb +10 -49
  8. data/lib/networking/tcp.rb +4 -4
  9. data/spec/content_data/content_data_spec.rb +331 -0
  10. data/spec/content_data/validations_spec.rb +5 -0
  11. data/spec/content_server/content_server_spec.rb +5 -0
  12. data/spec/content_server/file_streamer_spec.rb +5 -0
  13. data/spec/file_copy/copy_spec.rb +5 -0
  14. data/spec/file_indexing/index_agent_spec.rb +5 -0
  15. data/spec/networking/tcp_spec.rb +5 -0
  16. data/spec/validations/index_validations_spec.rb +5 -0
  17. metadata +9 -89
  18. data/test/content_data/content_data_test.rb +0 -291
  19. data/test/file_generator/file_generator_spec.rb +0 -85
  20. data/test/file_monitoring/monitor_path_test.rb +0 -189
  21. data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000 +0 -1000
  22. data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.0 +0 -1000
  23. data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.1 +0 -1000
  24. data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500 +0 -1500
  25. data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.0 +0 -1500
  26. data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.1 +0 -1500
  27. data/test/file_monitoring/monitor_path_test/test_file.500 +0 -500
  28. data/test/file_monitoring/monitor_path_test/test_file.500.0 +0 -500
  29. data/test/file_monitoring/monitor_path_test/test_file.500.1 +0 -500
  30. data/test/file_utils/fileutil_mksymlink_test.rb +0 -134
  31. data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500 +0 -1500
  32. data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.0 +0 -1500
  33. data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.1 +0 -1500
  34. data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000 +0 -1000
  35. data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.0 +0 -1000
  36. data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.1 +0 -1000
  37. data/test/file_utils/fileutil_mksymlink_test/test_file.500 +0 -500
  38. data/test/file_utils/fileutil_mksymlink_test/test_file.500.0 +0 -500
  39. data/test/file_utils/fileutil_mksymlink_test/test_file.500.1 +0 -500
  40. data/test/file_utils/time_modification_test.rb +0 -136
  41. data/test/params/params_spec.rb +0 -280
  42. data/test/params/params_test.rb +0 -43
  43. data/test/run_in_background/run_in_background_test.rb +0 -122
  44. data/test/run_in_background/test_app +0 -59
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODk2ZWZlZGIwODU5Y2I1YTg3YzIyZWZmNDQyOGRiOTJhMWMwODJiZg==
5
+ data.tar.gz: !binary |-
6
+ NWQxY2E1NGE2ZmQ3YzRlYzFjN2QwNTRlMWMxYzFjMmZhODlhNDVlNA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MmViNDAxZDU0NTk4ZDY0YzNiYTkxZWVkZjAxYzMyNjZhM2Y0ZDE2MmE3NDNl
10
+ YTE3MTJlZGU0NzBjZjJjYzNmM2Q5YWM1NzEyNzgxZDM4MmRmZDIyOWUxMGY3
11
+ MjU4YjMxMDFlOThkNDM1M2ZiZTJhN2RjNzFkZmZhYWZkZWE4MTM=
12
+ data.tar.gz: !binary |-
13
+ YmQxOWYzZGFhZDY5YjNmOTBkN2VhMDBiOTQwZTdjMmQxNzhmZDgxMzE3YzVl
14
+ OTg4NDRmYmE5ZDU3ZWJhMGU2YzNiYTQ2Y2M4OTEyMGQzYzNmOWJkYzc0YmMw
15
+ OTJhNmQ3YTY3MDIzZDI5NmYzOThhNjA0MjQ4MTI0ZGY5OGI1Nzc=
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This utility executable is used to easily execute BBFS operations on files.
4
+
5
+ require 'content_data'
6
+ require 'log'
7
+ require 'params'
8
+
9
#############################################
# Definitions
#############################################
# Command-line parameters accepted by this executable. All default to nil,
# so a parameter the user did not pass can be detected with `nil?`.
Params.string 'command', nil, 'supported commands are: merge, intersect, minus'
Params.string 'dest', nil, 'destination path'
Params.string 'cd_a', nil, 'a path'
Params.string 'cd_b', nil, 'b path'

# Usage text, printed when the executable is started without arguments or
# with an unsupported --command value.
HELP_MSG = <<EOF
This is a utility for BBFS that performs
set-algebra operations on ContentData files.
Usage:
To get the merge of two ContentData files,
i.e. contents and appropriate instances that exist in at least one ContentData file:
file_utils --command=merge --cd_a=<content_data_file_1> --cd_b=<content_data_file_2> [--dest=<result_content_data_file>]
To get the intersection of two ContentData files,
i.e. contents and appropriate instances that exist in both ContentData files:
file_utils --command=intersect --cd_a=<content_data_file_1> --cd_b=<content_data_file_2> [--dest=<result_content_data_file>]
To get content_data_file_2 minus content_data_file_1,
i.e. contents and appropriate instances that exist in content_data_file_2 and are absent in content_data_file_1:
file_utils --command=minus --cd_a=<content_data_file_1> --cd_b=<content_data_file_2> [--dest=<result_content_data_file>]
EOF
#############################################
# Init
#############################################
# Parse the command line into Params before the log is initialised.
Params.init ARGV
Log.init
36
+
37
+ #############################################
38
+ # Methods
39
+ #############################################
40
+
41
# Loads the ContentData file whose path is stored in the command-line
# parameter +param+ ('cd_a' or 'cd_b').
#
# @param param [String] name of the parameter holding the file path
# @return [ContentData::ContentData, nil] the loaded object, or nil when
#   loading failed (the failure is printed, logged and the log is flushed)
def load_content_data_file(param)
  content_data = ContentData::ContentData.new
  content_data.from_file(Params[param])
  content_data
rescue StandardError => e
  # StandardError, not Exception: Interrupt / SystemExit / NoMemoryError
  # must keep propagating instead of being reported as a load failure.
  err_msg = format('Error loading content data %s = %s: %s', param, Params[param], e.message)
  puts err_msg
  Log.error(err_msg)
  Log.flush
  nil
end

# Algebra of sets operations (merge, intersect, minus) on ContentData files.
#
# Reads the files named by --cd_a and --cd_b, applies the operation selected
# by --command, prints the result preceded by a one-line '# ...' preface and,
# when --dest is set, also writes the result to that file.
# Returns early (after printing and logging the reason) when a parameter is
# missing or one of the files cannot be loaded.
def content_data_command
  missing = %w[cd_a cd_b].find { |param| Params[param].nil? }
  if missing
    err_msg = "--#{missing} is not set"
    puts err_msg
    Log.error(err_msg)
    return
  end

  cd_a = load_content_data_file('cd_a')
  return if cd_a.nil?

  cd_b = load_content_data_file('cd_b')
  return if cd_b.nil?

  case Params['command']
  when 'merge'
    res = ContentData.merge(cd_a, cd_b)
    preface = "# Merge of #{Params['cd_a']} and #{Params['cd_b']}"
  when 'intersect'
    res = ContentData.intersect(cd_a, cd_b)
    preface = "# Intersection of #{Params['cd_a']} and #{Params['cd_b']}"
  when 'minus'
    res = ContentData.remove(cd_a, cd_b) # cd_b - cd_a
    preface = "# Contents and appropriate instances that exist in #{Params['cd_b']}" \
              " and absent in #{Params['cd_a']}"
  end

  # res stays nil for any other command value; nothing to print in that case.
  return if res.nil?

  puts preface
  puts res.to_s
  res.to_file(Params['dest']) unless Params['dest'].nil?
end
95
+
96
# Main method.
# Prints the usage text and exits when no command-line arguments were given;
# otherwise dispatches the supported commands to content_data_command and
# reports anything else as an unsupported command.
def run
  if ARGV.empty?
    puts HELP_MSG
    exit
  end

  command = Params['command']
  if %w[merge intersect minus].include?(command)
    content_data_command
  else
    err_msg = "Unsupported command: #{command}"
    puts err_msg
    puts HELP_MSG
    Log.error(err_msg)
  end
end
114
+
115
+ #############################################
116
+ # Main
117
+ #############################################
118
+ run
@@ -35,16 +35,18 @@ module ContentData
35
35
  if other.nil?
36
36
  @contents_info = {} # Checksum --> [size, paths-->time(instance), time(content)]
37
37
  @instances_info = {} # location --> checksum to optimize instances query
38
+ @symlinks_info = {} # [server,symlink path] -> target
38
39
  else
39
40
  @contents_info = other.clone_contents_info
40
41
  @instances_info = other.clone_instances_info # location --> checksum to optimize instances query
42
+ @symlinks_info = other.clone_symlinks_info
41
43
  end
42
44
  end
43
45
 
44
46
  # Content Data unique identification
45
47
  # @return [ID] hash identification
46
48
  def unique_id
47
- @instances_info.hash
49
+ [@contents_info.hash,@symlinks_info.hash]
48
50
  end
49
51
 
50
52
  def clone_instances_info
@@ -83,6 +85,16 @@ module ContentData
83
85
  clone_contents_info
84
86
  end
85
87
 
88
# Builds a deep copy of @symlinks_info: every key ([server, symlink path])
# and every target is cloned, so the copy shares no objects with the original.
#
# @return [Hash] [server, symlink path] -> target
def clone_symlinks_info
  @symlinks_info.each_with_object({}) do |((server, path), target), cloned_symlinks|
    # The key is [server, path]; both elements must be taken from their own
    # slot (cloning the server twice would drop the path from the key).
    cloned_symlinks[[server.clone, path.clone]] = target.clone
  end
end
97
+
86
98
  # iterator over @contents_info data structure (not including instances)
87
99
  # block is provided with: checksum, size and content modification time
88
100
  def each_content(&block)
@@ -131,6 +143,17 @@ module ContentData
131
143
  }
132
144
  end
133
145
 
146
# Iterator over the @symlinks_info data structure.
# The block is provided with: server, symlink file path and target.
# Without a block an Enumerator over the same triples is returned.
#
# @return [nil, Enumerator] nil when a block is given
def each_symlink(&block)
  return enum_for(:each_symlink) unless block

  @symlinks_info.each do |(server, path), target|
    block.call(server, path, target)
  end
  nil
end
156
+
134
157
  def contents_size()
135
158
  @contents_info.length
136
159
  end
@@ -139,6 +162,10 @@ module ContentData
139
162
  @instances_info.length
140
163
  end
141
164
 
165
# Number of symlink records currently held in @symlinks_info.
def symlinks_size
  @symlinks_info.size
end
168
+
142
169
  def checksum_instances_size(checksum)
143
170
  content_info = @contents_info[checksum]
144
171
  return 0 if content_info.nil?
@@ -181,8 +208,16 @@ module ContentData
181
208
  @instances_info[location] = checksum
182
209
  end
183
210
 
211
# Records (or overwrites) the symlink located at [server, path] so that it
# points to target. Returns target.
def add_symlink(server, path, target)
  location = [server, path]
  @symlinks_info.store(location, target)
end
214
+
215
# Drops the symlink record stored under [server, path].
# Returns the removed target, or nil when no such record exists.
def remove_symlink(server, path)
  location = [server, path]
  @symlinks_info.delete(location)
end
218
+
184
219
  def empty?
185
- @contents_info.empty?
220
+ @contents_info.empty? and @symlinks_info.empty?
186
221
  end
187
222
 
188
223
  def content_exists(checksum)
@@ -193,6 +228,11 @@ module ContentData
193
228
  @instances_info.has_key?([server, path])
194
229
  end
195
230
 
231
# Tells whether a symlink record is stored for the given location.
# Note the parameter order (path first, then server); the lookup key is
# [server, path].
def symlink_exists(path, server)
  location = [server, path]
  @symlinks_info.key?(location)
end
234
+
235
+
196
236
  # removes an instance record both in @contents_info and @instances_info.
197
237
  # input params: server & path - are the instance unique key (called location)
198
238
  # removes also the content, if content becomes empty after removing the instance
@@ -208,7 +248,7 @@ module ContentData
208
248
  end
209
249
 
210
250
  # removes all instances records which are located under input param: dir_to_remove.
211
- # found records are removed from both @instances_info and @instances_info.
251
+ # found records are removed from @contents_info , @instances_info and @symlinks_info
212
252
  # input params: server & dir_to_remove - are used to check each instance unique key (called location)
213
253
  # removes also content\s, if a content\s become\s empty after removing instance\s
214
254
  def remove_directory(dir_to_remove, server)
@@ -216,7 +256,9 @@ module ContentData
216
256
  loop {
217
257
  checksum = contents_enum.next rescue break
218
258
  instances = @contents_info[checksum][1]
219
- instances.each_key { |location|
259
+ instances_enum = instances.each_key
260
+ loop {
261
+ location = instances_enum.next rescue break
220
262
  if location[0] == server and location[1].scan(dir_to_remove).size > 0
221
263
  instances.delete(location)
222
264
  @instances_info.delete(location)
@@ -224,27 +266,20 @@ module ContentData
224
266
  }
225
267
  @contents_info.delete(checksum) if instances.empty?
226
268
  }
227
- end
228
269
 
270
+ # handle symlinks
271
+ symlinks_enum = @symlinks_info.each_key
272
+ loop {
273
+ symlink_key = symlinks_enum.next rescue break
274
+ if symlink_key[0] == server and symlink_key[1].scan(dir_to_remove).size > 0
275
+ @symlinks_info.delete(symlink_key)
276
+ end
277
+ }
278
+ end
229
279
 
230
280
  def ==(other)
231
- return false if other.nil?
232
- return false if @contents_info.length != other.contents_size
233
- other.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
234
- return false if instance_exists(path, server) != other.instance_exists(path, server)
235
- local_content_info = @contents_info[checksum]
236
- return false if local_content_info.nil?
237
- return false if local_content_info[0] != size
238
- return false if local_content_info[2] != content_mod_time
239
- #check instances
240
- local_instances = local_content_info[1]
241
- return false if other.checksum_instances_size(checksum) != local_instances.length
242
- location = [server, path]
243
- local_instance_mod_time, _ = local_instances[location]
244
- return false if local_instance_mod_time.nil?
245
- return false if local_instance_mod_time != instance_mod_time
246
- }
247
- true
281
+ return nil if other.nil? # for this case: content_data == nil
282
+ unique_id == other.unique_id
248
283
  end
249
284
 
250
285
  def remove_content(checksum)
@@ -282,6 +317,7 @@ module ContentData
282
317
  content_data_dir = File.dirname(filename)
283
318
  FileUtils.makedirs(content_data_dir) unless File.directory?(content_data_dir)
284
319
  File.open(filename, 'w') { |file|
320
+ # Write contents
285
321
  file.write("#{@contents_info.length}\n")
286
322
  contents_enum = @contents_info.each_key
287
323
  content_chunks = @contents_info.length / CHUNK_SIZE + 1
@@ -291,6 +327,8 @@ module ContentData
291
327
  GC.start
292
328
  chunks_counter += 1
293
329
  end
330
+
331
+ # Write instances
294
332
  file.write("#{@instances_info.length}\n")
295
333
  contents_enum = @contents_info.each_key
296
334
  chunks_counter = 0
@@ -299,6 +337,14 @@ module ContentData
299
337
  GC.start
300
338
  chunks_counter += 1
301
339
  end
340
+
341
+ # Write symlinks
342
+ symlinks_info_enum = @symlinks_info.each_key
343
+ file.write("#{@symlinks_info.length}\n")
344
+ loop {
345
+ symlink_key = symlinks_info_enum.next rescue break
346
+ file.write("#{symlink_key[0]}<#{symlink_key[1]}<#{@symlinks_info[symlink_key]}\n")
347
+ }
302
348
  }
303
349
  end
304
350
 
@@ -307,7 +353,7 @@ module ContentData
307
353
  while chunk_counter < chunk_size
308
354
  checksum = contents_enum.next rescue return
309
355
  content_info = @contents_info[checksum]
310
- file.write("#{checksum},#{content_info[0]},#{content_info[2]}\n")
356
+ file.write("#{checksum}<#{content_info[0]}<#{content_info[2]}\n")
311
357
  chunk_counter += 1
312
358
  end
313
359
  end
@@ -323,8 +369,8 @@ module ContentData
323
369
  # provide the block with: checksum, size, content modification time,instance modification time,
324
370
  # server and path.
325
371
  instance_modification_time,instance_index_time = content_info[1][location]
326
- file.write("#{checksum},#{content_info[0]},#{location[0]},#{location[1]}," +
327
- "#{instance_modification_time},#{instance_index_time}\n")
372
+ file.write("#{checksum}<#{content_info[0]}<#{location[0]}<#{location[1]}<" +
373
+ "#{instance_modification_time}<#{instance_index_time}\n")
328
374
  }
329
375
  chunk_counter += 1
330
376
  break if chunk_counter == chunk_size
@@ -332,6 +378,7 @@ module ContentData
332
378
  end
333
379
 
334
380
  # TODO validation that file indeed contains ContentData missing
381
+ # TODO class level method?
335
382
  # Loading db from file using chunks for better memory performance
336
383
  def from_file(filename)
337
384
  # read first line (number of contents)
@@ -339,11 +386,16 @@ module ContentData
339
386
  # read number of instances.
340
387
  # loop over instances lines (using chunks) and add instances
341
388
 
389
+ unless File.exists? filename
390
+ raise ArgumentError.new "No such a file #{filename}"
391
+ end
392
+
342
393
  File.open(filename, 'r') { |file|
343
394
  # Get number of contents (at first line)
344
395
  number_of_contents = file.gets # this gets the next line or return nil at EOF
345
396
  unless (number_of_contents and number_of_contents.match(/^[\d]+$/)) # check that line is of Number format
346
- return reset_load_from_file(filename, file, "number of contents should be a number. We got:#{number_of_contents}")
397
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
398
+ "number of contents should be a number. We got:#{number_of_contents}")
347
399
  end
348
400
  number_of_contents = number_of_contents.to_i
349
401
  # advance file lines over all contents. We need only the instances data to build the content data object
@@ -365,7 +417,8 @@ module ContentData
365
417
  # get number of instances
366
418
  number_of_instances = file.gets
367
419
  unless (number_of_instances and number_of_instances.match(/^[\d]+$/)) # check that line is of Number format
368
- return reset_load_from_file(filename, file, "number of instances should be a Number. We got:#{number_of_instances}")
420
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
421
+ "number of instances should be a Number. We got:#{number_of_instances}")
369
422
  end
370
423
  number_of_instances = number_of_instances.to_i
371
424
  # read in instances chunks and GC
@@ -382,35 +435,56 @@ module ContentData
382
435
  GC.start
383
436
  chunk_index += 1
384
437
  end
438
+
439
+ # get number of symlinks
440
+ number_of_symlinks = file.gets
441
+ unless (number_of_symlinks and number_of_symlinks.match(/^[\d]+$/)) # check that line is of Number format
442
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
443
+ "number of symlinks should be a Number. We got:#{number_of_symlinks}")
444
+ end
445
+ number_of_symlinks.to_i.times {
446
+ symlinks_line = file.gets
447
+ unless symlinks_line
448
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
449
+ "Expected to read symlink line but reached EOF")
450
+ end
451
+ parameters = symlinks_line.split('<')
452
+ if (3 != parameters.length)
453
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
454
+ "Expected to read 3 fields ('<' separated) but got #{parameters.length}.\nLine:#{symlinks_line}")
455
+ end
456
+
457
+ @symlinks_info[[parameters[0],parameters[1]]] = parameters[2]
458
+ }
385
459
  }
386
460
  end
387
461
 
388
462
  def read_contents_chunk(filename, file, chunk_size)
389
463
  chunk_index = 0
390
464
  while chunk_index < chunk_size
391
- return reset_load_from_file(filename, file, "Expecting content line but " +
392
- "reached end of file after line #{$.}") unless file.gets
465
+ unless file.gets
466
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
467
+ "Expecting content line but reached end of file")
468
+ end
393
469
  chunk_index += 1
394
470
  end
395
471
  true
396
472
  end
397
473
 
398
- def read_instances_chunk(filename, file, chunk_size)
474
+ def read_instances_chunk(filename, file, chunk_size)
399
475
  chunk_index = 0
400
476
  while chunk_index < chunk_size
401
477
  instance_line = file.gets
402
- return reset_load_from_file(filename, file, "Expected to read Instance line but reached EOF") unless instance_line
403
- parameters = instance_line.split(',')
404
- # bugfix: if file name consist a comma then parsing based on comma separating fails
405
- if (parameters.size > 6)
406
- (4..parameters.size-3).each do |i|
407
- parameters[3] = [parameters[3], parameters[i]].join(",")
408
- end
409
- (4..parameters.size-3).each do |i|
410
- parameters.delete_at(4)
411
- end
478
+ unless instance_line
479
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
480
+ "Expected to read Instance line but reached EOF")
412
481
  end
413
482
 
483
+ parameters = instance_line.split('<')
484
+ if (6 != parameters.length)
485
+ raise("Parse error of content data file:#{filename} line ##{$.}\n" +
486
+ "Expected to read 6 fields ('<' separated) but got #{parameters.length}.\nLine:#{instance_line}")
487
+ end
414
488
  add_instance(parameters[0], #checksum
415
489
  parameters[1].to_i, # size
416
490
  parameters[2], # server
@@ -422,14 +496,6 @@ module ContentData
422
496
  true
423
497
  end
424
498
 
425
- def reset_load_from_file(file_name, file_io, err_msg)
426
- Log.error("unexpected error reading file:#{file_name}\nError message:#{err_msg}")
427
- @contents_info = {} # Checksum --> [size, paths-->time(instance), time(content)]
428
- @instances_info = {} # location --> checksum to optimize instances query
429
- file_io.close
430
- nil
431
- end
432
-
433
499
  # for each content, all time fields (content and instances) are replaced with the
434
500
  # min time found, while going through all time fields.
435
501
  def unify_time()