content_server 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (137) hide show
  1. data/bin/backup_server +8 -20
  2. data/bin/content_server +8 -20
  3. data/bin/testing_memory +60 -0
  4. data/bin/testing_server +57 -0
  5. data/ext/run_in_background/mkrf_conf.rb +34 -0
  6. data/lib/content_data/content_data.rb +613 -0
  7. data/lib/content_data/version.rb +3 -0
  8. data/lib/content_data.rb +6 -0
  9. data/lib/content_server/backup_server.rb +65 -86
  10. data/lib/content_server/content_server.rb +47 -77
  11. data/lib/content_server/file_streamer.rb +27 -33
  12. data/lib/content_server/queue_copy.rb +154 -49
  13. data/lib/content_server/queue_indexer.rb +19 -11
  14. data/lib/content_server/remote_content.rb +41 -23
  15. data/lib/content_server/server.rb +91 -0
  16. data/lib/content_server/version.rb +1 -1
  17. data/lib/content_server.rb +0 -15
  18. data/lib/email/email.rb +87 -0
  19. data/lib/email/version.rb +3 -0
  20. data/lib/email.rb +4 -0
  21. data/lib/file_copy/copy.rb +68 -0
  22. data/lib/file_copy/version.rb +4 -0
  23. data/lib/file_copy.rb +4 -0
  24. data/lib/file_indexing/index_agent.rb +170 -0
  25. data/lib/file_indexing/indexer_patterns.rb +72 -0
  26. data/lib/file_indexing/version.rb +3 -0
  27. data/lib/file_indexing.rb +9 -0
  28. data/lib/file_monitoring/file_monitoring.rb +105 -0
  29. data/lib/file_monitoring/monitor_path.rb +304 -0
  30. data/lib/file_monitoring/version.rb +3 -0
  31. data/lib/file_monitoring.rb +29 -0
  32. data/lib/file_utils/file_generator/README +97 -0
  33. data/lib/file_utils/file_generator/file_generator.rb +156 -0
  34. data/lib/file_utils/file_utils.rb +260 -0
  35. data/lib/file_utils/version.rb +3 -0
  36. data/lib/file_utils.rb +4 -0
  37. data/lib/log/version.rb +3 -0
  38. data/lib/log.rb +188 -0
  39. data/lib/networking/tcp.rb +213 -0
  40. data/lib/networking/version.rb +3 -0
  41. data/lib/networking.rb +4 -0
  42. data/lib/params/version.rb +3 -0
  43. data/lib/params.rb +419 -0
  44. data/lib/process_monitoring/monitoring.rb +85 -0
  45. data/lib/process_monitoring/monitoring_info.rb +79 -0
  46. data/lib/process_monitoring/send_email.rb +40 -0
  47. data/lib/process_monitoring/thread_safe_hash.rb +77 -0
  48. data/lib/process_monitoring/version.rb +3 -0
  49. data/lib/process_monitoring.rb +6 -0
  50. data/lib/run_in_background/version.rb +3 -0
  51. data/lib/run_in_background.rb +432 -0
  52. data/lib/testing_memory/testing_memory.rb +187 -0
  53. data/lib/testing_server/testing_server.rb +236 -0
  54. data/lib/testing_server/version.rb +3 -0
  55. data/lib/testing_server.rb +12 -0
  56. data/lib/validations/index_validations.rb +106 -0
  57. data/lib/validations/version.rb +3 -0
  58. data/lib/validations.rb +4 -0
  59. data/spec/content_data/validations_spec.rb +113 -0
  60. data/spec/file_copy/copy_spec.rb +54 -0
  61. data/spec/file_indexing/index_agent_spec.rb +53 -0
  62. data/spec/networking/tcp_spec.rb +95 -0
  63. data/spec/validations/index_validations_spec.rb +77 -0
  64. data/test/content_data/content_data_test.rb +290 -0
  65. data/test/file_generator/file_generator_spec.rb +84 -0
  66. data/test/file_indexing/index_agent_test/New.txt +0 -0
  67. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libexslt.dll +0 -0
  68. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libxslt.dll +0 -0
  69. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/xsltproc.exe +0 -0
  70. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exslt.h +102 -0
  71. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltconfig.h +73 -0
  72. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltexports.h +140 -0
  73. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/libexslt.h +29 -0
  74. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/attributes.h +38 -0
  75. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/documents.h +93 -0
  76. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extensions.h +262 -0
  77. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extra.h +80 -0
  78. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/functions.h +78 -0
  79. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/imports.h +75 -0
  80. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/keys.h +53 -0
  81. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/libxslt.h +30 -0
  82. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/namespaces.h +68 -0
  83. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/numbersInternals.h +69 -0
  84. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/pattern.h +81 -0
  85. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/preproc.h +43 -0
  86. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/security.h +104 -0
  87. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/templates.h +77 -0
  88. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/transform.h +207 -0
  89. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/trio.h +216 -0
  90. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/triodef.h +220 -0
  91. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/variables.h +91 -0
  92. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/win32config.h +101 -0
  93. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xslt.h +103 -0
  94. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltInternals.h +1967 -0
  95. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltconfig.h +172 -0
  96. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltexports.h +142 -0
  97. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltlocale.h +57 -0
  98. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltutils.h +309 -0
  99. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltwin32config.h +105 -0
  100. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt.lib +0 -0
  101. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt_a.lib +0 -0
  102. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt.lib +0 -0
  103. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt_a.lib +0 -0
  104. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/readme.txt +22 -0
  105. data/test/file_indexing/index_agent_test/patterns.input +3 -0
  106. data/test/file_indexing/index_agent_test.rb +51 -0
  107. data/test/file_monitoring/file_monitoring_test/conf.yml +4 -0
  108. data/test/file_monitoring/file_monitoring_test/conf_win32.yml +5 -0
  109. data/test/file_monitoring/file_monitoring_test/log +56 -0
  110. data/test/file_monitoring/file_monitoring_test.rb +0 -0
  111. data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000 +1000 -0
  112. data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.0 +1000 -0
  113. data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.1 +1000 -0
  114. data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500 +1500 -0
  115. data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.0 +1500 -0
  116. data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.1 +1500 -0
  117. data/test/file_monitoring/monitor_path_test/test_file.500 +500 -0
  118. data/test/file_monitoring/monitor_path_test/test_file.500.0 +500 -0
  119. data/test/file_monitoring/monitor_path_test/test_file.500.1 +500 -0
  120. data/test/file_monitoring/monitor_path_test.rb +153 -0
  121. data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500 +1500 -0
  122. data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.0 +1500 -0
  123. data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.1 +1500 -0
  124. data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000 +1000 -0
  125. data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.0 +1000 -0
  126. data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.1 +1000 -0
  127. data/test/file_utils/fileutil_mksymlink_test/test_file.500 +500 -0
  128. data/test/file_utils/fileutil_mksymlink_test/test_file.500.0 +500 -0
  129. data/test/file_utils/fileutil_mksymlink_test/test_file.500.1 +500 -0
  130. data/test/file_utils/fileutil_mksymlink_test.rb +125 -0
  131. data/test/file_utils/time_modification_test.rb +132 -0
  132. data/test/params/params_spec.rb +280 -0
  133. data/test/params/params_test.rb +43 -0
  134. data/test/run_in_background/run_in_background_test.rb +122 -0
  135. data/test/run_in_background/test_app +57 -0
  136. metadata +272 -132
  137. data/lib/content_server/globals.rb +0 -10
@@ -0,0 +1,613 @@
1
+ require 'content_server/server'
2
+ require 'log'
3
+ require 'params'
4
+
5
+ module ContentData
6
+ Params.string('instance_check_level', 'shallow', 'Defines check level. Supported levels are: ' \
7
+ 'shallow - quick, tests instance for file existence and attributes. ' \
8
+ 'deep - can take more time, in addition to shallow recalculates hash sum.')
9
+
10
+ # Content Data(CD) object holds files information as contents and instances
11
+ # Files info retrieved from hardware: checksum, size, time modification, server, device and path
12
+ # Those attributes are divided into content and instance attributes:
13
+ # unique checksum, size are content attributes
14
+ # time modification, server, device and path are instance attributes
15
+ # The relationship between content and instances is 1:many meaning that
16
+ # a content can have instances in many servers.
17
+ # content also has time attribute, which has the value of the time of the first instance.
18
+ # This can be changed by using the unify_time method, which sets all time attributes for a content and its
19
+ # instances to the min time of all.
20
+ # Different files(instances) with same content(checksum), are grouped together under that content.
21
+ # Interface methods include:
22
+ # iterate over contents and instances info,
23
+ # unify time, add/remove instance, queries, merge, remove directory and more.
24
+ # Content info data structure:
25
+ # @contents_info = { Checksum -> [size, *instances*, content_modification_time] }
26
+ # *instances* = {[server,path] -> instance_modification_time }
27
+ # Notes:
28
+ # 1. content_modification_time is the instance_modification_time of the first
29
+ # instances which was added to @contents_info
30
+ class ContentData
31
+
32
# Builds an empty ContentData, or a copy of +other+ when one is supplied.
# The copy is obtained through other.clone_contents_info and
# other.clone_instances_info.
# @param other [ContentData, nil] object to copy from; nil creates an empty object
def initialize(other = nil)
  copy_requested = !other.nil?
  # Checksum --> [size, paths-->time(instance), time(content)]
  @contents_info = copy_requested ? other.clone_contents_info : {}
  # location --> checksum to optimize instances query
  @instances_info = copy_requested ? other.clone_instances_info : {}
end
41
+
42
# Identification of this Content Data, derived from the instances index.
# @return [Integer] hash value of the location --> checksum map
def unique_id
  instances_index = @instances_info
  instances_index.hash
end
47
+
48
# Returns a copy of the instances index (location --> checksum) in which the
# server string, the path string and the checksum are each cloned.
# @return [Hash] new hash of [server, path] --> checksum
def clone_instances_info
  @instances_info.each_with_object({}) do |(location, checksum), copy|
    copy[[location[0].clone, location[1].clone]] = checksum.clone
  end
end
54
+
55
# Returns a copy of the contents data structure
# (checksum --> [size, instances, content time]).
# Every location key gets cloned server and path strings; checksums, sizes
# and time values are reused as they are.
# @return [Hash] new contents hash
def clone_contents_info
  @contents_info.each_with_object({}) do |(checksum, content_info), copy|
    cloned_instances = {}
    content_info[1].each_pair do |location, instance_mtime|
      cloned_instances[[location[0].clone, location[1].clone]] = instance_mtime
    end
    copy[checksum] = [content_info[0], cloned_instances, content_info[2]]
  end
end
72
+
73
# Iterates over the contents (instances are not included).
# The block receives: checksum, size and content modification time.
# The checksums are taken as a snapshot (keys array) before iterating.
def each_content(&block)
  @contents_info.keys.each do |checksum|
    entry = @contents_info[checksum]
    block.call(checksum, entry[0], entry[2])
  end
end
82
+
83
# Iterates over every instance of every content.
# The block receives: checksum, size, content modification time,
# instance modification time, server and file path.
# Checksums and locations are taken as snapshots (keys arrays) before iterating.
def each_instance(&block)
  @contents_info.keys.each do |checksum|
    entry = @contents_info[checksum]
    entry[1].keys.each do |location|
      instance_mtime = entry[1][location]
      block.call(checksum, entry[0], entry[2], instance_mtime, location[0], location[1])
    end
  end
end
98
+
99
# Iterates over the instances of one specific content.
# The block receives: checksum, size, content modification time,
# instance modification time, server and file path.
# An unknown checksum yields nothing and returns nil, in line with the other
# checksum lookups of this class (previously it failed with NoMethodError).
# @param checksum [String] checksum of the content to iterate
def content_each_instance(checksum, &block)
  content_info = @contents_info[checksum]
  return nil if content_info.nil?
  content_info[1].keys.each do |location|
    instance_modification_time = content_info[1][location]
    block.call(checksum, content_info[0], content_info[2], instance_modification_time,
               location[0], location[1])
  end
end
112
+
113
# @return [Integer] number of contents (unique checksums) held
def contents_size
  @contents_info.size
end
116
+
117
# @return [Integer] total number of instances, summed over all contents
def instances_size
  @contents_info.values.inject(0) do |total, content_info|
    total + content_info[1].length
  end
end
124
+
125
# @param checksum [String] checksum of the content to inspect
# @return [Integer] number of instances of that content, 0 when the checksum is unknown
def checksum_instances_size(checksum)
  entry = @contents_info[checksum]
  entry.nil? ? 0 : entry[1].length
end
130
+
131
# Looks up the modification time recorded for one instance of a content.
# @param checksum [String] checksum of the content
# @param location [Array] instance key: [server, path]
# @return modification time stored for the instance; nil when the checksum or
#   the location is unknown
def get_instance_mod_time(checksum, location)
  content_info = @contents_info[checksum]
  return nil if content_info.nil?
  # The lookup result is the return value; no intermediate local is needed.
  content_info[1][location]
end
137
+
138
# Adds (or overrides) an instance and registers its content when it is new.
# A new content takes the instance modification time as its content time.
# For an existing content a size mismatch is logged and the size is overridden.
# NOTE(review): when the location was already indexed under a different
# checksum, the old content keeps its instance entry - confirm whether the
# callers rely on remove_instance being called first.
# @param checksum [String] content checksum
# @param size [Integer] file size
# @param server [String] server name, first part of the location key
# @param path [String] file path, second part of the location key
# @param modification_time [Integer] instance modification time
def add_instance(checksum, size, server, path, modification_time)
  location = [server, path]
  existing = @contents_info[checksum]
  if existing.nil?
    @contents_info[checksum] = [size, {location => modification_time}, modification_time]
  else
    unless size == existing[0]
      Log.warning 'File size different from content size while same checksum'
      Log.warning("instance location:server:'#{location[0]}' path:'#{location[1]}'")
      Log.warning("instance mod time:'#{modification_time}'")
    end
    # override the size and the instance time if needed
    existing[0] = size
    existing[1][location] = modification_time
  end
  @instances_info[location] = checksum
end
158
+
159
# @return [Boolean] true when no content is held
def empty?
  @contents_info.length.zero?
end
162
+
163
# @param checksum [String] checksum to look for
# @return [Boolean] true when a content with that checksum is held
def content_exists(checksum)
  @contents_info.key?(checksum)
end
166
+
167
# Note the parameter order: path first, server second.
# @param path [String] file path of the instance
# @param server [String] server name of the instance
# @return [Boolean] true when the [server, path] location is indexed
def instance_exists(path, server)
  location = [server, path]
  @instances_info.key?(location)
end
170
+
171
# @param location [Array] instance key: [server, path]
# @return [Array, nil] [content size, instance modification time] of the
#   instance at that location; nil when the location is not indexed
def stats_by_location(location)
  entry = @contents_info[@instances_info[location]]
  return nil if entry.nil?
  [entry[0], entry[1][location]]
end
177
+
178
# Removes an instance record from both @contents_info and @instances_info.
# server & path form the unique key of the instance (called location).
# The content itself is removed too once its last instance is gone.
# @return the checksum that was indexed for the location; nil when the
#   location is not indexed
def remove_instance(server, path)
  location = [server, path]
  checksum = @instances_info[location]
  entry = @contents_info[checksum]
  return nil if entry.nil?
  remaining = entry[1]
  remaining.delete(location)
  @contents_info.delete(checksum) if remaining.empty?
  @instances_info.delete(location)
end
191
+
192
# Removes all instance records of +server+ whose path matches +dir_to_remove+.
# Matching records are removed from both @contents_info and @instances_info;
# a content that is left without instances is removed as well.
# NOTE(review): the match is String#scan based, so dir_to_remove matches
# anywhere in the path, not only as a leading directory - confirm this is intended.
# @param server [String] server name of the instances to remove
# @param dir_to_remove [String, Regexp] pattern looked up in each instance path
def remove_directory(server, dir_to_remove)
  @contents_info.keys.each do |checksum|
    instances = @contents_info[checksum][1]
    # Walk a snapshot of the keys: the hash is modified inside the loop.
    instances.keys.each do |location|
      next unless location[0] == server && location[1].scan(dir_to_remove).size > 0
      instances.delete(location)
      @instances_info.delete(location)
    end
    @contents_info.delete(checksum) if instances.empty?
  end
end
208
+
209
+
210
# Compares this object with another Content Data.
# Two objects are equal when they hold the same number of contents and every
# instance of +other+ is found here under the same checksum with the same
# size, content time, instance count and instance time.
# @param other [ContentData, nil] object to compare with
# @return [Boolean] true when both objects hold the same data
def ==(other)
  return false if other.nil?
  return false unless @contents_info.length == other.contents_size
  other.each_instance do |checksum, size, content_mod_time, instance_mod_time, server, path|
    return false unless instance_exists(path, server) == other.instance_exists(path, server)
    own_entry = @contents_info[checksum]
    return false if own_entry.nil?
    return false unless own_entry[0] == size
    return false unless own_entry[2] == content_mod_time
    # check instances
    own_instances = own_entry[1]
    return false unless other.checksum_instances_size(checksum) == own_instances.length
    own_instance_time = own_instances[[server, path]]
    return false if own_instance_time.nil?
    return false unless own_instance_time == instance_mod_time
  end
  true
end
229
+
230
# Removes a content together with all of its instances.
# @param checksum [String] checksum of the content to remove
# @return the removed content record; nil when the checksum is unknown
def remove_content(checksum)
  entry = @contents_info[checksum]
  return nil unless entry
  entry[1].each_key { |location| @instances_info.delete(location) }
  @contents_info.delete(checksum)
end
239
+
240
+ def to_s
241
+ return_str = ""
242
+ contents_str = ""
243
+ instances_str = ""
244
+ instances_counter = 0
245
+ each_content { |checksum, size, content_mod_time|
246
+ contents_str << "%s,%d,%d\n" % [checksum, size, content_mod_time]
247
+ }
248
+ instances_counter = 0
249
+ each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
250
+ instances_counter += 1
251
+ instances_str << "%s,%d,%s,%s,%d\n" % [checksum, size, server, path, instance_mod_time]
252
+ }
253
+ return_str << "%d\n" % [@contents_info.length]
254
+ return_str << contents_str
255
+ return_str << "%d\n" % [instances_counter]
256
+ return_str << instances_str
257
+ return_str
258
+ end
259
+
260
# Writes the textual form of this object (see to_s) to a file.
# The directory of the file is created first when it does not exist.
# @param filename [String] path of the file to write
def to_file(filename)
  target_dir = File.dirname(filename)
  FileUtils.makedirs(target_dir) unless File.directory?(target_dir)
  File.open(filename, 'w') { |file| file.write(to_s) }
end
265
+
266
# Loads instances from a file written by to_file and adds them to this object.
# Only the instances section is read; contents are rebuilt by add_instance.
# A path that contains commas is re-joined, as the line format is comma separated.
# When the file holds fewer instance lines than it declares, a warning is
# logged and the file is copied to "<filename>.bad" (once), then loading stops
# adding instances.
# TODO validation that file indeed contains ContentData missing
# @param filename [String] path of the file to read
# @return [Integer] number of instances declared in the file
def from_file(filename)
  lines = IO.readlines(filename)
  number_of_contents = lines[0].to_i
  # The instances counter follows the counter line and the contents section.
  counter_index = 1 + number_of_contents
  number_of_instances = lines[counter_index].to_i
  backup_done = false
  number_of_instances.times do |offset|
    line_index = counter_index + 1 + offset
    line = lines[line_index]
    if line.nil?
      # Once one line is missing all following ones are missing too.
      unless backup_done
        Log.warning "line ##{line_index} is nil !!!, Backing filename: #{filename} to #{filename}.bad"
        FileUtils.cp(filename, "#{filename}.bad")
        # Dump the whole file; the missing line itself is nil and cannot be joined.
        Log.warning("Lines:\n#{lines.join}")
        backup_done = true
      end
      next
    end

    fields = line.split(',')
    # bugfix: a file name may contain commas, so everything between the
    # server and the last field belongs to the path
    if fields.size > 5
      fields = fields[0, 3] + [fields[3..-2].join(','), fields[-1]]
    end

    add_instance(fields[0], fields[1].to_i, fields[2], fields[3], fields[4].to_i)
  end
end
299
+
300
# For each content, replaces all time fields (the content time and every
# instance time) with the minimal time found among them.
def unify_time
  @contents_info.keys.each do |checksum|
    entry = @contents_info[checksum]
    instances = entry[1]
    # start from the content time and keep the smallest value seen
    earliest = instances.keys.inject(entry[2]) do |minimum, location|
      instance_time = instances[location]
      instance_time < minimum ? instance_time : minimum
    end
    # update all instances with min time
    instances.keys.each { |location| instances[location] = earliest }
    # update content time with min time
    entry[2] = earliest
  end
end
321
+
322
# Validates the index against the file system: every instance is passed to
# check_instance, which checks it according to the instance_check_level
# system parameter:
# * shallow - quick, tests instance for file existence and attributes.
# * deep - can take more time, in addition to shallow recalculates hash sum.
# @param [Hash] params hash of parameters of validation, can be used to return additional data.
#
# Supported key/value combinations:
# * key is <tt>:failed</tt> value is <tt>ContentData</tt> used to return failed instances
# @return [Boolean] true when index is correct, false otherwise
# @raise [ArgumentError] when instance_check_level is incorrect
def validate(params = nil)
  all_valid = true
  @contents_info.keys.each do |checksum|
    entry = @contents_info[checksum]
    content_size = entry[0]
    content_mtime = entry[2]
    entry[1].keys.each do |location|
      instance_mtime = entry[1][location]
      # layout: checksum, content time, content size, instance time, server, path
      instance_info = [checksum, content_mtime, content_size, instance_mtime]
      instance_info.concat(location)
      next if check_instance(instance_info)

      all_valid = false
      # failed instances are collected only when a :failed sink was given
      next if params.nil? || params.empty?
      next if params[:failed].nil?
      params[:failed].add_instance(instance_info[0], instance_info[2], instance_info[4],
                                   instance_info[5], instance_info[3])
    end
  end
  all_valid
end
376
+
377
# Quick check of one instance against the file system: the file has to exist
# and its size and modification time have to match the indexed values.
# Every mismatch is logged as a warning.
# instance_info is an array:
# [0] - checksum
# [1] - content time
# [2] - content size
# [3] - instance mtime
# [4] - server name
# [5] - file path
# @return [Boolean] true when the file matches the indexed data
def shallow_check(instance_info)
  path = instance_info[5]
  size = instance_info[2]
  instance_mtime = instance_info[3]

  # File.exist? is used: File.exists? is deprecated and removed in Ruby 3.2.
  unless File.exist?(path)
    Log.warning "Indexed file #{path} doesn't exist"
    return false
  end

  is_valid = true
  actual_size = File.size(path)
  if actual_size != size
    is_valid = false
    Log.warning "#{path} size #{actual_size} differs from indexed size #{size}"
  end
  actual_mtime = File.mtime(path).to_i
  if actual_mtime != instance_mtime
    is_valid = false
    Log.warning "#{path} modification time #{actual_mtime} differs from indexed #{instance_mtime}"
  end
  is_valid
end
410
+
411
# Shallow check followed by a checksum comparison: the checksum returned by
# FileIndexing::IndexAgent.get_checksum for the file has to equal the indexed one.
# instance_info is an array:
# [0] - checksum
# [1] - content time
# [2] - content size
# [3] - instance mtime
# [4] - server name
# [5] - file path
# @return [Boolean] true when the instance passes both checks
def deep_check(instance_info)
  return false unless shallow_check(instance_info)

  indexed_checksum = instance_info[0]
  path = instance_info[5]
  current_checksum = FileIndexing::IndexAgent.get_checksum(path)
  return true if indexed_checksum == current_checksum

  Log.warning "#{path} checksum #{current_checksum} differs from indexed #{indexed_checksum}"
  false
end
434
+
435
# Checks one instance with the check selected by the instance_check_level
# system parameter ('deep' or 'shallow').
# @param instance [Array] instance info array, see shallow_check for the layout
# @return [Boolean] result of the selected check
# @raise [ArgumentError] when instance_check_level is incorrect
def check_instance(instance)
  level = Params['instance_check_level']
  case level
  when 'deep'
    deep_check instance
  when 'shallow'
    shallow_check instance
  else
    # TODO remove it when params will support set of values
    # raise, not throw: throw is for catch/throw control flow and would report
    # an "uncaught throw" instead of this message.
    raise ArgumentError, "Unsupported check level #{level}"
  end
end
447
+
448
+
449
# This method is experimental and shouldn't be used: it always raises.
# The draft implementation that used to follow the raise was unreachable and
# referred to names that are not defined in this class, so it was removed.
# Intended contract of a future implementation, as noted by the draft:
# * nil is used to define +/- infinity for the to/from arguments
# * from/to values are exclusive in the condition's calculations
# * '==' is used for object comparison; users should define their own '=='
#   implementation when needed
# @param variable [String] name of the instance attribute to query (unused)
# @param params [Hash] query options: 'exact', 'from', 'to', 'is_inside' (unused)
# @raise [RuntimeError] always
def get_query(variable, params)
  raise RuntimeError, 'This method is experimental and shouldn\'t be used'
end
504
+
505
+ private :shallow_check, :deep_check, :check_instance
506
+ end
507
+
508
# Merges content data a and content data b into a new content data and returns it.
# The result is a clone of b to which all instances of a are added; neither
# input is changed. When one input is nil the result is a clone of the other.
def self.merge(a, b)
  return ContentData.new(a) if b.nil?
  return ContentData.new(b) if a.nil?
  merged = ContentData.new(b)
  # Add A instances to the clone of B
  a.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
    merged.add_instance(checksum, size, server, path, instance_mod_time)
  end
  merged
end
519
+
520
# Adds all instances of a directly to b and returns b (b itself is modified).
# When one input is nil a clone of the other one is returned instead.
def self.merge_override_b(a, b)
  return ContentData.new(a) if b.nil?
  return ContentData.new(b) if a.nil?
  # Add A instances to content data B
  a.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
    b.add_instance(checksum, size, server, path, instance_mod_time)
  end
  b
end
529
+
530
+ # B - A : Remove contents of A from B and return the new content data.
531
+ # instances are ignored
532
+ # e.g
533
+ # A db:
534
+ # Content_1 ->
535
+ # Instance_1
536
+ # Instance_2
537
+ #
538
+ # Content_2 ->
539
+ # Instance_3
540
+ #
541
+ # B db:
542
+ # Content_1 ->
543
+ # Instance_1
544
+ # Instance_2
545
+ #
546
+ # Content_2 ->
547
+ # Instance_3
548
+ # Instance_4
549
+ # Content_3 ->
550
+ # Instance_5
551
+ # B-A db:
552
+ # Content_3 ->
553
+ # Instance_5
554
def self.remove(a, b)
  return nil if b.nil?
  return ContentData.new(b) if a.nil?
  result = ContentData.new(b) # work on a clone of B, B itself is not changed
  # remove the contents of A from the clone of B
  a.each_content do |checksum, _size, _content_mod_time|
    result.remove_content(checksum)
  end
  result
end
564
+
565
+ # B - A : Remove instances of A content from B content data B and return the new content data.
566
+ # If all instances are removed then the content record itself will be removed
567
+ # e.g
568
+ # A db:
569
+ # Content_1 ->
570
+ # Instance_1
571
+ # Instance_2
572
+ #
573
+ # Content_2 ->
574
+ # Instance_3
575
+ #
576
+ # B db:
577
+ # Content_1 ->
578
+ # Instance_1
579
+ # Instance_2
580
+ #
581
+ # Content_2 ->
582
+ # Instance_3
583
+ # Instance_4
584
+ # B-A db:
585
+ # Content_2 ->
586
+ # Instance_4
587
def self.remove_instances(a, b)
  return nil if b.nil?
  return ContentData.new(b) if a.nil?
  result = ContentData.new(b) # work on a clone of B, B itself is not changed
  # remove the instances of A from the clone of B
  a.each_instance do |_checksum, _size, _content_time, _instance_time, server, path|
    result.remove_instance(server, path)
  end
  result
end
597
+
598
# Returns a clone of content_data from which the instances of
# server_to_remove that match dir_to_remove were removed.
# content_data itself is not changed.
# @param content_data [ContentData, nil] source content data
# @param dir_to_remove [String] directory whose instances are removed
# @param server_to_remove [String] server the directory belongs to
# @return [ContentData, nil] new content data; nil when content_data is nil
def self.remove_directory(content_data, dir_to_remove, server_to_remove)
  return nil if content_data.nil?
  result_content_data = ContentData.new(content_data) # clone from content_data
  # The instance method is declared as remove_directory(server, dir_to_remove),
  # so the server has to be passed first.
  result_content_data.remove_directory(server_to_remove, dir_to_remove)
  result_content_data
end
604
+
605
# Returns the content that is common to both a and b, computed as
# b - (b - a) with the content based remove. Returns nil when a or b is nil.
def self.intersect(a, b)
  return nil if a.nil?
  return nil if b.nil?
  only_in_b = remove(a, b)
  remove(only_in_b, b)
end
612
+ end
613
+
@@ -0,0 +1,3 @@
1
# Version of the content_data library.
module ContentData
  VERSION = '1.1.0'
end
@@ -0,0 +1,6 @@
1
+ require 'content_data/content_data'
2
+
3
+ # Data structure for an abstract layer over files.
4
+ # Each binary sequence is a content, each file is content instance.
5
+ module ContentData
6
+ end