content_server 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/bin/file_utils +118 -0
- data/lib/content_data/content_data.rb +114 -48
- data/lib/content_server/version.rb +1 -1
- data/lib/file_monitoring/file_monitoring.rb +94 -50
- data/lib/file_monitoring/monitor_path.rb +196 -113
- data/lib/file_utils/file_utils.rb +10 -49
- data/lib/networking/tcp.rb +4 -4
- data/spec/content_data/content_data_spec.rb +331 -0
- data/spec/content_data/validations_spec.rb +5 -0
- data/spec/content_server/content_server_spec.rb +5 -0
- data/spec/content_server/file_streamer_spec.rb +5 -0
- data/spec/file_copy/copy_spec.rb +5 -0
- data/spec/file_indexing/index_agent_spec.rb +5 -0
- data/spec/networking/tcp_spec.rb +5 -0
- data/spec/validations/index_validations_spec.rb +5 -0
- metadata +9 -89
- data/test/content_data/content_data_test.rb +0 -291
- data/test/file_generator/file_generator_spec.rb +0 -85
- data/test/file_monitoring/monitor_path_test.rb +0 -189
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.0 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.1 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500 +0 -1500
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.0 +0 -1500
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.1 +0 -1500
- data/test/file_monitoring/monitor_path_test/test_file.500 +0 -500
- data/test/file_monitoring/monitor_path_test/test_file.500.0 +0 -500
- data/test/file_monitoring/monitor_path_test/test_file.500.1 +0 -500
- data/test/file_utils/fileutil_mksymlink_test.rb +0 -134
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.0 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.1 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.0 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.1 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/test_file.500 +0 -500
- data/test/file_utils/fileutil_mksymlink_test/test_file.500.0 +0 -500
- data/test/file_utils/fileutil_mksymlink_test/test_file.500.1 +0 -500
- data/test/file_utils/time_modification_test.rb +0 -136
- data/test/params/params_spec.rb +0 -280
- data/test/params/params_test.rb +0 -43
- data/test/run_in_background/run_in_background_test.rb +0 -122
- data/test/run_in_background/test_app +0 -59
@@ -11,6 +11,93 @@ module FileMonitoring
|
|
11
11
|
# Manages file monitoring of number of file system locations
|
12
12
|
class FileMonitoring
|
13
13
|
|
14
|
+
# Builds the chain of paths leading to +path+, ordered from the top-most
# ancestor down to +path+ itself.
# Example:
#   "/dir1/dir2/file_name" => ["/dir1", "/dir1/dir2", "/dir1/dir2/file_name"]
# The terminating roots '.' and '/' are never part of the result.
# Parameters:
#   path - String path to decompose.
# Returns: Array of String sub paths (empty when path is '.' or '/').
def create_sub_paths(path)
  sub_paths = []
  until path == '.' || path == '/'
    parent = File.dirname(path)
    # Stop when File.dirname no longer shortens the path; without this check a
    # root other than '.' or '/' (for example a drive-style root) would make
    # the loop run forever. Such a root is left out, exactly like '/'.
    break if parent == path
    sub_paths.push(path)
    path = parent
  end
  sub_paths.reverse!
end
|
22
|
+
|
23
|
+
# Feeds every file instance found in $local_content_data into the monitoring tree.
# An instance is loaded under the first monitoring root whose path is one of the
# instance's sub paths; instances that match no root are skipped.
# Parameters:
#   file_attr_to_checksum - Hash filled (only when Params['manual_file_changes'] is set)
#                           with [base name, size, mod_time] => IdentFileInfo.
#   dir_stat_array        - Array of [root DirStat, stable_state] pairs.
def load_instances(file_attr_to_checksum, dir_stat_array)
  loaded = 0
  $local_content_data.each_instance do |checksum, size, _, mod_time, _, path, index_time|
    if Params['manual_file_changes']
      attr_key = [File.basename(path), size, mod_time]
      known_info = file_attr_to_checksum[attr_key]
      if known_info
        # Same name/size/mod_time already seen: the file is ambiguous and
        # will be skipped if found at a new location.
        known_info.unique = false
      else
        # First occurrence: remember its checksum.
        file_attr_to_checksum[attr_key] = IdentFileInfo.new(checksum, index_time)
      end
    end

    # For /dir1/dir2/file_name this is ["/dir1", "/dir1/dir2", "/dir1/dir2/file_name"];
    # the last entry becomes the file node, the others directory nodes.
    sub_paths = create_sub_paths(path)

    dir_stat_array.each do |root_stat, root_stable_state|
      # Position of the monitoring root inside the sub paths (nil when the
      # instance is not under this root).
      root_index = sub_paths.index(root_stat.path)
      next unless root_index

      ::FileMonitoring.stable_state = root_stable_state
      loaded += 1
      # Descend the tree starting with the sub path right below the root.
      root_stat.load_instance(sub_paths, root_index + 1, size, mod_time)
      break
    end
  end
  Log.info("loaded instances:#{loaded}")
end
|
67
|
+
|
68
|
+
# Feeds every symlink found in $local_content_data into the monitoring tree.
# A symlink is loaded under the first monitoring root whose path is one of the
# symlink's sub paths; symlinks that match no root are skipped.
# Parameters:
#   dir_stat_array - Array of [root DirStat, stable_state] pairs.
def load_symlinks(dir_stat_array)
  loaded = 0
  $local_content_data.each_symlink do |_, symlink_path, symlink_target|
    # For /dir1/dir2/file_name this is ["/dir1", "/dir1/dir2", "/dir1/dir2/file_name"];
    # the entries are followed down the tree to reach the symlink location.
    sub_paths = create_sub_paths(symlink_path)

    dir_stat_array.each do |root_stat, root_stable_state|
      # Position of the monitoring root inside the sub paths (nil when the
      # symlink is not under this root).
      root_index = sub_paths.index(root_stat.path)
      next unless root_index

      ::FileMonitoring.stable_state = root_stable_state
      loaded += 1
      # Descend the tree starting with the sub path right below the root.
      root_stat.load_symlink(sub_paths, root_index + 1, symlink_path, symlink_target)
      break
    end
  end
  Log.info("loaded symlinks:#{loaded}")
end
|
14
101
|
# The main method. Loops on all paths, each time span and monitors them.
|
15
102
|
#
|
16
103
|
# =Algorithm:
|
@@ -45,7 +132,7 @@ module FileMonitoring
|
|
45
132
|
@log4r.outputters << file_outputter
|
46
133
|
::FileMonitoring::DirStat.set_log(@log4r)
|
47
134
|
|
48
|
-
|
135
|
+
conf_array = Params['monitoring_paths']
|
49
136
|
|
50
137
|
# create root dirs of monitoring
|
51
138
|
dir_stat_array = []
|
@@ -54,58 +141,15 @@ module FileMonitoring
|
|
54
141
|
dir_stat_array.push([dir_stat, elem['stable_state']])
|
55
142
|
}
|
56
143
|
|
144
|
+
# This structure is used to optimize indexing when user specifies a directory was moved.
|
145
|
+
file_attr_to_checksum = {}
|
146
|
+
|
57
147
|
#Look over loaded content data if not empty
|
58
|
-
# If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
|
59
|
-
# If file is NOT under monitoring path - skip (not a valid usage)
|
60
|
-
file_attr_to_checksum = {} # This structure is used to optimize indexing when user specifies a directory was moved.
|
61
148
|
unless $local_content_data.empty?
|
62
149
|
Log.info("Start build data base from loaded file. This could take several minutes")
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
if Params['manual_file_changes']
|
68
|
-
file_attr_key = [File.basename(path), size, mod_time]
|
69
|
-
ident_file_info = file_attr_to_checksum[file_attr_key]
|
70
|
-
unless ident_file_info
|
71
|
-
# Add file checksum to map
|
72
|
-
file_attr_to_checksum[file_attr_key] = IdentFileInfo.new(checksum, index_time)
|
73
|
-
else
|
74
|
-
# File already in map. Need to mark as not unique
|
75
|
-
ident_file_info.unique = false # file will be skipped if found at new location
|
76
|
-
end
|
77
|
-
end
|
78
|
-
# construct sub paths array from full file path:
|
79
|
-
# Example:
|
80
|
-
# instance path = /dir1/dir2/file_name
|
81
|
-
# Sub path 1: /dir1
|
82
|
-
# Sub path 2: /dir1/dir2
|
83
|
-
# Sub path 3: /dir1/dir2/file_name
|
84
|
-
# sub paths would create DirStat objs or FileStat(FileStat create using last sub path).
|
85
|
-
split_path = path.split(File::SEPARATOR)
|
86
|
-
sub_paths = (0..split_path.size-1).inject([]) { |paths, i|
|
87
|
-
paths.push(File.join(*split_path.values_at(0..i)))
|
88
|
-
}
|
89
|
-
# sub_paths holds array => ["/dir1","/dir1/dir2","/dir1/dir2/file_name"]
|
90
|
-
|
91
|
-
# Loop over monitor paths to start build tree under each
|
92
|
-
dir_stat_array.each { | dir_stat|
|
93
|
-
# check if monitor path is one of the sub paths and find it's sub path index
|
94
|
-
# if index is found then it the monitor path
|
95
|
-
# the next index indicates the next sub path to insert to the tree
|
96
|
-
# the index will be raised at each recursive call down the tree
|
97
|
-
sub_paths_index = sub_paths.index(dir_stat[0].path)
|
98
|
-
next if sub_paths_index.nil? # monitor path was not found. skip this instance.
|
99
|
-
|
100
|
-
# monitor path was found. Add to tree
|
101
|
-
# start the recursive call with next sub path index
|
102
|
-
::FileMonitoring.stable_state = dir_stat[1]
|
103
|
-
inst_count += 1
|
104
|
-
dir_stat[0].load_instance(sub_paths, sub_paths_index+1, size, mod_time)
|
105
|
-
break
|
106
|
-
}
|
107
|
-
}
|
108
|
-
Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
|
150
|
+
load_instances(file_attr_to_checksum, dir_stat_array)
|
151
|
+
load_symlinks(dir_stat_array)
|
152
|
+
Log.info("End build data base from loaded file")
|
109
153
|
$last_content_data_id = $local_content_data.unique_id
|
110
154
|
|
111
155
|
if Params['manual_file_changes']
|
@@ -137,6 +137,13 @@ module FileMonitoring
|
|
137
137
|
@@log = log
|
138
138
|
end
|
139
139
|
|
140
|
+
# Writes msg to the monitoring log at info level. Does nothing when no log
# has been set. The first outputter is flushed after the message when
# Params['log_flush_each_message'] is set.
def write_to_log(msg)
  return unless @@log

  @@log.info(msg)
  @@log.outputters[0].flush if Params['log_flush_each_message']
end
|
146
|
+
|
140
147
|
public_class_method :set_log
|
141
148
|
|
142
149
|
# Initializes new directory monitoring object
|
@@ -148,7 +155,7 @@ module FileMonitoring
|
|
148
155
|
@dirs = {}
|
149
156
|
@files = {}
|
150
157
|
@non_utf8_paths = {} # Hash: ["path" -> true|false]
|
151
|
-
|
158
|
+
@symlinks = {} # Hash: [[server, link file name]] -> " target file name"]
|
152
159
|
# indicates if path EXISTS in file system.
|
153
160
|
# If true, file will not be removed during removed_unmarked_paths phase.
|
154
161
|
@marked = false
|
@@ -169,7 +176,7 @@ module FileMonitoring
|
|
169
176
|
# size - the instance size to insert to the tree
|
170
177
|
# modification_time - the instance modification_time to insert to the tree
|
171
178
|
def load_instance(sub_paths, sub_paths_index, size, modification_time)
|
172
|
-
# initialize dirs and files.
|
179
|
+
# initialize dirs and files.
|
173
180
|
@dirs = {} unless @dirs
|
174
181
|
@files = {} unless @files
|
175
182
|
if sub_paths.size-1 == sub_paths_index
|
@@ -189,6 +196,40 @@ module FileMonitoring
|
|
189
196
|
end
|
190
197
|
end
|
191
198
|
|
199
|
+
# Adds a symlink to the tree while the tree is initialized from loaded content data.
# Walks down the tree along sub_paths, creating missing DirStat nodes on the way,
# and records the symlink in the @symlinks map of the node reached at the last sub path.
# Parameters:
#   sub_paths - Array of sub paths of the symlink which is added to the tree
#        Example:
#        symlink path = /dir1/dir2/file_name
#        Sub path 1: /dir1
#        Sub path 2: /dir1/dir2
#        Sub path 3: /dir1/dir2/file_name
#        All sub paths but the last name directories (DirStat nodes);
#        the last one is the symlink itself.
#   sub_paths_index - the index of the next sub path to handle.
#        It is raised by one at each recursive call down the tree.
#   symlink_path - symlink file path (used as the key in @symlinks)
#   symlink_target - the target path pointed to by the symlink
def load_symlink(sub_paths, sub_paths_index, symlink_path, symlink_target)
  # initialize the containers used below.
  @dirs = {} unless @dirs
  @files = {} unless @files
  @symlinks = {} unless @symlinks
  if sub_paths.size - 1 == sub_paths_index
    # index points to last entry - leaf case. Add the symlink.
    @symlinks[symlink_path] = symlink_target
  else
    # index points to a directory path. Add the dir to the tree if not present.
    dir_path = sub_paths[sub_paths_index]
    dir_stat = @dirs[dir_path]
    unless dir_stat
      dir_stat = DirStat.new(dir_path)
      add_dir(dir_stat)
    end
    # Continue down the tree with the next sub path index. This must recurse
    # into load_symlink: load_instance takes (size, modification_time) in the
    # last two positions and would treat the symlink as a regular file.
    dir_stat.load_symlink(sub_paths, sub_paths_index + 1, symlink_path, symlink_target)
  end
end
|
232
|
+
|
192
233
|
# Adds directory for monitoring.
|
193
234
|
def add_dir (dir)
|
194
235
|
@dirs[dir.path] = dir
|
@@ -223,7 +264,7 @@ module FileMonitoring
|
|
223
264
|
def to_s(indent = 0)
|
224
265
|
indent_increment = 2
|
225
266
|
child_indent = indent + indent_increment
|
226
|
-
res = super
|
267
|
+
res = super()
|
227
268
|
@files.each_value do |file|
|
228
269
|
res += "\n" + file.to_s(child_indent)
|
229
270
|
end if @files
|
@@ -245,10 +286,7 @@ module FileMonitoring
|
|
245
286
|
dir_stat.removed_unmarked_paths
|
246
287
|
else
|
247
288
|
# directory is not marked. Remove it, since it does not exist.
|
248
|
-
|
249
|
-
@@log.info("NON_EXISTING dir: " + dir_stat.path)
|
250
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
251
|
-
end
|
289
|
+
write_to_log("NON_EXISTING dir: " + dir_stat.path)
|
252
290
|
# remove file with changed checksum
|
253
291
|
$local_content_data_lock.synchronize{
|
254
292
|
$local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
|
@@ -265,10 +303,7 @@ module FileMonitoring
|
|
265
303
|
file_stat.marked = false # unset flag for next monitoring\index\remove phase
|
266
304
|
else
|
267
305
|
# file not marked meaning it is no longer exist. Remove.
|
268
|
-
|
269
|
-
@@log.info("NON_EXISTING file: " + file_stat.path)
|
270
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
271
|
-
end
|
306
|
+
write_to_log("NON_EXISTING file: " + file_stat.path)
|
272
307
|
# remove file with changed checksum
|
273
308
|
$local_content_data_lock.synchronize{
|
274
309
|
$local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
|
@@ -279,14 +314,139 @@ module FileMonitoring
|
|
279
314
|
end
|
280
315
|
end
|
281
316
|
|
282
|
-
|
283
|
-
|
284
|
-
#
|
285
|
-
|
286
|
-
|
317
|
+
################ All monitoring helper methods #################
|
318
|
+
|
319
|
+
# Checks that the globed path may be monitored: only paths whose names are
# valid UTF-8 are accepted.
# A path found to be non UTF-8 is remembered in @non_utf8_paths, so the
# warning is logged only once and the path stays rejected on later calls.
# Parameters:
#   globed_path - String path as returned by the glob.
# Returns: true when the path is valid UTF-8, false otherwise.
def is_globed_path_valid(globed_path)
  # Already known as non UTF-8: reject again without logging.
  return false if @non_utf8_paths[globed_path]

  # Work on a copy so the encoding of the caller's string is left untouched.
  check_utf_8_encoding_file = globed_path.clone
  return true if check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?

  Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
  @non_utf8_paths[globed_path] = true
  false
end
|
335
|
+
|
336
|
+
# Updates the state of a file that is already present in the tree.
# - Not changed and already STABLE: nothing to do.
# - Not changed and not STABLE: one more unchanged cycle is counted; the file
#   becomes STABLE once the cycle count reaches ::FileMonitoring.stable_state,
#   otherwise it is UNCHANGED.
# - Changed: state goes back to CHANGED with zero cycles, size and modification
#   time are refreshed, and the instance is removed from content data.
# Parameters:
#   child_stat       - the tree node of the file.
#   globed_path      - path of the file.
#   globed_path_stat - current stat of the file.
def handle_existing_file(child_stat, globed_path, globed_path_stat)
  unless child_stat.changed?(globed_path_stat)
    # ---------- SAME STATUS
    return if child_stat.state == FileStatEnum::STABLE

    child_stat.cycles += 1
    became_stable = child_stat.cycles >= ::FileMonitoring.stable_state
    child_stat.state = became_stable ? FileStatEnum::STABLE : FileStatEnum::UNCHANGED
    log_prefix = became_stable ? "STABLE file: " : "UNCHANGED file: "
    return write_to_log(log_prefix + globed_path)
  end

  # ---------- STATUS CHANGED
  child_stat.state = FileStatEnum::CHANGED
  child_stat.cycles = 0
  child_stat.size = globed_path_stat.size
  child_stat.modification_time = globed_path_stat.mtime.to_i
  write_to_log("CHANGED file: " + globed_path)
  # The stored instance is dropped here; it is added again once the file is indexed.
  $local_content_data_lock.synchronize do
    $local_content_data.remove_instance(Params['local_server_name'], globed_path)
  end
end
|
364
|
+
|
365
|
+
# Handles a file that is not in the tree while the 'manual_file_changes' param is set,
# meaning files were moved/copied by the user and need no re-indexing.
# The file is looked up by [base name, size, modification time] in file_attr_to_checksum.
# Only when a unique entry is found is an instance added to content data, reusing the
# stored checksum and index time (the index phase is skipped). The Dir tree is not updated.
# Parameters:
#   globed_path           - path of the file.
#   globed_path_stat      - current stat of the file.
#   file_attr_to_checksum - Hash of [base name, size, mod_time] => IdentFileInfo.
def handle_moved_file(globed_path, globed_path_stat, file_attr_to_checksum)
  # --------------------- MANUAL MODE
  lookup_key = [File.basename(globed_path), globed_path_stat.size, globed_path_stat.mtime.to_i]
  ident_info = file_attr_to_checksum[lookup_key]
  # Not found (a really new file) or found but not unique: the file would need
  # indexing, which is skipped in manual mode.
  return unless ident_info && ident_info.unique

  Log.debug1("update content data with file:%s checksum:%s index_time:%s",
             File.basename(globed_path), ident_info.checksum, ident_info.index_time.to_s)
  $local_content_data_lock.synchronize do
    $local_content_data.add_instance(ident_info.checksum,
                                     globed_path_stat.size,
                                     Params['local_server_name'],
                                     globed_path,
                                     globed_path_stat.mtime.to_i,
                                     ident_info.index_time)
  end
end
|
389
|
+
|
390
|
+
# Creates a tree node in state NEW for a file that is not yet in the tree,
# logs it, marks it and adds it to this directory.
# Parameters:
#   child_stat       - not read; the value passed in is ignored and a fresh
#                      FileStat is created instead.
#   globed_path      - path of the new file.
#   globed_path_stat - current stat of the file (size and mtime are stored).
def handle_new_file(child_stat, globed_path, globed_path_stat)
  new_file_stat = FileStat.new(globed_path,
                               FileStatEnum::NEW,
                               globed_path_stat.size,
                               globed_path_stat.mtime.to_i)
  write_to_log("NEW file: " + globed_path)
  new_file_stat.marked = true
  add_file(new_file_stat)
end
|
399
|
+
|
400
|
+
# Handles a directory found by the glob: adds it to the tree (and logs it)
# when it is not there yet, marks it, and monitors it recursively.
# Parameters:
#   globed_path           - path of the directory.
#   file_attr_to_checksum - passed through to the recursive monitor call.
def handle_dir(globed_path, file_attr_to_checksum)
  # ------------------------------ DIR -----------------------
  dir_entry = @dirs[globed_path] || begin
    # ----------- ADD NEW DIR
    created = DirStat.new(globed_path)
    add_dir(created)
    write_to_log("NEW dir: " + globed_path)
    created
  end
  dir_entry.marked = true
  # recursive call for dirs
  dir_entry.monitor(file_attr_to_checksum)
end
|
413
|
+
|
414
|
+
# Records a symlink found during monitoring: stores its target in the
# found_symlinks map and adds it to content data (an existing entry is
# presumably overridden there - see add_symlink).
# Symlinks that are no longer found are removed later from content data.
# Parameters:
#   globed_path    - path of the symlink.
#   found_symlinks - Hash of symlink path => target, filled by this method.
def add_found_symlinks(globed_path, found_symlinks)
  link_target = File.readlink(globed_path)
  found_symlinks[globed_path] = link_target
  $local_content_data_lock.synchronize do
    $local_content_data.add_symlink(Params['local_server_name'], globed_path, link_target)
  end
end
|
424
|
+
|
425
|
+
# Removes from content data every symlink that is recorded in @symlinks but was
# not found in the current monitoring pass, then makes found_symlinks the new
# @symlinks map.
# Parameters:
#   found_symlinks - Hash of symlink path => target found in this pass.
# Returns: found_symlinks (the new value of @symlinks).
def remove_not_found_symlinks(found_symlinks)
  # Plain iteration instead of an external enumerator with `next rescue break`,
  # which silently swallowed any StandardError raised while advancing.
  @symlinks.each_key do |symlink_key|
    next if found_symlinks.key?(symlink_key)

    # symlink was removed. remove from content data
    $local_content_data_lock.synchronize do
      $local_content_data.remove_symlink(Params['local_server_name'], symlink_key)
    end
  end
  @symlinks = found_symlinks
end
|
439
|
+
|
440
|
+
# Recursively, read files and dirs lists from file system (using Glob)
|
441
|
+
# - Adds new files\dirs.
|
442
|
+
# - Change state for existing files\dirs
|
443
|
+
# - Index stable files
|
444
|
+
# - Remove non existing files\dirs is handled in method: remove_unmarked_paths
|
445
|
+
# - Handles special case for param 'manual_file_changes' where files are moved and
|
446
|
+
# there is no need to index them
|
287
447
|
def monitor(file_attr_to_checksum=nil)
|
288
448
|
|
289
|
-
# Algorithm:
|
449
|
+
# Marking/Removing Algorithm:
|
290
450
|
# assume that current dir is present
|
291
451
|
# ls (glob) the dir path for child dirs and files
|
292
452
|
# if child file is not already present, add it as new, mark it and handle its state
|
@@ -298,19 +458,14 @@ module FileMonitoring
|
|
298
458
|
|
299
459
|
# ls (glob) the dir path for child dirs and files
|
300
460
|
globed_paths_enum = Dir.glob(@path + "/*").to_enum
|
461
|
+
|
462
|
+
found_symlinks = {} # Store found symlinks under dir
|
301
463
|
loop do
|
302
464
|
globed_path = globed_paths_enum.next rescue break
|
303
465
|
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
# UTF-8 - keep only files with names in
|
308
|
-
next if @non_utf8_paths[globed_path]
|
309
|
-
check_utf_8_encoding_file = globed_path.clone
|
310
|
-
unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
|
311
|
-
Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
|
312
|
-
@non_utf8_paths[globed_path]=true
|
313
|
-
check_utf_8_encoding_file=nil
|
466
|
+
next unless is_globed_path_valid(globed_path)
|
467
|
+
if File.symlink?(globed_path)
|
468
|
+
add_found_symlinks(globed_path, found_symlinks)
|
314
469
|
next
|
315
470
|
end
|
316
471
|
|
@@ -320,99 +475,27 @@ module FileMonitoring
|
|
320
475
|
# ----------------------------- FILE -----------------------
|
321
476
|
child_stat = @files[globed_path]
|
322
477
|
if child_stat
|
323
|
-
#
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
# ---------- STATUS CHANGED
|
329
|
-
# Update changed status
|
330
|
-
child_stat.state = FileStatEnum::CHANGED
|
331
|
-
child_stat.cycles = 0
|
332
|
-
child_stat.size = globed_path_stat.size
|
333
|
-
child_stat.modification_time = globed_path_stat.mtime.to_i
|
334
|
-
if @@log
|
335
|
-
@@log.info("CHANGED file: " + globed_path)
|
336
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
337
|
-
end
|
338
|
-
# remove file with changed checksum. File will be added once indexed
|
339
|
-
$local_content_data_lock.synchronize{
|
340
|
-
$local_content_data.remove_instance(Params['local_server_name'], globed_path)
|
341
|
-
}
|
342
|
-
else # case child_stat did not change
|
343
|
-
# ---------- SAME STATUS
|
344
|
-
# File status is the same
|
345
|
-
if child_stat.state != FileStatEnum::STABLE
|
346
|
-
child_stat.state = FileStatEnum::UNCHANGED
|
347
|
-
child_stat.cycles += 1
|
348
|
-
if child_stat.cycles >= ::FileMonitoring.stable_state
|
349
|
-
child_stat.state = FileStatEnum::STABLE
|
350
|
-
if @@log
|
351
|
-
@@log.info("STABLE file: " + globed_path)
|
352
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
353
|
-
end
|
354
|
-
else
|
355
|
-
if @@log
|
356
|
-
@@log.info("UNCHANGED file: " + globed_path)
|
357
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
358
|
-
end
|
359
|
-
end
|
360
|
-
end
|
361
|
-
end
|
362
|
-
else # case Params['manual_file_changes']
|
363
|
-
# --------- MANUAL MODE
|
364
|
-
child_stat.marked = true
|
365
|
-
end
|
478
|
+
# Mark that file exists (will not be deleted at end of monitoring)
|
479
|
+
child_stat.marked = true
|
480
|
+
# Handle existing file If we are not in manual mode.
|
481
|
+
# In manual mode do nothing
|
482
|
+
handle_existing_file(child_stat, globed_path, globed_path_stat) unless Params['manual_file_changes']
|
366
483
|
else
|
367
|
-
# ---------------------------- NEW FILE ----------
|
368
484
|
unless Params['manual_file_changes']
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
@@log.info("NEW file: " + globed_path)
|
375
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
376
|
-
end
|
377
|
-
child_stat.marked = true
|
378
|
-
add_file(child_stat)
|
379
|
-
else # case Params['manual_file_changes']
|
380
|
-
# --------------------- MANUAL MODE
|
381
|
-
# check if file name and attributes exist in global file attr map
|
382
|
-
file_attr_key = [File.basename(globed_path), globed_path_stat.size, globed_path_stat.mtime.to_i]
|
383
|
-
file_ident_info = file_attr_to_checksum[file_attr_key]
|
384
|
-
# If not found (real new file) or found but not unique then file needs indexing. skip in manual mode.
|
385
|
-
next unless (file_ident_info and file_ident_info.unique)
|
386
|
-
Log.debug1("update content data with file:%s checksum:%s index_time:%s",
|
387
|
-
File.basename(globed_path), file_ident_info.checksum, file_ident_info.index_time.to_s)
|
388
|
-
# update content data (no need to update Dir tree)
|
389
|
-
$local_content_data_lock.synchronize{
|
390
|
-
$local_content_data.add_instance(file_ident_info.checksum,
|
391
|
-
globed_path_stat.size,
|
392
|
-
Params['local_server_name'],
|
393
|
-
globed_path,
|
394
|
-
globed_path_stat.mtime.to_i,
|
395
|
-
file_ident_info.index_time)
|
396
|
-
}
|
485
|
+
# Handle regular case of new file.
|
486
|
+
handle_new_file(child_stat, globed_path, globed_path_stat)
|
487
|
+
else
|
488
|
+
# Only create new content data instance based on copied/moved filed.
|
489
|
+
handle_moved_file(globed_path, globed_path_stat, file_attr_to_checksum)
|
397
490
|
end
|
398
491
|
end
|
399
492
|
else
|
400
|
-
|
401
|
-
child_stat = @dirs[globed_path]
|
402
|
-
unless child_stat
|
403
|
-
# ----------- ADD NEW DIR
|
404
|
-
child_stat = DirStat.new(globed_path)
|
405
|
-
add_dir(child_stat)
|
406
|
-
if @@log
|
407
|
-
@@log.info("NEW dir: " + globed_path)
|
408
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
409
|
-
end
|
410
|
-
end
|
411
|
-
child_stat.marked = true
|
412
|
-
# recursive call for dirs
|
413
|
-
child_stat.monitor(file_attr_to_checksum)
|
493
|
+
handle_dir(globed_path, file_attr_to_checksum)
|
414
494
|
end
|
415
495
|
end
|
496
|
+
|
497
|
+
remove_not_found_symlinks(found_symlinks)
|
498
|
+
|
416
499
|
GC.start
|
417
500
|
end
|
418
501
|
|