content_server 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/bin/file_utils +118 -0
- data/lib/content_data/content_data.rb +114 -48
- data/lib/content_server/version.rb +1 -1
- data/lib/file_monitoring/file_monitoring.rb +94 -50
- data/lib/file_monitoring/monitor_path.rb +196 -113
- data/lib/file_utils/file_utils.rb +10 -49
- data/lib/networking/tcp.rb +4 -4
- data/spec/content_data/content_data_spec.rb +331 -0
- data/spec/content_data/validations_spec.rb +5 -0
- data/spec/content_server/content_server_spec.rb +5 -0
- data/spec/content_server/file_streamer_spec.rb +5 -0
- data/spec/file_copy/copy_spec.rb +5 -0
- data/spec/file_indexing/index_agent_spec.rb +5 -0
- data/spec/networking/tcp_spec.rb +5 -0
- data/spec/validations/index_validations_spec.rb +5 -0
- metadata +9 -89
- data/test/content_data/content_data_test.rb +0 -291
- data/test/file_generator/file_generator_spec.rb +0 -85
- data/test/file_monitoring/monitor_path_test.rb +0 -189
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.0 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1000/test_file.1000.1 +0 -1000
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500 +0 -1500
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.0 +0 -1500
- data/test/file_monitoring/monitor_path_test/dir1500/test_file.1500.1 +0 -1500
- data/test/file_monitoring/monitor_path_test/test_file.500 +0 -500
- data/test/file_monitoring/monitor_path_test/test_file.500.0 +0 -500
- data/test/file_monitoring/monitor_path_test/test_file.500.1 +0 -500
- data/test/file_utils/fileutil_mksymlink_test.rb +0 -134
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.0 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/dir1500/test_file.1500.1 +0 -1500
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.0 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/dir1000/test_file.1000.1 +0 -1000
- data/test/file_utils/fileutil_mksymlink_test/test_file.500 +0 -500
- data/test/file_utils/fileutil_mksymlink_test/test_file.500.0 +0 -500
- data/test/file_utils/fileutil_mksymlink_test/test_file.500.1 +0 -500
- data/test/file_utils/time_modification_test.rb +0 -136
- data/test/params/params_spec.rb +0 -280
- data/test/params/params_test.rb +0 -43
- data/test/run_in_background/run_in_background_test.rb +0 -122
- data/test/run_in_background/test_app +0 -59
@@ -11,6 +11,93 @@ module FileMonitoring
|
|
11
11
|
# Manages file monitoring of number of file system locations
|
12
12
|
class FileMonitoring
|
13
13
|
|
14
|
+
# Builds the chain of ancestor paths that leads to +path+, shortest first.
#
# Example:
#   create_sub_paths('/dir1/dir2/file_name')
#   # => ["/dir1", "/dir1/dir2", "/dir1/dir2/file_name"]
#
# The walk stops at '.' or '/' (neither is part of the result). It also stops
# as soon as File.dirname hands back its argument unchanged, so a root that is
# neither '.' nor '/' (for example a drive root on Windows) ends the loop
# instead of spinning forever; such a root stays as the first element.
#
# Parameters:
#   path - String path to decompose.
# Returns an Array of Strings.
def create_sub_paths(path)
  sub_paths = []
  until path == '.' || path == '/'
    sub_paths.push(path)
    parent = File.dirname(path)
    # Fixed point: dirname cannot shorten the path any further.
    break if parent == path
    path = parent
  end
  sub_paths.reverse!
end
|
22
|
+
|
23
|
+
def load_instances(file_attr_to_checksum, dir_stat_array)
|
24
|
+
inst_count = 0
|
25
|
+
# If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
|
26
|
+
# If file is NOT under monitoring path - skip (not a valid usage)
|
27
|
+
$local_content_data.each_instance {
|
28
|
+
|checksum, size, _, mod_time, _, path, index_time|
|
29
|
+
|
30
|
+
if Params['manual_file_changes']
|
31
|
+
file_attr_key = [File.basename(path), size, mod_time]
|
32
|
+
ident_file_info = file_attr_to_checksum[file_attr_key]
|
33
|
+
unless ident_file_info
|
34
|
+
# Add file checksum to map
|
35
|
+
file_attr_to_checksum[file_attr_key] = IdentFileInfo.new(checksum, index_time)
|
36
|
+
else
|
37
|
+
# File already in map. Need to mark as not unique
|
38
|
+
ident_file_info.unique = false # file will be skipped if found at new location
|
39
|
+
end
|
40
|
+
end
|
41
|
+
# construct sub paths array from full file path:
|
42
|
+
# Example:
|
43
|
+
# instance path = /dir1/dir2/file_name
|
44
|
+
# sub_paths holds array => ["/dir1","/dir1/dir2","/dir1/dir2/file_name"]
|
45
|
+
# sub paths would create DirStat objs or FileStat(FileStat create using last sub path).
|
46
|
+
sub_paths = create_sub_paths(path)
|
47
|
+
|
48
|
+
# Loop over monitor paths to start build tree under each
|
49
|
+
dir_stat_array.each { | dir_stat|
|
50
|
+
# check if monitor path is one of the sub paths and find its sub path index
|
51
|
+
# if index is found then it is the monitor path
|
52
|
+
# the next index indicates the next sub path to insert to the tree
|
53
|
+
# the index will be raised at each recursive call down the tree
|
54
|
+
sub_paths_index = sub_paths.index(dir_stat[0].path)
|
55
|
+
if sub_paths_index
|
56
|
+
# monitor path was found. Add to tree
|
57
|
+
# start the recursive call with next sub path index
|
58
|
+
::FileMonitoring.stable_state = dir_stat[1]
|
59
|
+
inst_count += 1
|
60
|
+
dir_stat[0].load_instance(sub_paths, sub_paths_index+1, size, mod_time)
|
61
|
+
break
|
62
|
+
end
|
63
|
+
}
|
64
|
+
}
|
65
|
+
Log.info("loaded instances:#{inst_count}")
|
66
|
+
end
|
67
|
+
|
68
|
+
def load_symlinks(dir_stat_array)
|
69
|
+
# If symlink file is under monitoring path - Add to DirStat tree
|
70
|
+
# If symlink file is NOT under monitoring path - skip (not a valid usage)
|
71
|
+
symlink_count = 0
|
72
|
+
$local_content_data.each_symlink {
|
73
|
+
|_, symlink_path, symlink_target|
|
74
|
+
|
75
|
+
# construct sub paths array from symlink path:
|
76
|
+
# Example:
|
77
|
+
# symlink path = /dir1/dir2/file_name
|
78
|
+
# sub_paths holds array => ["/dir1","/dir1/dir2","/dir1/dir2/file_name"]
|
79
|
+
# sub paths should match the paths of DirStat objs in the tree to reach the symlink location in Tree.
|
80
|
+
sub_paths = create_sub_paths(symlink_path)
|
81
|
+
|
82
|
+
# Loop over monitor paths to start enter tree
|
83
|
+
dir_stat_array.each { | dir_stat|
|
84
|
+
# check if monitor path is one of the sub paths and find its sub path index
|
85
|
+
# if index is found then it is the monitor path
|
86
|
+
# the next index indicates the next sub path to insert to the tree
|
87
|
+
# the index will be raised at each recursive call down the tree
|
88
|
+
sub_paths_index = sub_paths.index(dir_stat[0].path)
|
89
|
+
if sub_paths_index
|
90
|
+
# monitor path was found. Add to tree
|
91
|
+
# start the recursive call with next sub path index
|
92
|
+
::FileMonitoring.stable_state = dir_stat[1]
|
93
|
+
symlink_count += 1
|
94
|
+
dir_stat[0].load_symlink(sub_paths, sub_paths_index+1, symlink_path, symlink_target)
|
95
|
+
break
|
96
|
+
end
|
97
|
+
}
|
98
|
+
}
|
99
|
+
Log.info("loaded symlinks:#{symlink_count}")
|
100
|
+
end
|
14
101
|
# The main method. Loops on all paths, each time span and monitors them.
|
15
102
|
#
|
16
103
|
# =Algorithm:
|
@@ -45,7 +132,7 @@ module FileMonitoring
|
|
45
132
|
@log4r.outputters << file_outputter
|
46
133
|
::FileMonitoring::DirStat.set_log(@log4r)
|
47
134
|
|
48
|
-
|
135
|
+
conf_array = Params['monitoring_paths']
|
49
136
|
|
50
137
|
# create root dirs of monitoring
|
51
138
|
dir_stat_array = []
|
@@ -54,58 +141,15 @@ module FileMonitoring
|
|
54
141
|
dir_stat_array.push([dir_stat, elem['stable_state']])
|
55
142
|
}
|
56
143
|
|
144
|
+
# This structure is used to optimize indexing when user specifies a directory was moved.
|
145
|
+
file_attr_to_checksum = {}
|
146
|
+
|
57
147
|
#Look over loaded content data if not empty
|
58
|
-
# If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
|
59
|
-
# If file is NOT under monitoring path - skip (not a valid usage)
|
60
|
-
file_attr_to_checksum = {} # This structure is used to optimize indexing when user specifies a directory was moved.
|
61
148
|
unless $local_content_data.empty?
|
62
149
|
Log.info("Start build data base from loaded file. This could take several minutes")
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
if Params['manual_file_changes']
|
68
|
-
file_attr_key = [File.basename(path), size, mod_time]
|
69
|
-
ident_file_info = file_attr_to_checksum[file_attr_key]
|
70
|
-
unless ident_file_info
|
71
|
-
# Add file checksum to map
|
72
|
-
file_attr_to_checksum[file_attr_key] = IdentFileInfo.new(checksum, index_time)
|
73
|
-
else
|
74
|
-
# File already in map. Need to mark as not unique
|
75
|
-
ident_file_info.unique = false # file will be skipped if found at new location
|
76
|
-
end
|
77
|
-
end
|
78
|
-
# construct sub paths array from full file path:
|
79
|
-
# Example:
|
80
|
-
# instance path = /dir1/dir2/file_name
|
81
|
-
# Sub path 1: /dir1
|
82
|
-
# Sub path 2: /dir1/dir2
|
83
|
-
# Sub path 3: /dir1/dir2/file_name
|
84
|
-
# sub paths would create DirStat objs or FileStat(FileStat create using last sub path).
|
85
|
-
split_path = path.split(File::SEPARATOR)
|
86
|
-
sub_paths = (0..split_path.size-1).inject([]) { |paths, i|
|
87
|
-
paths.push(File.join(*split_path.values_at(0..i)))
|
88
|
-
}
|
89
|
-
# sub_paths holds array => ["/dir1","/dir1/dir2","/dir1/dir2/file_name"]
|
90
|
-
|
91
|
-
# Loop over monitor paths to start build tree under each
|
92
|
-
dir_stat_array.each { | dir_stat|
|
93
|
-
# check if monitor path is one of the sub paths and find it's sub path index
|
94
|
-
# if index is found then it the monitor path
|
95
|
-
# the next index indicates the next sub path to insert to the tree
|
96
|
-
# the index will be raised at each recursive call down the tree
|
97
|
-
sub_paths_index = sub_paths.index(dir_stat[0].path)
|
98
|
-
next if sub_paths_index.nil? # monitor path was not found. skip this instance.
|
99
|
-
|
100
|
-
# monitor path was found. Add to tree
|
101
|
-
# start the recursive call with next sub path index
|
102
|
-
::FileMonitoring.stable_state = dir_stat[1]
|
103
|
-
inst_count += 1
|
104
|
-
dir_stat[0].load_instance(sub_paths, sub_paths_index+1, size, mod_time)
|
105
|
-
break
|
106
|
-
}
|
107
|
-
}
|
108
|
-
Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
|
150
|
+
load_instances(file_attr_to_checksum, dir_stat_array)
|
151
|
+
load_symlinks(dir_stat_array)
|
152
|
+
Log.info("End build data base from loaded file")
|
109
153
|
$last_content_data_id = $local_content_data.unique_id
|
110
154
|
|
111
155
|
if Params['manual_file_changes']
|
@@ -137,6 +137,13 @@ module FileMonitoring
|
|
137
137
|
@@log = log
|
138
138
|
end
|
139
139
|
|
140
|
+
def write_to_log(msg)
|
141
|
+
if @@log
|
142
|
+
@@log.info(msg)
|
143
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
140
147
|
public_class_method :set_log
|
141
148
|
|
142
149
|
# Initializes new directory monitoring object
|
@@ -148,7 +155,7 @@ module FileMonitoring
|
|
148
155
|
@dirs = {}
|
149
156
|
@files = {}
|
150
157
|
@non_utf8_paths = {} # Hash: ["path" -> true|false]
|
151
|
-
|
158
|
+
@symlinks = {} # Hash: ["symlink path" -> "target path"]
|
152
159
|
# indicates if path EXISTS in file system.
|
153
160
|
# If true, file will not be removed during removed_unmarked_paths phase.
|
154
161
|
@marked = false
|
@@ -169,7 +176,7 @@ module FileMonitoring
|
|
169
176
|
# size - the instance size to insert to the tree
|
170
177
|
# modification_time - the instance modification_time to insert to the tree
|
171
178
|
def load_instance(sub_paths, sub_paths_index, size, modification_time)
|
172
|
-
# initialize dirs and files.
|
179
|
+
# initialize dirs and files.
|
173
180
|
@dirs = {} unless @dirs
|
174
181
|
@files = {} unless @files
|
175
182
|
if sub_paths.size-1 == sub_paths_index
|
@@ -189,6 +196,40 @@ module FileMonitoring
|
|
189
196
|
end
|
190
197
|
end
|
191
198
|
|
199
|
+
# Adds a symlink to the tree while the tree is initialized from loaded content data.
# Walks down the tree one sub path per recursive call, creating any missing
# DirStat node on the way, and records the symlink on the node reached when
# the index points at the last sub path.
# Parameters:
#   sub_paths - Array of sub paths of the symlink which is added to the tree.
#     Example:
#       symlink path = /dir1/dir2/file_name
#       Sub path 1: /dir1
#       Sub path 2: /dir1/dir2
#       Sub path 3: /dir1/dir2/file_name
#   sub_paths_index - index of the next sub path to handle.
#     It is raised by one at each recursive call down the tree.
#   symlink_path - symlink file path (used as key in the symlinks map)
#   symlink_target - the target path pointed to by the symlink
def load_symlink(sub_paths, sub_paths_index, symlink_path, symlink_target)
  # initialize the containers if they are missing.
  @dirs = {} unless @dirs
  @files = {} unless @files
  @symlinks = {} unless @symlinks
  if sub_paths.size - 1 == sub_paths_index
    # index points to last entry - leaf case. Add the symlink.
    @symlinks[symlink_path] = symlink_target
  else
    # index points to a directory on the way. Create it if not present.
    dir_path = sub_paths[sub_paths_index]
    dir_stat = @dirs[dir_path]
    unless dir_stat
      dir_stat = DirStat.new(dir_path)
      add_dir(dir_stat)
    end
    # Continue down the tree with load_symlink. Calling load_instance here
    # would treat the symlink path and target as a file size and mod time.
    dir_stat.load_symlink(sub_paths, sub_paths_index + 1, symlink_path, symlink_target)
  end
end
|
232
|
+
|
192
233
|
# Adds directory for monitoring.
|
193
234
|
def add_dir (dir)
|
194
235
|
@dirs[dir.path] = dir
|
@@ -223,7 +264,7 @@ module FileMonitoring
|
|
223
264
|
def to_s(indent = 0)
|
224
265
|
indent_increment = 2
|
225
266
|
child_indent = indent + indent_increment
|
226
|
-
res = super
|
267
|
+
res = super()
|
227
268
|
@files.each_value do |file|
|
228
269
|
res += "\n" + file.to_s(child_indent)
|
229
270
|
end if @files
|
@@ -245,10 +286,7 @@ module FileMonitoring
|
|
245
286
|
dir_stat.removed_unmarked_paths
|
246
287
|
else
|
247
288
|
# directory is not marked. Remove it, since it does not exist.
|
248
|
-
|
249
|
-
@@log.info("NON_EXISTING dir: " + dir_stat.path)
|
250
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
251
|
-
end
|
289
|
+
write_to_log("NON_EXISTING dir: " + dir_stat.path)
|
252
290
|
# remove file with changed checksum
|
253
291
|
$local_content_data_lock.synchronize{
|
254
292
|
$local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
|
@@ -265,10 +303,7 @@ module FileMonitoring
|
|
265
303
|
file_stat.marked = false # unset flag for next monitoring\index\remove phase
|
266
304
|
else
|
267
305
|
# file not marked meaning it is no longer exist. Remove.
|
268
|
-
|
269
|
-
@@log.info("NON_EXISTING file: " + file_stat.path)
|
270
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
271
|
-
end
|
306
|
+
write_to_log("NON_EXISTING file: " + file_stat.path)
|
272
307
|
# remove file with changed checksum
|
273
308
|
$local_content_data_lock.synchronize{
|
274
309
|
$local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
|
@@ -279,14 +314,139 @@ module FileMonitoring
|
|
279
314
|
end
|
280
315
|
end
|
281
316
|
|
282
|
-
|
283
|
-
|
284
|
-
#
|
285
|
-
|
286
|
-
|
317
|
+
################ All monitoring helper methods #################
|
318
|
+
|
319
|
+
# Checks that the globed path is valid, i.e. that its name is valid UTF-8.
# A path found to be non UTF-8 is remembered in @non_utf8_paths, so the warning
# is logged only the first time; later calls reject the path straight away.
# Parameters:
#   globed_path - path string to check
# Returns true if the path can be handled, false if it must be skipped.
def is_globed_path_valid(globed_path)
  # Already known as non UTF-8: it is still invalid, skip it without logging again.
  return false if @non_utf8_paths[globed_path]

  # force_encoding changes its receiver, so the check runs on a copy.
  # dup (unlike clone) never carries over a frozen state.
  check_utf_8_encoding_file = globed_path.dup
  unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
    Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
    @non_utf8_paths[globed_path] = true
    # TODO(bbfsdev): Remove line below and redundant copies of string after
    # those lines are not a GC problem.
    check_utf_8_encoding_file = nil
    return false
  end

  true
end
|
335
|
+
|
336
|
+
def handle_existing_file(child_stat, globed_path, globed_path_stat)
|
337
|
+
if child_stat.changed?(globed_path_stat)
|
338
|
+
# ---------- STATUS CHANGED
|
339
|
+
# Update changed status
|
340
|
+
child_stat.state = FileStatEnum::CHANGED
|
341
|
+
child_stat.cycles = 0
|
342
|
+
child_stat.size = globed_path_stat.size
|
343
|
+
child_stat.modification_time = globed_path_stat.mtime.to_i
|
344
|
+
write_to_log("CHANGED file: " + globed_path)
|
345
|
+
# remove file with changed checksum. File will be added once indexed
|
346
|
+
$local_content_data_lock.synchronize{
|
347
|
+
$local_content_data.remove_instance(Params['local_server_name'], globed_path)
|
348
|
+
}
|
349
|
+
else # case child_stat did not change
|
350
|
+
# ---------- SAME STATUS
|
351
|
+
# File status is the same
|
352
|
+
if child_stat.state != FileStatEnum::STABLE
|
353
|
+
child_stat.cycles += 1
|
354
|
+
if child_stat.cycles >= ::FileMonitoring.stable_state
|
355
|
+
child_stat.state = FileStatEnum::STABLE
|
356
|
+
write_to_log("STABLE file: " + globed_path)
|
357
|
+
else
|
358
|
+
child_stat.state = FileStatEnum::UNCHANGED
|
359
|
+
write_to_log("UNCHANGED file: " + globed_path)
|
360
|
+
end
|
361
|
+
end
|
362
|
+
end
|
363
|
+
end
|
364
|
+
|
365
|
+
# This method handles the case where we set the 'manual_file_changes' param meaning
|
366
|
+
# some files were moved/copied and no need to reindex them. In that case search "new files"
|
367
|
+
# in old files to get the checksum (skip the index phase).
|
368
|
+
# The lookup is done via specially prepared file_attr_to_checksum map.
|
369
|
+
def handle_moved_file(globed_path, globed_path_stat, file_attr_to_checksum)
|
370
|
+
# --------------------- MANUAL MODE
|
371
|
+
# check if file name and attributes exist in global file attr map
|
372
|
+
file_attr_key = [File.basename(globed_path), globed_path_stat.size, globed_path_stat.mtime.to_i]
|
373
|
+
file_ident_info = file_attr_to_checksum[file_attr_key]
|
374
|
+
# If not found (real new file) or found but not unique then file needs indexing. skip in manual mode.
|
375
|
+
if file_ident_info && file_ident_info.unique
|
376
|
+
Log.debug1("update content data with file:%s checksum:%s index_time:%s",
|
377
|
+
File.basename(globed_path), file_ident_info.checksum, file_ident_info.index_time.to_s)
|
378
|
+
# update content data (no need to update Dir tree)
|
379
|
+
$local_content_data_lock.synchronize{
|
380
|
+
$local_content_data.add_instance(file_ident_info.checksum,
|
381
|
+
globed_path_stat.size,
|
382
|
+
Params['local_server_name'],
|
383
|
+
globed_path,
|
384
|
+
globed_path_stat.mtime.to_i,
|
385
|
+
file_ident_info.index_time)
|
386
|
+
}
|
387
|
+
end
|
388
|
+
end
|
389
|
+
|
390
|
+
def handle_new_file(child_stat, globed_path, globed_path_stat)
|
391
|
+
child_stat = FileStat.new(globed_path,
|
392
|
+
FileStatEnum::NEW,
|
393
|
+
globed_path_stat.size,
|
394
|
+
globed_path_stat.mtime.to_i)
|
395
|
+
write_to_log("NEW file: " + globed_path)
|
396
|
+
child_stat.marked = true
|
397
|
+
add_file(child_stat)
|
398
|
+
end
|
399
|
+
|
400
|
+
def handle_dir(globed_path, file_attr_to_checksum)
|
401
|
+
# ------------------------------ DIR -----------------------
|
402
|
+
child_stat = @dirs[globed_path]
|
403
|
+
unless child_stat
|
404
|
+
# ----------- ADD NEW DIR
|
405
|
+
child_stat = DirStat.new(globed_path)
|
406
|
+
add_dir(child_stat)
|
407
|
+
write_to_log("NEW dir: " + globed_path)
|
408
|
+
end
|
409
|
+
child_stat.marked = true
|
410
|
+
# recursive call for dirs
|
411
|
+
child_stat.monitor(file_attr_to_checksum)
|
412
|
+
end
|
413
|
+
|
414
|
+
def add_found_symlinks(globed_path, found_symlinks)
|
415
|
+
# if symlink - add to symlink temporary map and content data (even override).
|
416
|
+
# later all non existing symlinks will be removed from content data
|
417
|
+
pointed_file_name = File.readlink(globed_path)
|
418
|
+
found_symlinks[globed_path] = pointed_file_name
|
419
|
+
# add to content data
|
420
|
+
$local_content_data_lock.synchronize{
|
421
|
+
$local_content_data.add_symlink(Params['local_server_name'], globed_path, pointed_file_name)
|
422
|
+
}
|
423
|
+
end
|
424
|
+
|
425
|
+
# Drops symlinks that are in the current symlinks map but were not found in this pass.
# Each such symlink is removed from the content data (under the content data
# lock). Afterwards found_symlinks becomes the current symlinks map.
# Parameters:
#   found_symlinks - Hash of symlink path -> target collected during this pass
def remove_not_found_symlinks(found_symlinks)
  (@symlinks || {}).each_key do |symlink_key|
    next if found_symlinks.has_key?(symlink_key)
    # symlink was removed. remove from content data
    $local_content_data_lock.synchronize {
      $local_content_data.remove_symlink(Params['local_server_name'], symlink_key)
    }
  end
  @symlinks = found_symlinks
end
|
439
|
+
|
440
|
+
# Recursively, read files and dirs lists from file system (using Glob)
|
441
|
+
# - Adds new files\dirs.
|
442
|
+
# - Change state for existing files\dirs
|
443
|
+
# - Index stable files
|
444
|
+
# - Remove non existing files\dirs is handled in method: remove_unmarked_paths
|
445
|
+
# - Handles special case for param 'manual_file_changes' where files are moved and
|
446
|
+
# there is no need to index them
|
287
447
|
def monitor(file_attr_to_checksum=nil)
|
288
448
|
|
289
|
-
# Algorithm:
|
449
|
+
# Marking/Removing Algorithm:
|
290
450
|
# assume that current dir is present
|
291
451
|
# ls (glob) the dir path for child dirs and files
|
292
452
|
# if child file is not already present, add it as new, mark it and handle its state
|
@@ -298,19 +458,14 @@ module FileMonitoring
|
|
298
458
|
|
299
459
|
# ls (glob) the dir path for child dirs and files
|
300
460
|
globed_paths_enum = Dir.glob(@path + "/*").to_enum
|
461
|
+
|
462
|
+
found_symlinks = {} # Store found symlinks under dir
|
301
463
|
loop do
|
302
464
|
globed_path = globed_paths_enum.next rescue break
|
303
465
|
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
# UTF-8 - keep only files with names in
|
308
|
-
next if @non_utf8_paths[globed_path]
|
309
|
-
check_utf_8_encoding_file = globed_path.clone
|
310
|
-
unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
|
311
|
-
Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
|
312
|
-
@non_utf8_paths[globed_path]=true
|
313
|
-
check_utf_8_encoding_file=nil
|
466
|
+
next unless is_globed_path_valid(globed_path)
|
467
|
+
if File.symlink?(globed_path)
|
468
|
+
add_found_symlinks(globed_path, found_symlinks)
|
314
469
|
next
|
315
470
|
end
|
316
471
|
|
@@ -320,99 +475,27 @@ module FileMonitoring
|
|
320
475
|
# ----------------------------- FILE -----------------------
|
321
476
|
child_stat = @files[globed_path]
|
322
477
|
if child_stat
|
323
|
-
#
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
# ---------- STATUS CHANGED
|
329
|
-
# Update changed status
|
330
|
-
child_stat.state = FileStatEnum::CHANGED
|
331
|
-
child_stat.cycles = 0
|
332
|
-
child_stat.size = globed_path_stat.size
|
333
|
-
child_stat.modification_time = globed_path_stat.mtime.to_i
|
334
|
-
if @@log
|
335
|
-
@@log.info("CHANGED file: " + globed_path)
|
336
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
337
|
-
end
|
338
|
-
# remove file with changed checksum. File will be added once indexed
|
339
|
-
$local_content_data_lock.synchronize{
|
340
|
-
$local_content_data.remove_instance(Params['local_server_name'], globed_path)
|
341
|
-
}
|
342
|
-
else # case child_stat did not change
|
343
|
-
# ---------- SAME STATUS
|
344
|
-
# File status is the same
|
345
|
-
if child_stat.state != FileStatEnum::STABLE
|
346
|
-
child_stat.state = FileStatEnum::UNCHANGED
|
347
|
-
child_stat.cycles += 1
|
348
|
-
if child_stat.cycles >= ::FileMonitoring.stable_state
|
349
|
-
child_stat.state = FileStatEnum::STABLE
|
350
|
-
if @@log
|
351
|
-
@@log.info("STABLE file: " + globed_path)
|
352
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
353
|
-
end
|
354
|
-
else
|
355
|
-
if @@log
|
356
|
-
@@log.info("UNCHANGED file: " + globed_path)
|
357
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
358
|
-
end
|
359
|
-
end
|
360
|
-
end
|
361
|
-
end
|
362
|
-
else # case Params['manual_file_changes']
|
363
|
-
# --------- MANUAL MODE
|
364
|
-
child_stat.marked = true
|
365
|
-
end
|
478
|
+
# Mark that file exists (will not be deleted at end of monitoring)
|
479
|
+
child_stat.marked = true
|
480
|
+
# Handle existing file if we are not in manual mode.
|
481
|
+
# In manual mode do nothing
|
482
|
+
handle_existing_file(child_stat, globed_path, globed_path_stat) unless Params['manual_file_changes']
|
366
483
|
else
|
367
|
-
# ---------------------------- NEW FILE ----------
|
368
484
|
unless Params['manual_file_changes']
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
@@log.info("NEW file: " + globed_path)
|
375
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
376
|
-
end
|
377
|
-
child_stat.marked = true
|
378
|
-
add_file(child_stat)
|
379
|
-
else # case Params['manual_file_changes']
|
380
|
-
# --------------------- MANUAL MODE
|
381
|
-
# check if file name and attributes exist in global file attr map
|
382
|
-
file_attr_key = [File.basename(globed_path), globed_path_stat.size, globed_path_stat.mtime.to_i]
|
383
|
-
file_ident_info = file_attr_to_checksum[file_attr_key]
|
384
|
-
# If not found (real new file) or found but not unique then file needs indexing. skip in manual mode.
|
385
|
-
next unless (file_ident_info and file_ident_info.unique)
|
386
|
-
Log.debug1("update content data with file:%s checksum:%s index_time:%s",
|
387
|
-
File.basename(globed_path), file_ident_info.checksum, file_ident_info.index_time.to_s)
|
388
|
-
# update content data (no need to update Dir tree)
|
389
|
-
$local_content_data_lock.synchronize{
|
390
|
-
$local_content_data.add_instance(file_ident_info.checksum,
|
391
|
-
globed_path_stat.size,
|
392
|
-
Params['local_server_name'],
|
393
|
-
globed_path,
|
394
|
-
globed_path_stat.mtime.to_i,
|
395
|
-
file_ident_info.index_time)
|
396
|
-
}
|
485
|
+
# Handle regular case of new file.
|
486
|
+
handle_new_file(child_stat, globed_path, globed_path_stat)
|
487
|
+
else
|
488
|
+
# Only create new content data instance based on copied/moved files.
|
489
|
+
handle_moved_file(globed_path, globed_path_stat, file_attr_to_checksum)
|
397
490
|
end
|
398
491
|
end
|
399
492
|
else
|
400
|
-
|
401
|
-
child_stat = @dirs[globed_path]
|
402
|
-
unless child_stat
|
403
|
-
# ----------- ADD NEW DIR
|
404
|
-
child_stat = DirStat.new(globed_path)
|
405
|
-
add_dir(child_stat)
|
406
|
-
if @@log
|
407
|
-
@@log.info("NEW dir: " + globed_path)
|
408
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
409
|
-
end
|
410
|
-
end
|
411
|
-
child_stat.marked = true
|
412
|
-
# recursive call for dirs
|
413
|
-
child_stat.monitor(file_attr_to_checksum)
|
493
|
+
handle_dir(globed_path, file_attr_to_checksum)
|
414
494
|
end
|
415
495
|
end
|
496
|
+
|
497
|
+
remove_not_found_symlinks(found_symlinks)
|
498
|
+
|
416
499
|
GC.start
|
417
500
|
end
|
418
501
|
|