content_server 1.4.1 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/content_data/content_data.rb +32 -26
- data/lib/content_server/version.rb +1 -1
- data/lib/file_monitoring/file_monitoring.rb +79 -29
- data/lib/file_monitoring/monitor_path.rb +111 -52
- data/lib/file_monitoring.rb +2 -0
- data/test/params/params_spec.rb +2 -2
- metadata +2 -2
@@ -23,7 +23,7 @@ module ContentData
|
|
23
23
|
# unify time, add/remove instance, queries, merge, remove directory and more.
|
24
24
|
# Content info data structure:
|
25
25
|
# @contents_info = { Checksum -> [size, *instances*, content_modification_time] }
|
26
|
-
# *instances* = {[server,path] -> instance_modification_time }
|
26
|
+
# *instances* = {[server,path] -> [instance_modification_time,index_time] }
|
27
27
|
# Notes:
|
28
28
|
# 1. content_modification_time is the instance_modification_time of the first
|
29
29
|
# instance which was added to @contents_info
|
@@ -70,8 +70,11 @@ module ContentData
|
|
70
70
|
instances_db_enum = instances_db.each_key
|
71
71
|
loop {
|
72
72
|
location = instances_db_enum.next rescue break
|
73
|
-
|
74
|
-
|
73
|
+
inst_mod_times = instances_db[location]
|
74
|
+
# we use deep clone for location since map key is using shallow clone.
|
75
|
+
# we don't want references between new content data
|
76
|
+
# and orig object. This will help the GC dispose the orig object if not used any more.
|
77
|
+
instances_db_cloned[[location[0].clone,location[1].clone]] = inst_mod_times.clone
|
75
78
|
}
|
76
79
|
clone_contents_info[checksum] = [size,
|
77
80
|
instances_db_cloned,
|
@@ -105,9 +108,9 @@ module ContentData
|
|
105
108
|
location = content_info_enum.next rescue break
|
106
109
|
# provide the block with: checksum, size, content modification time,instance modification time,
|
107
110
|
# server, path and instance index time.
|
108
|
-
|
109
|
-
block.call(checksum,content_info[0], content_info[2],
|
110
|
-
location[0], location[1])
|
111
|
+
inst_mod_time, inst_index_time = content_info[1][location]
|
112
|
+
block.call(checksum,content_info[0], content_info[2], inst_mod_time,
|
113
|
+
location[0], location[1], inst_index_time)
|
111
114
|
}
|
112
115
|
}
|
113
116
|
end
|
@@ -122,8 +125,8 @@ module ContentData
|
|
122
125
|
location = instances_db_enum.next rescue break
|
123
126
|
# provide the block with: checksum, size, content modification time,instance modification time,
|
124
127
|
# server and path.
|
125
|
-
|
126
|
-
block.call(checksum,content_info[0], content_info[2],
|
128
|
+
inst_mod_time,_ = content_info[1][location]
|
129
|
+
block.call(checksum,content_info[0], content_info[2], inst_mod_time,
|
127
130
|
location[0], location[1])
|
128
131
|
}
|
129
132
|
end
|
@@ -146,10 +149,11 @@ module ContentData
|
|
146
149
|
content_info = @contents_info[checksum]
|
147
150
|
return nil if content_info.nil?
|
148
151
|
instances = content_info[1]
|
149
|
-
instance_time = instances[location]
|
152
|
+
instance_time,_ = instances[location]
|
153
|
+
instance_time
|
150
154
|
end
|
151
155
|
|
152
|
-
def add_instance(checksum, size, server, path, modification_time)
|
156
|
+
def add_instance(checksum, size, server, path, modification_time, index_time=Time.now.to_i)
|
153
157
|
location = [server, path]
|
154
158
|
|
155
159
|
# file was changed but remove_instance was not called
|
@@ -161,7 +165,7 @@ module ContentData
|
|
161
165
|
content_info = @contents_info[checksum]
|
162
166
|
if content_info.nil?
|
163
167
|
@contents_info[checksum] = [size,
|
164
|
-
{location => modification_time},
|
168
|
+
{location => [modification_time,index_time]},
|
165
169
|
modification_time]
|
166
170
|
else
|
167
171
|
if size != content_info[0]
|
@@ -172,7 +176,7 @@ module ContentData
|
|
172
176
|
#override file if needed
|
173
177
|
content_info[0] = size
|
174
178
|
instances = content_info[1]
|
175
|
-
instances[location] = modification_time
|
179
|
+
instances[location] = [modification_time, index_time]
|
176
180
|
end
|
177
181
|
@instances_info[location] = checksum
|
178
182
|
end
|
@@ -236,7 +240,7 @@ module ContentData
|
|
236
240
|
local_instances = local_content_info[1]
|
237
241
|
return false if other.checksum_instances_size(checksum) != local_instances.length
|
238
242
|
location = [server, path]
|
239
|
-
local_instance_mod_time = local_instances[location]
|
243
|
+
local_instance_mod_time, _ = local_instances[location]
|
240
244
|
return false if local_instance_mod_time.nil?
|
241
245
|
return false if local_instance_mod_time != instance_mod_time
|
242
246
|
}
|
@@ -318,8 +322,9 @@ module ContentData
|
|
318
322
|
location = instances_db_enum.next rescue break
|
319
323
|
# provide the block with: checksum, size, content modification time,instance modification time,
|
320
324
|
# server and path.
|
321
|
-
instance_modification_time = content_info[1][location]
|
322
|
-
file.write("#{checksum},#{content_info[0]},#{location[0]},#{location[1]}
|
325
|
+
instance_modification_time,instance_index_time = content_info[1][location]
|
326
|
+
file.write("#{checksum},#{content_info[0]},#{location[0]},#{location[1]}," +
|
327
|
+
"#{instance_modification_time},#{instance_index_time}\n")
|
323
328
|
}
|
324
329
|
chunk_counter += 1
|
325
330
|
break if chunk_counter == chunk_size
|
@@ -397,20 +402,21 @@ module ContentData
|
|
397
402
|
return reset_load_from_file(filename, file, "Expected to read Instance line but reached EOF") unless instance_line
|
398
403
|
parameters = instance_line.split(',')
|
399
404
|
# bugfix: if the file name contains a comma then parsing based on comma separation fails
|
400
|
-
if (parameters.size >
|
401
|
-
(4..parameters.size-
|
405
|
+
if (parameters.size > 6)
|
406
|
+
(4..parameters.size-3).each do |i|
|
402
407
|
parameters[3] = [parameters[3], parameters[i]].join(",")
|
403
408
|
end
|
404
|
-
(4..parameters.size-
|
409
|
+
(4..parameters.size-3).each do |i|
|
405
410
|
parameters.delete_at(4)
|
406
411
|
end
|
407
412
|
end
|
408
413
|
|
409
|
-
add_instance(parameters[0],
|
410
|
-
parameters[1].to_i,
|
411
|
-
parameters[2],
|
412
|
-
parameters[3],
|
413
|
-
parameters[4].to_i
|
414
|
+
add_instance(parameters[0], #checksum
|
415
|
+
parameters[1].to_i, # size
|
416
|
+
parameters[2], # server
|
417
|
+
parameters[3], # path
|
418
|
+
parameters[4].to_i, # mod time
|
419
|
+
parameters[5].to_i) # index time
|
414
420
|
chunk_index += 1
|
415
421
|
end
|
416
422
|
true
|
@@ -436,7 +442,7 @@ module ContentData
|
|
436
442
|
instances_enum = instances.each_key
|
437
443
|
loop {
|
438
444
|
location = instances_enum.next rescue break
|
439
|
-
instance_mod_time = instances[location]
|
445
|
+
instance_mod_time = instances[location][0]
|
440
446
|
if instance_mod_time < min_time_per_checksum
|
441
447
|
min_time_per_checksum = instance_mod_time
|
442
448
|
end
|
@@ -445,7 +451,7 @@ module ContentData
|
|
445
451
|
instances_enum = instances.each_key
|
446
452
|
loop {
|
447
453
|
location = instances_enum.next rescue break
|
448
|
-
instances[location] = min_time_per_checksum
|
454
|
+
instances[location][0] = min_time_per_checksum
|
449
455
|
}
|
450
456
|
# update content time with min time
|
451
457
|
content_info[2] = min_time_per_checksum
|
@@ -496,7 +502,7 @@ module ContentData
|
|
496
502
|
instances_enum = instances[1].each_key
|
497
503
|
loop {
|
498
504
|
unique_path = instances_enum.next rescue break
|
499
|
-
instance_mtime = instances[1][unique_path]
|
505
|
+
instance_mtime = instances[1][unique_path][0]
|
500
506
|
instance_info = [checksum, content_mtime, content_size, instance_mtime]
|
501
507
|
instance_info.concat(unique_path)
|
502
508
|
unless check_instance(instance_info)
|
@@ -23,7 +23,29 @@ module FileMonitoring
|
|
23
23
|
# This method is controlled by the <tt>monitoring_paths</tt> configuration parameter,
|
24
24
|
# that provides path and file monitoring configuration data
|
25
25
|
def monitor_files
|
26
|
-
|
26
|
+
|
27
|
+
#init log4r
|
28
|
+
monitoring_log_path = Params['default_monitoring_log_path']
|
29
|
+
Log.debug1 'File monitoring log: ' + Params['default_monitoring_log_path']
|
30
|
+
monitoring_log_dir = File.dirname(monitoring_log_path)
|
31
|
+
FileUtils.mkdir_p(monitoring_log_dir) unless File.exists?(monitoring_log_dir)
|
32
|
+
|
33
|
+
@log4r = Log4r::Logger.new 'BBFS monitoring log'
|
34
|
+
@log4r.trace = true
|
35
|
+
formatter = Log4r::PatternFormatter.new(:pattern => "[%d] [%m]")
|
36
|
+
#file setup
|
37
|
+
file_config = {
|
38
|
+
"filename" => Params['default_monitoring_log_path'],
|
39
|
+
"maxsize" => Params['log_rotation_size'],
|
40
|
+
"trunc" => true
|
41
|
+
}
|
42
|
+
file_outputter = Log4r::RollingFileOutputter.new("monitor_log", file_config)
|
43
|
+
file_outputter.level = Log4r::INFO
|
44
|
+
file_outputter.formatter = formatter
|
45
|
+
@log4r.outputters << file_outputter
|
46
|
+
::FileMonitoring::DirStat.set_log(@log4r)
|
47
|
+
|
48
|
+
conf_array = Params['monitoring_paths']
|
27
49
|
|
28
50
|
# create root dirs of monitoring
|
29
51
|
dir_stat_array = []
|
@@ -35,11 +57,24 @@ module FileMonitoring
|
|
35
57
|
#Look over loaded content data if not empty
|
36
58
|
# If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
|
37
59
|
# If file is NOT under monitoring path - skip (not a valid usage)
|
60
|
+
file_attr_to_checksum = {} # This structure is used to optimize indexing when user specifies a directory was moved.
|
38
61
|
unless $local_content_data.empty?
|
39
62
|
Log.info("Start build data base from loaded file. This could take several minutes")
|
40
63
|
inst_count = 0
|
41
64
|
$local_content_data.each_instance {
|
42
|
-
|
|
65
|
+
|checksum, size, _, mod_time, _, path, index_time|
|
66
|
+
|
67
|
+
if Params['manual_file_changes']
|
68
|
+
file_attr_key = [File.basename(path), size, mod_time]
|
69
|
+
ident_file_info = file_attr_to_checksum[file_attr_key]
|
70
|
+
unless ident_file_info
|
71
|
+
# Add file checksum to map
|
72
|
+
file_attr_to_checksum[file_attr_key] = IdentFileInfo.new(checksum, index_time)
|
73
|
+
else
|
74
|
+
# File already in map. Need to mark as not unique
|
75
|
+
ident_file_info.unique = false # file will be skipped if found at new location
|
76
|
+
end
|
77
|
+
end
|
43
78
|
# construct sub paths array from full file path:
|
44
79
|
# Example:
|
45
80
|
# instance path = /dir1/dir2/file_name
|
@@ -72,6 +107,41 @@ module FileMonitoring
|
|
72
107
|
}
|
73
108
|
Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
|
74
109
|
$last_content_data_id = $local_content_data.unique_id
|
110
|
+
|
111
|
+
if Params['manual_file_changes']
|
112
|
+
# -------------------------- MANUAL MODE
|
113
|
+
# ------------ LOOP DIRS
|
114
|
+
dir_stat_array.each { | dir_stat|
|
115
|
+
log_msg = "In Manual mode. Start monitor path:#{dir_stat[0].path}. moved or copied files (same name, size and time " +
|
116
|
+
'modification) will use the checksum of the original files and be updated in content data file'
|
117
|
+
Log.info(log_msg)
|
118
|
+
$testing_memory_log.info(log_msg) if $testing_memory_active
|
119
|
+
|
120
|
+
# ------- MONITOR
|
121
|
+
dir_stat[0].monitor(file_attr_to_checksum)
|
122
|
+
|
123
|
+
# ------- REMOVE PATHS
|
124
|
+
# remove non existing (not marked) files\dirs
|
125
|
+
log_msg = 'Start remove non existing paths'
|
126
|
+
Log.info(log_msg)
|
127
|
+
$testing_memory_log.info(log_msg) if $testing_memory_active
|
128
|
+
dir_stat[0].removed_unmarked_paths
|
129
|
+
log_msg = 'End monitor path and index'
|
130
|
+
Log.info(log_msg)
|
131
|
+
$testing_memory_log.info(log_msg) if $testing_memory_active
|
132
|
+
}
|
133
|
+
|
134
|
+
# ------ WRITE CONTENT DATA
|
135
|
+
ContentServer.flush_content_data
|
136
|
+
raise("Finished manual changes and update file:#{Params['local_content_data_path']}. Exit application\n")
|
137
|
+
end
|
138
|
+
else
|
139
|
+
if Params['manual_file_changes']
|
140
|
+
Log.info('Feature: manual_file_changes is ON. But No previous content data found. ' +
|
141
|
+
'No change is required. Existing application')
|
142
|
+
raise('Feature: manual_file_changes is ON. But No previous content data found at ' +
|
143
|
+
"file:#{Params['local_content_data_path']}. No change is required. Existing application\n")
|
144
|
+
end
|
75
145
|
end
|
76
146
|
|
77
147
|
# Directories states stored in the priority queue,
|
@@ -84,26 +154,6 @@ module FileMonitoring
|
|
84
154
|
pq.push([priority, elem, dir_stat_array[index][0]], -priority)
|
85
155
|
}
|
86
156
|
|
87
|
-
#init log4r
|
88
|
-
monitoring_log_path = Params['default_monitoring_log_path']
|
89
|
-
Log.debug1 'File monitoring log: ' + Params['default_monitoring_log_path']
|
90
|
-
monitoring_log_dir = File.dirname(monitoring_log_path)
|
91
|
-
FileUtils.mkdir_p(monitoring_log_dir) unless File.exists?(monitoring_log_dir)
|
92
|
-
|
93
|
-
@log4r = Log4r::Logger.new 'BBFS monitoring log'
|
94
|
-
@log4r.trace = true
|
95
|
-
formatter = Log4r::PatternFormatter.new(:pattern => "[%d] [%m]")
|
96
|
-
#file setup
|
97
|
-
file_config = {
|
98
|
-
"filename" => Params['default_monitoring_log_path'],
|
99
|
-
"maxsize" => Params['log_rotation_size'],
|
100
|
-
"trunc" => true
|
101
|
-
}
|
102
|
-
file_outputter = Log4r::RollingFileOutputter.new("monitor_log", file_config)
|
103
|
-
file_outputter.level = Log4r::INFO
|
104
|
-
file_outputter.formatter = formatter
|
105
|
-
@log4r.outputters << file_outputter
|
106
|
-
::FileMonitoring::DirStat.set_log(@log4r)
|
107
157
|
|
108
158
|
while true do
|
109
159
|
# pull entry that should be checked next,
|
@@ -121,6 +171,13 @@ module FileMonitoring
|
|
121
171
|
::FileMonitoring.stable_state=elem['stable_state']
|
122
172
|
dir_stat.monitor
|
123
173
|
|
174
|
+
# remove non existing (not marked) files\dirs
|
175
|
+
Log.info('Start remove non existing paths')
|
176
|
+
$testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
|
177
|
+
dir_stat.removed_unmarked_paths
|
178
|
+
Log.info('End monitor path and index')
|
179
|
+
$testing_memory_log.info('End monitor path and index') if $testing_memory_active
|
180
|
+
|
124
181
|
# Start index
|
125
182
|
Log.info("Start index path:%s ", dir_stat.path)
|
126
183
|
$testing_memory_log.info("Start index path:#{dir_stat.path}") if $testing_memory_active
|
@@ -130,13 +187,6 @@ module FileMonitoring
|
|
130
187
|
Log.debug1("indexed file count:%s", $indexed_file_count)
|
131
188
|
$testing_memory_log.info("indexed file count: #{$indexed_file_count}") if $testing_memory_active
|
132
189
|
|
133
|
-
# remove non existing (not marked) files\dirs
|
134
|
-
Log.info('Start remove non existing paths')
|
135
|
-
$testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
|
136
|
-
dir_stat.removed_unmarked_paths
|
137
|
-
Log.info('End monitor path and index')
|
138
|
-
$testing_memory_log.info('End monitor path and index') if $testing_memory_active
|
139
|
-
|
140
190
|
#flush content data if changed
|
141
191
|
ContentServer.flush_content_data
|
142
192
|
|
@@ -19,6 +19,21 @@ module FileMonitoring
|
|
19
19
|
STABLE = "STABLE"
|
20
20
|
end
|
21
21
|
|
22
|
+
# Used for dir rename. Holds following info:
|
23
|
+
# checksum = checksum of the file
|
24
|
+
# index_time = index time of the file
|
25
|
+
# unique - if the same key (file attributes) is found more than once, we mark the file as not unique.
|
26
|
+
# This means that the file needs to be indexed.
|
27
|
+
# In the manual changes phase, the file will be skipped.
|
28
|
+
class IdentFileInfo
|
29
|
+
attr_accessor :checksum, :index_time, :unique
|
30
|
+
def initialize(checksum, index_time)
|
31
|
+
@checksum = checksum
|
32
|
+
@index_time = index_time
|
33
|
+
@unique = true
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
22
37
|
# Number of iterations to move state from UNCHANGED to STABLE (for index)
|
23
38
|
@@stable_state = 10
|
24
39
|
|
@@ -34,7 +49,7 @@ module FileMonitoring
|
|
34
49
|
|
35
50
|
# This class holds current state of file and methods to control and report changes
|
36
51
|
class FileStat
|
37
|
-
attr_accessor :path, :state, :size, :modification_time, :marked, :cycles
|
52
|
+
attr_accessor :path, :state, :size, :modification_time, :marked, :cycles, :indexed
|
38
53
|
|
39
54
|
@@digest = Digest::SHA1.new
|
40
55
|
|
@@ -45,7 +60,9 @@ module FileMonitoring
|
|
45
60
|
# * <tt>state</tt> - state. see class FileStatEnum. Default is NEW
|
46
61
|
# * <tt>size</tt> - File size [Byte]. Default is -1 (will be set later during monitor) todo:used?
|
47
62
|
# * <tt>mod_time</tt> - file mod time [seconds]. Default is -1 (will be set later during monitor)
|
48
|
-
|
63
|
+
# * <tt>indexed</tt> - Initialize file which is already indexed (used for dir rename case)
|
64
|
+
# * <tt>cycles</tt> - Initialize file which already passed monitor cycles (used for dir rename case)
|
65
|
+
def initialize(path, state=FileStatEnum::NEW, size=-1, mod_time=-1, indexed=false, cycles=0)
|
49
66
|
# File\Dir path
|
50
67
|
@path = path
|
51
68
|
|
@@ -64,7 +81,7 @@ module FileMonitoring
|
|
64
81
|
|
65
82
|
# Number of times that file was monitored and not changed.
|
66
83
|
# When @cycles exceeds ::FileMonitoring::stable_state, @state is set to Stable and can be indexed.
|
67
|
-
@cycles =
|
84
|
+
@cycles = cycles
|
68
85
|
|
69
86
|
# flag to indicate if file was indexed
|
70
87
|
@indexed = indexed
|
@@ -88,7 +105,7 @@ module FileMonitoring
|
|
88
105
|
$indexed_file_count += 1
|
89
106
|
@indexed = true
|
90
107
|
rescue
|
91
|
-
Log.warning("Indexed path'#{@path}' does not exist. Probably file changed")
|
108
|
+
Log.warning("Indexed path'#{@path}' does not exist. Probably file changed") if @@log
|
92
109
|
end
|
93
110
|
end
|
94
111
|
end
|
@@ -228,9 +245,10 @@ module FileMonitoring
|
|
228
245
|
dir_stat.removed_unmarked_paths
|
229
246
|
else
|
230
247
|
# directory is not marked. Remove it, since it does not exist.
|
231
|
-
|
232
|
-
|
233
|
-
|
248
|
+
if @@log
|
249
|
+
@@log.info("NON_EXISTING dir: " + dir_stat.path)
|
250
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
251
|
+
end
|
234
252
|
# remove file with changed checksum
|
235
253
|
$local_content_data_lock.synchronize{
|
236
254
|
$local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
|
@@ -247,14 +265,16 @@ module FileMonitoring
|
|
247
265
|
file_stat.marked = false # unset flag for next monitoring\index\remove phase
|
248
266
|
else
|
249
267
|
# file is not marked, meaning it no longer exists. Remove.
|
250
|
-
|
251
|
-
|
252
|
-
|
268
|
+
if @@log
|
269
|
+
@@log.info("NON_EXISTING file: " + file_stat.path)
|
270
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
271
|
+
end
|
253
272
|
# remove file with changed checksum
|
254
273
|
$local_content_data_lock.synchronize{
|
255
274
|
$local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
|
256
275
|
}
|
257
|
-
|
276
|
+
# remove from tree
|
277
|
+
@files.delete(file_stat.path)
|
258
278
|
end
|
259
279
|
end
|
260
280
|
end
|
@@ -264,7 +284,7 @@ module FileMonitoring
|
|
264
284
|
# Change state for existing files\dirs
|
265
285
|
# Index stable files
|
266
286
|
# Remove non existing files\dirs is handled in method: remove_unmarked_paths
|
267
|
-
def monitor
|
287
|
+
def monitor(file_attr_to_checksum=nil)
|
268
288
|
|
269
289
|
# Algorithm:
|
270
290
|
# assume that current dir is present
|
@@ -297,61 +317,100 @@ module FileMonitoring
|
|
297
317
|
# Get File \ Dir status
|
298
318
|
globed_path_stat = File.lstat(globed_path) rescue next # File or dir removed from OS file system
|
299
319
|
if globed_path_stat.file?
|
300
|
-
#
|
320
|
+
# ----------------------------- FILE -----------------------
|
301
321
|
child_stat = @files[globed_path]
|
302
322
|
if child_stat
|
303
|
-
#
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
child_stat.
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
$local_content_data.remove_instance(Params['local_server_name'], globed_path)
|
317
|
-
}
|
318
|
-
else
|
319
|
-
# File status is the same
|
320
|
-
if child_stat.state != FileStatEnum::STABLE
|
321
|
-
child_stat.state = FileStatEnum::UNCHANGED
|
322
|
-
child_stat.cycles += 1
|
323
|
-
if child_stat.cycles >= ::FileMonitoring.stable_state
|
324
|
-
child_stat.state = FileStatEnum::STABLE
|
325
|
-
@@log.info("STABLE file: " + globed_path)
|
326
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
327
|
-
else
|
328
|
-
@@log.info("UNCHANGED file: " + globed_path)
|
323
|
+
# -------------- EXISTS in Tree
|
324
|
+
unless Params['manual_file_changes']
|
325
|
+
# --------- NON MANUAL MODE
|
326
|
+
child_stat.marked = true
|
327
|
+
if child_stat.changed?(globed_path_stat)
|
328
|
+
# ---------- STATUS CHANGED
|
329
|
+
# Update changed status
|
330
|
+
child_stat.state = FileStatEnum::CHANGED
|
331
|
+
child_stat.cycles = 0
|
332
|
+
child_stat.size = globed_path_stat.size
|
333
|
+
child_stat.modification_time = globed_path_stat.mtime.to_i
|
334
|
+
if @@log
|
335
|
+
@@log.info("CHANGED file: " + globed_path)
|
329
336
|
@@log.outputters[0].flush if Params['log_flush_each_message']
|
330
337
|
end
|
338
|
+
# remove file with changed checksum. File will be added once indexed
|
339
|
+
$local_content_data_lock.synchronize{
|
340
|
+
$local_content_data.remove_instance(Params['local_server_name'], globed_path)
|
341
|
+
}
|
342
|
+
else # case child_stat did not change
|
343
|
+
# ---------- SAME STATUS
|
344
|
+
# File status is the same
|
345
|
+
if child_stat.state != FileStatEnum::STABLE
|
346
|
+
child_stat.state = FileStatEnum::UNCHANGED
|
347
|
+
child_stat.cycles += 1
|
348
|
+
if child_stat.cycles >= ::FileMonitoring.stable_state
|
349
|
+
child_stat.state = FileStatEnum::STABLE
|
350
|
+
if @@log
|
351
|
+
@@log.info("STABLE file: " + globed_path)
|
352
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
353
|
+
end
|
354
|
+
else
|
355
|
+
if @@log
|
356
|
+
@@log.info("UNCHANGED file: " + globed_path)
|
357
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
358
|
+
end
|
359
|
+
end
|
360
|
+
end
|
331
361
|
end
|
362
|
+
else # case Params['manual_file_changes']
|
363
|
+
# --------- MANUAL MODE
|
364
|
+
child_stat.marked = true
|
332
365
|
end
|
333
366
|
else
|
334
|
-
#
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
367
|
+
# ---------------------------- NEW FILE ----------
|
368
|
+
unless Params['manual_file_changes']
|
369
|
+
child_stat = FileStat.new(globed_path,
|
370
|
+
FileStatEnum::NEW,
|
371
|
+
globed_path_stat.size,
|
372
|
+
globed_path_stat.mtime.to_i)
|
373
|
+
if @@log
|
374
|
+
@@log.info("NEW file: " + globed_path)
|
375
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
376
|
+
end
|
377
|
+
child_stat.marked = true
|
378
|
+
add_file(child_stat)
|
379
|
+
else # case Params['manual_file_changes']
|
380
|
+
# --------------------- MANUAL MODE
|
381
|
+
# check if file name and attributes exist in global file attr map
|
382
|
+
file_attr_key = [File.basename(globed_path), globed_path_stat.size, globed_path_stat.mtime.to_i]
|
383
|
+
file_ident_info = file_attr_to_checksum[file_attr_key]
|
384
|
+
# If not found (real new file) or found but not unique then file needs indexing. skip in manual mode.
|
385
|
+
next unless (file_ident_info and file_ident_info.unique)
|
386
|
+
Log.debug1("update content data with file:%s checksum:%s index_time:%s",
|
387
|
+
File.basename(globed_path), file_ident_info.checksum, file_ident_info.index_time.to_s)
|
388
|
+
# update content data (no need to update Dir tree)
|
389
|
+
$local_content_data_lock.synchronize{
|
390
|
+
$local_content_data.add_instance(file_ident_info.checksum,
|
391
|
+
globed_path_stat.size,
|
392
|
+
Params['local_server_name'],
|
393
|
+
globed_path,
|
394
|
+
globed_path_stat.mtime.to_i,
|
395
|
+
file_ident_info.index_time)
|
396
|
+
}
|
397
|
+
end
|
341
398
|
end
|
342
399
|
else
|
343
|
-
#
|
400
|
+
# ------------------------------ DIR -----------------------
|
344
401
|
child_stat = @dirs[globed_path]
|
345
|
-
# Add Dir if not exists in Tree
|
346
402
|
unless child_stat
|
403
|
+
# ----------- ADD NEW DIR
|
347
404
|
child_stat = DirStat.new(globed_path)
|
348
405
|
add_dir(child_stat)
|
349
|
-
@@log
|
350
|
-
|
406
|
+
if @@log
|
407
|
+
@@log.info("NEW dir: " + globed_path)
|
408
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
409
|
+
end
|
351
410
|
end
|
352
411
|
child_stat.marked = true
|
353
|
-
#recursive call for dirs
|
354
|
-
child_stat.monitor
|
412
|
+
# recursive call for dirs
|
413
|
+
child_stat.monitor(file_attr_to_checksum)
|
355
414
|
end
|
356
415
|
end
|
357
416
|
GC.start
|
data/lib/file_monitoring.rb
CHANGED
@@ -20,6 +20,8 @@ module FileMonitoring
|
|
20
20
|
'This log containd track of changes found during monitoring')
|
21
21
|
Params.complex('monitoring_paths', [], 'Array of Hashes with 3 fields: ' \
|
22
22
|
'path, scan_period and stable_state.')
|
23
|
+
Params.boolean('manual_file_changes', false, 'true, indicates to application that a set of files were ' \
|
24
|
+
' moved/copied we we should not index them but rather copy their hashes from contents there were copied from')
|
23
25
|
|
24
26
|
# @see FileMonitoring#monitor_files
|
25
27
|
def monitor_files
|
data/test/params/params_spec.rb
CHANGED
@@ -115,8 +115,8 @@ module Params
|
|
115
115
|
|
116
116
|
end
|
117
117
|
|
118
|
-
it 'should
|
119
|
-
Params.read_yml_params(StringIO.new '
|
118
|
+
it 'should return false when yml file format is bad' do
|
119
|
+
Params.read_yml_params(StringIO.new 'bad yml format').should eq false
|
120
120
|
end
|
121
121
|
|
122
122
|
it 'should override defined values with yml values' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: content_server
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-01-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|