content_server 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/content_data/content_data.rb +32 -26
- data/lib/content_server/version.rb +1 -1
- data/lib/file_monitoring/file_monitoring.rb +79 -29
- data/lib/file_monitoring/monitor_path.rb +111 -52
- data/lib/file_monitoring.rb +2 -0
- data/test/params/params_spec.rb +2 -2
- metadata +2 -2
data/lib/content_data/content_data.rb
CHANGED

@@ -23,7 +23,7 @@ module ContentData
  # unify time, add/remove instance, queries, merge, remove directory and more.
  # Content info data structure:
  # @contents_info = { Checksum -> [size, *instances*, content_modification_time] }
- # *instances* = {[server,path] -> instance_modification_time }
+ # *instances* = {[server,path] -> [instance_modification_time,index_time] }
  # Notes:
  # 1. content_modification_time is the instance_modification_time of the first
  # instances which was added to @contents_info
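
The hunk above changes the per-instance value in @contents_info from a single modification time to a [instance_modification_time, index_time] pair. A minimal sketch of the resulting in-memory layout, with invented checksum, server, path and times:

    # Sketch only: the new @contents_info shape described in the comment above
    # (checksum, server, path and times are invented).
    instances = {
      ['server_a', '/dir1/dir2/file_name'] => [1389000000,  # instance modification time
                                               1389000100]  # index time (new in 1.5.0)
    }
    contents_info = {
      'some_sha1_checksum' => [1024,        # size [bytes]
                               instances,   # *instances* map
                               1389000000]  # content modification time
    }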

@@ -70,8 +70,11 @@ module ContentData
  instances_db_enum = instances_db.each_key
  loop {
  location = instances_db_enum.next rescue break
-
-
+ inst_mod_times = instances_db[location]
+ # we use deep clone for location since map key is using shallow clone.
+ # we dont want references between new content data
+ # and orig object. This will help the GC dispose the orig object if not used any more.
+ instances_db_cloned[[location[0].clone,location[1].clone]] = inst_mod_times.clone
  }
  clone_contents_info[checksum] = [size,
  instances_db_cloned,

@@ -105,9 +108,9 @@ module ContentData
  location = content_info_enum.next rescue break
  # provide the block with: checksum, size, content modification time,instance modification time,
  # server and path.
-
- block.call(checksum,content_info[0], content_info[2],
- location[0], location[1])
+ inst_mod_time, inst_index_time = content_info[1][location]
+ block.call(checksum,content_info[0], content_info[2], inst_mod_time,
+ location[0], location[1], inst_index_time)
  }
  }
  end
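
each_instance now passes the instance index time to the block as a trailing argument, after server and path. A hedged iteration sketch, assuming content_data is a populated ContentData::ContentData object:

    # Sketch only: block arguments follow the order used by block.call above.
    content_data.each_instance do |checksum, size, content_mod_time, inst_mod_time, server, path, index_time|
      puts "#{server}:#{path} size=#{size} indexed_at=#{index_time}"
    end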

@@ -122,8 +125,8 @@ module ContentData
  location = instances_db_enum.next rescue break
  # provide the block with: checksum, size, content modification time,instance modification time,
  # server and path.
-
- block.call(checksum,content_info[0], content_info[2],
+ inst_mod_time,_ = content_info[1][location]
+ block.call(checksum,content_info[0], content_info[2], inst_mod_time,
  location[0], location[1])
  }
  end

@@ -146,10 +149,11 @@ module ContentData
  content_info = @contents_info[checksum]
  return nil if content_info.nil?
  instances = content_info[1]
- instance_time = instances[location]
+ instance_time,_ = instances[location]
+ instance_time
  end

- def add_instance(checksum, size, server, path, modification_time)
+ def add_instance(checksum, size, server, path, modification_time, index_time=Time.now.to_i)
  location = [server, path]

  # file was changed but remove_instance was not called

@@ -161,7 +165,7 @@ module ContentData
  content_info = @contents_info[checksum]
  if content_info.nil?
  @contents_info[checksum] = [size,
- {location => modification_time},
+ {location => [modification_time,index_time]},
  modification_time]
  else
  if size != content_info[0]

@@ -172,7 +176,7 @@ module ContentData
  #override file if needed
  content_info[0] = size
  instances = content_info[1]
- instances[location] = modification_time
+ instances[location] = [modification_time, index_time]
  end
  @instances_info[location] = checksum
  end
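
With the new signature, callers may pass the index time explicitly; when omitted it defaults to Time.now.to_i. A usage sketch with invented values, assuming cd is a ContentData::ContentData instance:

    # Sketch only: argument order matches the new add_instance signature above.
    cd.add_instance('some_sha1_checksum',     # checksum
                    1024,                     # size [bytes]
                    'server_a',               # server
                    '/dir1/dir2/file_name',   # path
                    1389000000,               # modification time [seconds]
                    1389000100)               # index time; defaults to Time.now.to_i if omitted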

@@ -236,7 +240,7 @@ module ContentData
  local_instances = local_content_info[1]
  return false if other.checksum_instances_size(checksum) != local_instances.length
  location = [server, path]
- local_instance_mod_time = local_instances[location]
+ local_instance_mod_time, _ = local_instances[location]
  return false if local_instance_mod_time.nil?
  return false if local_instance_mod_time != instance_mod_time
  }

@@ -318,8 +322,9 @@ module ContentData
  location = instances_db_enum.next rescue break
  # provide the block with: checksum, size, content modification time,instance modification time,
  # server and path.
- instance_modification_time = content_info[1][location]
- file.write("#{checksum},#{content_info[0]},#{location[0]},#{location[1]}
+ instance_modification_time,instance_index_time = content_info[1][location]
+ file.write("#{checksum},#{content_info[0]},#{location[0]},#{location[1]}," +
+ "#{instance_modification_time},#{instance_index_time}\n")
  }
  chunk_counter += 1
  break if chunk_counter == chunk_size

@@ -397,20 +402,21 @@ module ContentData
  return reset_load_from_file(filename, file, "Expected to read Instance line but reached EOF") unless instance_line
  parameters = instance_line.split(',')
  # bugfix: if file name consist a comma then parsing based on comma separating fails
- if (parameters.size >
- (4..parameters.size-
+ if (parameters.size > 6)
+ (4..parameters.size-3).each do |i|
  parameters[3] = [parameters[3], parameters[i]].join(",")
  end
- (4..parameters.size-
+ (4..parameters.size-3).each do |i|
  parameters.delete_at(4)
  end
  end

- add_instance(parameters[0],
- parameters[1].to_i,
- parameters[2],
- parameters[3],
- parameters[4].to_i
+ add_instance(parameters[0], #checksum
+ parameters[1].to_i, # size
+ parameters[2], # server
+ parameters[3], # path
+ parameters[4].to_i, # mod time
+ parameters[5].to_i) # index time
  chunk_index += 1
  end
  true
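
Together, the save and load hunks extend each serialized instance record from five to six comma-separated fields, with the index time appended last; paths containing commas are re-joined by the parameters.size > 6 branch before the fields are consumed. A sketch of one such line and how it is split, with invented values:

    # Sketch only: the field layout written by the save code and parsed by the load code above.
    instance_line = 'some_sha1_checksum,1024,server_a,/dir1/dir2/file_name,1389000000,1389000100'
    parameters = instance_line.split(',')
    parameters[4].to_i  # => 1389000000  instance modification time
    parameters[5].to_i  # => 1389000100  instance index time (new in 1.5.0)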

@@ -436,7 +442,7 @@ module ContentData
  instances_enum = instances.each_key
  loop {
  location = instances_enum.next rescue break
- instance_mod_time = instances[location]
+ instance_mod_time = instances[location][0]
  if instance_mod_time < min_time_per_checksum
  min_time_per_checksum = instance_mod_time
  end

@@ -445,7 +451,7 @@ module ContentData
  instances_enum = instances.each_key
  loop {
  location = instances_enum.next rescue break
- instances[location] = min_time_per_checksum
+ instances[location][0] = min_time_per_checksum
  }
  # update content time with min time
  content_info[2] = min_time_per_checksum

@@ -496,7 +502,7 @@ module ContentData
  instances_enum = instances[1].each_key
  loop {
  unique_path = instances_enum.next rescue break
- instance_mtime = instances[1][unique_path]
+ instance_mtime = instances[1][unique_path][0]
  instance_info = [checksum, content_mtime, content_size, instance_mtime]
  instance_info.concat(unique_path)
  unless check_instance(instance_info)

data/lib/file_monitoring/file_monitoring.rb
CHANGED

@@ -23,7 +23,29 @@ module FileMonitoring
  # This methods controlled by <tt>monitoring_paths</tt> configuration parameter,
  # that provides path and file monitoring configuration data
  def monitor_files
-
+
+ #init log4r
+ monitoring_log_path = Params['default_monitoring_log_path']
+ Log.debug1 'File monitoring log: ' + Params['default_monitoring_log_path']
+ monitoring_log_dir = File.dirname(monitoring_log_path)
+ FileUtils.mkdir_p(monitoring_log_dir) unless File.exists?(monitoring_log_dir)
+
+ @log4r = Log4r::Logger.new 'BBFS monitoring log'
+ @log4r.trace = true
+ formatter = Log4r::PatternFormatter.new(:pattern => "[%d] [%m]")
+ #file setup
+ file_config = {
+ "filename" => Params['default_monitoring_log_path'],
+ "maxsize" => Params['log_rotation_size'],
+ "trunc" => true
+ }
+ file_outputter = Log4r::RollingFileOutputter.new("monitor_log", file_config)
+ file_outputter.level = Log4r::INFO
+ file_outputter.formatter = formatter
+ @log4r.outputters << file_outputter
+ ::FileMonitoring::DirStat.set_log(@log4r)
+
+ conf_array = Params['monitoring_paths']

  # create root dirs of monitoring
  dir_stat_array = []

@@ -35,11 +57,24 @@ module FileMonitoring
  #Look over loaded content data if not empty
  # If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
  # If file is NOT under monitoring path - skip (not a valid usage)
+ file_attr_to_checksum = {} # This structure is used to optimize indexing when user specifies a directory was moved.
  unless $local_content_data.empty?
  Log.info("Start build data base from loaded file. This could take several minutes")
  inst_count = 0
  $local_content_data.each_instance {
-
+ |checksum, size, _, mod_time, _, path, index_time|
+
+ if Params['manual_file_changes']
+ file_attr_key = [File.basename(path), size, mod_time]
+ ident_file_info = file_attr_to_checksum[file_attr_key]
+ unless ident_file_info
+ # Add file checksum to map
+ file_attr_to_checksum[file_attr_key] = IdentFileInfo.new(checksum, index_time)
+ else
+ # File already in map. Need to mark as not unique
+ ident_file_info.unique = false # file will be skipped if found at new location
+ end
+ end
  # construct sub paths array from full file path:
  # Example:
  # instance path = /dir1/dir2/file_name
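
When manual_file_changes is on, the loop above folds every previously indexed instance into file_attr_to_checksum, keyed by basename, size and modification time; a second instance with the same key demotes the entry to non-unique, so that file is skipped in manual mode and indexed normally later. A rough sketch of the resulting map, with invented values (IdentFileInfo is the helper class added in monitor_path.rb):

    # Sketch only: shape of the lookup map built from the loaded content data.
    file_attr_to_checksum = {
      ['file_name', 1024, 1389000000] => IdentFileInfo.new('some_sha1_checksum', 1389000100)
    }
    # A duplicate key would be marked instead of overwritten:
    # file_attr_to_checksum[['file_name', 1024, 1389000000]].unique = false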

@@ -72,6 +107,41 @@ module FileMonitoring
  }
  Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
  $last_content_data_id = $local_content_data.unique_id
+
+ if Params['manual_file_changes']
+ # -------------------------- MANUAL MODE
+ # ------------ LOOP DIRS
+ dir_stat_array.each { | dir_stat|
+ log_msg = "In Manual mode. Start monitor path:#{dir_stat[0].path}. moved or copied files (same name, size and time " +
+ 'modification) will use the checksum of the original files and be updated in content data file'
+ Log.info(log_msg)
+ $testing_memory_log.info(log_msg) if $testing_memory_active
+
+ # ------- MONITOR
+ dir_stat[0].monitor(file_attr_to_checksum)
+
+ # ------- REMOVE PATHS
+ # remove non existing (not marked) files\dirs
+ log_msg = 'Start remove non existing paths'
+ Log.info(log_msg)
+ $testing_memory_log.info(log_msg) if $testing_memory_active
+ dir_stat[0].removed_unmarked_paths
+ log_msg = 'End monitor path and index'
+ Log.info(log_msg)
+ $testing_memory_log.info(log_msg) if $testing_memory_active
+ }
+
+ # ------ WRITE CONTENT DATA
+ ContentServer.flush_content_data
+ raise("Finished manual changes and update file:#{Params['local_content_data_path']}. Exit application\n")
+ end
+ else
+ if Params['manual_file_changes']
+ Log.info('Feature: manual_file_changes is ON. But No previous content data found. ' +
+ 'No change is required. Existing application')
+ raise('Feature: manual_file_changes is ON. But No previous content data found at ' +
+ "file:#{Params['local_content_data_path']}. No change is required. Existing application\n")
+ end
  end

  # Directories states stored in the priority queue,

@@ -84,26 +154,6 @@ module FileMonitoring
  pq.push([priority, elem, dir_stat_array[index][0]], -priority)
  }

- #init log4r
- monitoring_log_path = Params['default_monitoring_log_path']
- Log.debug1 'File monitoring log: ' + Params['default_monitoring_log_path']
- monitoring_log_dir = File.dirname(monitoring_log_path)
- FileUtils.mkdir_p(monitoring_log_dir) unless File.exists?(monitoring_log_dir)
-
- @log4r = Log4r::Logger.new 'BBFS monitoring log'
- @log4r.trace = true
- formatter = Log4r::PatternFormatter.new(:pattern => "[%d] [%m]")
- #file setup
- file_config = {
- "filename" => Params['default_monitoring_log_path'],
- "maxsize" => Params['log_rotation_size'],
- "trunc" => true
- }
- file_outputter = Log4r::RollingFileOutputter.new("monitor_log", file_config)
- file_outputter.level = Log4r::INFO
- file_outputter.formatter = formatter
- @log4r.outputters << file_outputter
- ::FileMonitoring::DirStat.set_log(@log4r)

  while true do
  # pull entry that should be checked next,

@@ -121,6 +171,13 @@ module FileMonitoring
  ::FileMonitoring.stable_state=elem['stable_state']
  dir_stat.monitor

+ # remove non existing (not marked) files\dirs
+ Log.info('Start remove non existing paths')
+ $testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
+ dir_stat.removed_unmarked_paths
+ Log.info('End monitor path and index')
+ $testing_memory_log.info('End monitor path and index') if $testing_memory_active
+
  # Start index
  Log.info("Start index path:%s ", dir_stat.path)
  $testing_memory_log.info("Start index path:#{dir_stat.path}") if $testing_memory_active

@@ -130,13 +187,6 @@ module FileMonitoring
  Log.debug1("indexed file count:%s", $indexed_file_count)
  $testing_memory_log.info("indexed file count: #{$indexed_file_count}") if $testing_memory_active

- # remove non existing (not marked) files\dirs
- Log.info('Start remove non existing paths')
- $testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
- dir_stat.removed_unmarked_paths
- Log.info('End monitor path and index')
- $testing_memory_log.info('End monitor path and index') if $testing_memory_active
-
  #flush content data if changed
  ContentServer.flush_content_data

data/lib/file_monitoring/monitor_path.rb
CHANGED

@@ -19,6 +19,21 @@ module FileMonitoring
  STABLE = "STABLE"
  end

+ # Used for dir rename. Holds following info:
+ # checksum = checksum of the file
+ # index_time = index time of the file
+ # unique - if same key (file attributes) found more then once, we mark the file as not unique.
+ # This means that the file needs to be indexed.
+ # In the manual changes phase, the file will be skipped.
+ class IdentFileInfo
+ attr_accessor :checksum, :index_time, :unique
+ def initialize(checksum, index_time)
+ @checksum = checksum
+ @index_time = index_time
+ @unique = true
+ end
+ end
+
  # Number of iterations to move state from UNCHANGED to STABLE (for index)
  @@stable_state = 10

@@ -34,7 +49,7 @@ module FileMonitoring

  # This class holds current state of file and methods to control and report changes
  class FileStat
- attr_accessor :path, :state, :size, :modification_time, :marked, :cycles
+ attr_accessor :path, :state, :size, :modification_time, :marked, :cycles, :indexed

  @@digest = Digest::SHA1.new

@@ -45,7 +60,9 @@ module FileMonitoring
  # * <tt>state</tt> - state. see class FileStatEnum. Default is NEW
  # * <tt>size</tt> - File size [Byte]. Default is -1 (will be set later during monitor) todo:used?
  # * <tt>mod_time</tt> - file mod time [seconds]. Default is -1 (will be set later during monitor)
-
+ # * <tt>indexed</tt> - Initialize file which is already indexed (used for dir rename case)
+ # * <tt>cycles</tt> - Initialize file which already passed monitor cycles (used for dir rename case)
+ def initialize(path, state=FileStatEnum::NEW, size=-1, mod_time=-1, indexed=false, cycles=0)
  # File\Dir path
  @path = path
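
The two new constructor parameters let a FileStat be seeded as already indexed and already past its monitor cycles (the directory-rename case) instead of waiting ::FileMonitoring.stable_state iterations. A hedged construction sketch with invented path, size and time:

    # Sketch only: creates a file entry that is treated as stable and indexed from the start.
    file_stat = FileStat.new('/dir1/dir2/file_name',
                             FileStatEnum::STABLE,
                             1024,                           # size [bytes]
                             1389000000,                     # mod time [seconds]
                             true,                           # indexed
                             ::FileMonitoring.stable_state)  # cycles already reached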

@@ -64,7 +81,7 @@ module FileMonitoring

  # Number of times that file was monitored and not changed.
  # When @cycles exceeds ::FileMonitoring::stable_state, @state is set to Stable and can be indexed.
- @cycles =
+ @cycles = cycles

  # flag to indicate if file was indexed
  @indexed = indexed

@@ -88,7 +105,7 @@ module FileMonitoring
  $indexed_file_count += 1
  @indexed = true
  rescue
- Log.warning("Indexed path'#{@path}' does not exist. Probably file changed")
+ Log.warning("Indexed path'#{@path}' does not exist. Probably file changed") if @@log
  end
  end
  end

@@ -228,9 +245,10 @@ module FileMonitoring
  dir_stat.removed_unmarked_paths
  else
  # directory is not marked. Remove it, since it does not exist.
-
-
-
+ if @@log
+ @@log.info("NON_EXISTING dir: " + dir_stat.path)
+ @@log.outputters[0].flush if Params['log_flush_each_message']
+ end
  # remove file with changed checksum
  $local_content_data_lock.synchronize{
  $local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])

@@ -247,14 +265,16 @@ module FileMonitoring
  file_stat.marked = false # unset flag for next monitoring\index\remove phase
  else
  # file not marked meaning it is no longer exist. Remove.
-
-
-
+ if @@log
+ @@log.info("NON_EXISTING file: " + file_stat.path)
+ @@log.outputters[0].flush if Params['log_flush_each_message']
+ end
  # remove file with changed checksum
  $local_content_data_lock.synchronize{
  $local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
  }
-
+ # remove from tree
+ @files.delete(file_stat.path)
  end
  end
  end

@@ -264,7 +284,7 @@ module FileMonitoring
  # Change state for existing files\dirs
  # Index stable files
  # Remove non existing files\dirs is handled in method: remove_unmarked_paths
- def monitor
+ def monitor(file_attr_to_checksum=nil)

  # Algorithm:
  # assume that current dir is present

@@ -297,61 +317,100 @@ module FileMonitoring
  # Get File \ Dir status
  globed_path_stat = File.lstat(globed_path) rescue next # File or dir removed from OS file system
  if globed_path_stat.file?
- #
+ # ----------------------------- FILE -----------------------
  child_stat = @files[globed_path]
  if child_stat
- #
-
-
-
- child_stat.
-
-
-
-
-
-
-
-
- $local_content_data.remove_instance(Params['local_server_name'], globed_path)
- }
- else
- # File status is the same
- if child_stat.state != FileStatEnum::STABLE
- child_stat.state = FileStatEnum::UNCHANGED
- child_stat.cycles += 1
- if child_stat.cycles >= ::FileMonitoring.stable_state
- child_stat.state = FileStatEnum::STABLE
- @@log.info("STABLE file: " + globed_path)
- @@log.outputters[0].flush if Params['log_flush_each_message']
- else
- @@log.info("UNCHANGED file: " + globed_path)
+ # -------------- EXISTS in Tree
+ unless Params['manual_file_changes']
+ # --------- NON MANUAL MODE
+ child_stat.marked = true
+ if child_stat.changed?(globed_path_stat)
+ # ---------- STATUS CHANGED
+ # Update changed status
+ child_stat.state = FileStatEnum::CHANGED
+ child_stat.cycles = 0
+ child_stat.size = globed_path_stat.size
+ child_stat.modification_time = globed_path_stat.mtime.to_i
+ if @@log
+ @@log.info("CHANGED file: " + globed_path)
  @@log.outputters[0].flush if Params['log_flush_each_message']
  end
+ # remove file with changed checksum. File will be added once indexed
+ $local_content_data_lock.synchronize{
+ $local_content_data.remove_instance(Params['local_server_name'], globed_path)
+ }
+ else # case child_stat did not change
+ # ---------- SAME STATUS
+ # File status is the same
+ if child_stat.state != FileStatEnum::STABLE
+ child_stat.state = FileStatEnum::UNCHANGED
+ child_stat.cycles += 1
+ if child_stat.cycles >= ::FileMonitoring.stable_state
+ child_stat.state = FileStatEnum::STABLE
+ if @@log
+ @@log.info("STABLE file: " + globed_path)
+ @@log.outputters[0].flush if Params['log_flush_each_message']
+ end
+ else
+ if @@log
+ @@log.info("UNCHANGED file: " + globed_path)
+ @@log.outputters[0].flush if Params['log_flush_each_message']
+ end
+ end
+ end
  end
+ else # case Params['manual_file_changes']
+ # --------- MANUAL MODE
+ child_stat.marked = true
  end
  else
- #
-
-
-
-
-
-
+ # ---------------------------- NEW FILE ----------
+ unless Params['manual_file_changes']
+ child_stat = FileStat.new(globed_path,
+ FileStatEnum::NEW,
+ globed_path_stat.size,
+ globed_path_stat.mtime.to_i)
+ if @@log
+ @@log.info("NEW file: " + globed_path)
+ @@log.outputters[0].flush if Params['log_flush_each_message']
+ end
+ child_stat.marked = true
+ add_file(child_stat)
+ else # case Params['manual_file_changes']
+ # --------------------- MANUAL MODE
+ # check if file name and attributes exist in global file attr map
+ file_attr_key = [File.basename(globed_path), globed_path_stat.size, globed_path_stat.mtime.to_i]
+ file_ident_info = file_attr_to_checksum[file_attr_key]
+ # If not found (real new file) or found but not unique then file needs indexing. skip in manual mode.
+ next unless (file_ident_info and file_ident_info.unique)
+ Log.debug1("update content data with file:%s checksum:%s index_time:%s",
+ File.basename(globed_path), file_ident_info.checksum, file_ident_info.index_time.to_s)
+ # update content data (no need to update Dir tree)
+ $local_content_data_lock.synchronize{
+ $local_content_data.add_instance(file_ident_info.checksum,
+ globed_path_stat.size,
+ Params['local_server_name'],
+ globed_path,
+ globed_path_stat.mtime.to_i,
+ file_ident_info.index_time)
+ }
+ end
  end
  else
- #
+ # ------------------------------ DIR -----------------------
  child_stat = @dirs[globed_path]
- # Add Dir if not exists in Tree
  unless child_stat
+ # ----------- ADD NEW DIR
  child_stat = DirStat.new(globed_path)
  add_dir(child_stat)
- @@log
-
+ if @@log
+ @@log.info("NEW dir: " + globed_path)
+ @@log.outputters[0].flush if Params['log_flush_each_message']
+ end
  end
  child_stat.marked = true
- #recursive call for dirs
- child_stat.monitor
+ # recursive call for dirs
+ child_stat.monitor(file_attr_to_checksum)
  end
  end
  GC.start
data/lib/file_monitoring.rb
CHANGED

@@ -20,6 +20,8 @@ module FileMonitoring
  'This log containd track of changes found during monitoring')
  Params.complex('monitoring_paths', [], 'Array of Hashes with 3 fields: ' \
  'path, scan_period and stable_state.')
+ Params.boolean('manual_file_changes', false, 'true, indicates to application that a set of files were ' \
+ ' moved/copied we we should not index them but rather copy their hashes from contents there were copied from')

  # @see FileMonitoring#monitor_files
  def monitor_files
data/test/params/params_spec.rb
CHANGED

@@ -115,8 +115,8 @@ module Params

  end

- it 'should
- Params.read_yml_params(StringIO.new '
+ it 'should return false when yml file format is bad' do
+ Params.read_yml_params(StringIO.new 'bad yml format').should eq false
  end

  it 'should override defined values with yml values' do
metadata
CHANGED

@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: content_server
  version: !ruby/object:Gem::Version
- version: 1.4.1
+ version: 1.5.0
  prerelease:
  platform: ruby
  authors:

@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date:
+ date: 2014-01-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake