content_server 1.3.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/content_data/content_data.rb +194 -56
- data/lib/content_data/version.rb +1 -1
- data/lib/content_server/backup_server.rb +4 -27
- data/lib/content_server/content_server.rb +3 -27
- data/lib/content_server/file_streamer.rb +2 -0
- data/lib/content_server/remote_content.rb +1 -2
- data/lib/content_server/server.rb +23 -3
- data/lib/content_server/version.rb +1 -1
- data/lib/email/version.rb +1 -1
- data/lib/file_copy/version.rb +1 -1
- data/lib/file_indexing/index_agent.rb +1 -1
- data/lib/file_indexing/version.rb +1 -1
- data/lib/file_monitoring/file_monitoring.rb +45 -32
- data/lib/file_monitoring/monitor_path.rb +219 -181
- data/lib/file_monitoring/version.rb +1 -1
- data/lib/file_utils/file_generator/file_generator.rb +1 -1
- data/lib/file_utils/file_utils.rb +2 -2
- data/lib/file_utils/version.rb +1 -1
- data/lib/log/version.rb +1 -1
- data/lib/networking/version.rb +1 -1
- data/lib/params/version.rb +1 -1
- data/lib/process_monitoring/version.rb +1 -1
- data/lib/run_in_background/version.rb +1 -1
- data/lib/testing_memory/testing_memory.rb +1 -1
- data/lib/testing_server/testing_server.rb +1 -1
- data/lib/testing_server/version.rb +1 -1
- data/spec/content_data/validations_spec.rb +2 -2
- data/spec/content_server/file_streamer_spec.rb +5 -0
- data/spec/networking/tcp_spec.rb +1 -3
- data/spec/validations/index_validations_spec.rb +2 -2
- data/test/content_data/content_data_test.rb +8 -7
- data/test/file_generator/file_generator_spec.rb +3 -2
- data/test/file_monitoring/monitor_path_test.rb +38 -4
- data/test/file_utils/fileutil_mksymlink_test.rb +9 -0
- data/test/file_utils/time_modification_test.rb +6 -2
- data/test/run_in_background/test_app +17 -15
- metadata +2 -93
- data/lib/content_server/queue_indexer.rb +0 -86
- data/test/file_indexing/index_agent_test.rb +0 -51
- data/test/file_indexing/index_agent_test/New.txt +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libexslt.dll +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libxslt.dll +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/xsltproc.exe +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exslt.h +0 -102
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltconfig.h +0 -73
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltexports.h +0 -140
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/libexslt.h +0 -29
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/attributes.h +0 -38
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/documents.h +0 -93
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extensions.h +0 -262
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extra.h +0 -80
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/functions.h +0 -78
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/imports.h +0 -75
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/keys.h +0 -53
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/libxslt.h +0 -30
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/namespaces.h +0 -68
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/numbersInternals.h +0 -69
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/pattern.h +0 -81
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/preproc.h +0 -43
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/security.h +0 -104
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/templates.h +0 -77
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/transform.h +0 -207
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/trio.h +0 -216
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/triodef.h +0 -220
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/variables.h +0 -91
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/win32config.h +0 -101
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xslt.h +0 -103
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltInternals.h +0 -1967
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltconfig.h +0 -172
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltexports.h +0 -142
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltlocale.h +0 -57
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltutils.h +0 -309
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltwin32config.h +0 -105
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt_a.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt_a.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/readme.txt +0 -22
- data/test/file_indexing/index_agent_test/patterns.input +0 -3
- data/test/file_monitoring/file_monitoring_test.rb +0 -0
- data/test/file_monitoring/file_monitoring_test/conf.yml +0 -4
- data/test/file_monitoring/file_monitoring_test/conf_win32.yml +0 -5
- data/test/file_monitoring/file_monitoring_test/log +0 -56
@@ -190,11 +190,13 @@ module ContentServer
|
|
190
190
|
handle_new_stream(file_checksum, 0) if !@streams.key?(file_checksum)
|
191
191
|
# Finalize the file copy.
|
192
192
|
handle_last_chunk(file_checksum)
|
193
|
+
return true
|
193
194
|
else
|
194
195
|
Log.warning("Unexpected receive chuck message. file_checksum:#{file_checksum}, " \
|
195
196
|
"content.nil?:#{content.nil?}, content_checksum:#{content_checksum}")
|
196
197
|
return false
|
197
198
|
end
|
199
|
+
Log.error('Code should never reach this point')
|
198
200
|
end
|
199
201
|
|
200
202
|
# open new stream
|
@@ -23,7 +23,6 @@ module ContentServer
|
|
23
23
|
def initialize(host, port, local_backup_folder)
|
24
24
|
@remote_tcp = Networking::TCPClient.new(host, port, method(:receive_content))
|
25
25
|
@last_fetch_timestamp = nil
|
26
|
-
@last_save_timestamp = nil
|
27
26
|
@last_content_data_id = nil
|
28
27
|
@content_server_content_data_path = File.join(local_backup_folder, 'remote',
|
29
28
|
host + '_' + port.to_s)
|
@@ -40,7 +39,7 @@ module ContentServer
|
|
40
39
|
|
41
40
|
# Update remote content data and write to file if changed ContentData received
|
42
41
|
if(message.unique_id != @last_content_data_id)
|
43
|
-
path = File.join(@content_server_content_data_path, @
|
42
|
+
path = File.join(@content_server_content_data_path, @last_fetch_timestamp.to_s + '.cd')
|
44
43
|
FileUtils.makedirs(@content_server_content_data_path) unless \
|
45
44
|
File.directory?(@content_server_content_data_path)
|
46
45
|
$remote_content_data_lock.synchronize{
|
@@ -31,11 +31,12 @@ module ContentServer
|
|
31
31
|
$local_content_data_lock = nil
|
32
32
|
$remote_content_data_lock = nil
|
33
33
|
$remote_content_data = nil
|
34
|
+
$last_content_data_id = nil
|
34
35
|
end
|
35
36
|
|
36
37
|
def handle_program_termination(exception)
|
37
38
|
#Write exception message to console
|
38
|
-
message = "\nInterrupt or Exit happened in server:'
|
39
|
+
message = "\nInterrupt or Exit happened in server:''.\n" +
|
39
40
|
"Exception type:'#{exception.class}'.\n" +
|
40
41
|
"Exception message:'#{exception.message}'.\n" +
|
41
42
|
"Stopping process.\n" +
|
@@ -74,7 +75,7 @@ module ContentServer
|
|
74
75
|
current_objects_counters['DirStat'] = dir_count
|
75
76
|
file_count = ObjectSpace.each_object(FileMonitoring::FileStat).count
|
76
77
|
$process_vars.set('FileStat count', file_count-dir_count)
|
77
|
-
current_objects_counters['FileStat'] = file_count
|
78
|
+
current_objects_counters['FileStat'] = file_count
|
78
79
|
|
79
80
|
# Generate report and update global counters
|
80
81
|
report = ""
|
@@ -88,5 +89,24 @@ module ContentServer
|
|
88
89
|
end
|
89
90
|
end
|
90
91
|
|
91
|
-
|
92
|
+
def flush_content_data
|
93
|
+
Log.debug1('Start flush local content data to file.')
|
94
|
+
$testing_memory_log.info('Start flush content data to file') if $testing_memory_active
|
95
|
+
|
96
|
+
$local_content_data_lock.synchronize{
|
97
|
+
local_content_data_unique_id = $local_content_data.unique_id
|
98
|
+
if (local_content_data_unique_id != $last_content_data_id)
|
99
|
+
$last_content_data_id = local_content_data_unique_id
|
100
|
+
$local_content_data.to_file($tmp_content_data_file)
|
101
|
+
File.rename($tmp_content_data_file, Params['local_content_data_path'])
|
102
|
+
Log.debug1('End flush local content data to file.')
|
103
|
+
$testing_memory_log.info('End flush content data to file') if $testing_memory_active
|
104
|
+
else
|
105
|
+
Log.debug1('no need to flush. content data has not changed')
|
106
|
+
$testing_memory_log.info('no need to flush. content data has not changed') if $testing_memory_active
|
107
|
+
end
|
108
|
+
}
|
109
|
+
end
|
110
|
+
|
111
|
+
module_function :init_globals, :handle_program_termination, :monitor_general_process_vars, :flush_content_data
|
92
112
|
end
|
data/lib/email/version.rb
CHANGED
data/lib/file_copy/version.rb
CHANGED
@@ -126,7 +126,7 @@ module FileIndexing
|
|
126
126
|
# from further processing (save checksum calculation)
|
127
127
|
file_match = false
|
128
128
|
otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
129
|
-
if otherDB_updated.instance_exists(file, local_server_name
|
129
|
+
if otherDB_updated.instance_exists(file, local_server_name)
|
130
130
|
if size == file_stats.size and instance_mod_time == file_mtime.to_i
|
131
131
|
@indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
|
132
132
|
file_match = true
|
@@ -1,3 +1,5 @@
|
|
1
|
+
#todo: stable state not working. after 2 cycles...
|
2
|
+
|
1
3
|
require 'algorithms'
|
2
4
|
require 'fileutils'
|
3
5
|
require 'log4r'
|
@@ -9,12 +11,6 @@ module FileMonitoring
|
|
9
11
|
# Manages file monitoring of number of file system locations
|
10
12
|
class FileMonitoring
|
11
13
|
|
12
|
-
# Set event queue used for communication between different processes.
|
13
|
-
# @param queue [Queue]
|
14
|
-
def set_event_queue(queue)
|
15
|
-
@event_queue = queue
|
16
|
-
end
|
17
|
-
|
18
14
|
# The main method. Loops on all paths, each time span and monitors them.
|
19
15
|
#
|
20
16
|
# =Algorithm:
|
@@ -32,16 +28,16 @@ module FileMonitoring
|
|
32
28
|
# create root dirs of monitoring
|
33
29
|
dir_stat_array = []
|
34
30
|
conf_array.each { |elem|
|
35
|
-
dir_stat = DirStat.new(File.expand_path(elem['path'])
|
36
|
-
|
37
|
-
dir_stat_array.push(dir_stat)
|
31
|
+
dir_stat = DirStat.new(File.expand_path(elem['path']))
|
32
|
+
dir_stat_array.push([dir_stat, elem['stable_state']])
|
38
33
|
}
|
39
34
|
|
40
35
|
#Look over loaded content data if not empty
|
41
36
|
# If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
|
42
37
|
# If file is NOT under monitoring path - skip (not a valid usage)
|
43
38
|
unless $local_content_data.empty?
|
44
|
-
Log.info("Start build data base from loaded file")
|
39
|
+
Log.info("Start build data base from loaded file. This could take several minutes")
|
40
|
+
inst_count = 0
|
45
41
|
$local_content_data.each_instance {
|
46
42
|
|_, size, _, mod_time, _, path|
|
47
43
|
# construct sub paths array from full file path:
|
@@ -63,15 +59,19 @@ module FileMonitoring
|
|
63
59
|
# if index is found then it is the monitor path
|
64
60
|
# the next index indicates the next sub path to insert to the tree
|
65
61
|
# the index will be raised at each recursive call down the tree
|
66
|
-
sub_paths_index = sub_paths.index(dir_stat.path)
|
62
|
+
sub_paths_index = sub_paths.index(dir_stat[0].path)
|
67
63
|
next if sub_paths_index.nil? # monitor path was not found. skip this instance.
|
68
|
-
|
69
|
-
|
64
|
+
|
65
|
+
# monitor path was found. Add to tree
|
70
66
|
# start the recursive call with next sub path index
|
71
|
-
|
67
|
+
::FileMonitoring.stable_state = dir_stat[1]
|
68
|
+
inst_count += 1
|
69
|
+
dir_stat[0].load_instance(sub_paths, sub_paths_index+1, size, mod_time)
|
70
|
+
break
|
72
71
|
}
|
73
72
|
}
|
74
|
-
Log.info("End build data base from loaded file")
|
73
|
+
Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
|
74
|
+
$last_content_data_id = $local_content_data.unique_id
|
75
75
|
end
|
76
76
|
|
77
77
|
# Directories states stored in the priority queue,
|
@@ -81,7 +81,7 @@ module FileMonitoring
|
|
81
81
|
conf_array.each_with_index { |elem, index|
|
82
82
|
priority = (Time.now + elem['scan_period']).to_i
|
83
83
|
#Log.info("File monitoring started for: #{elem}")
|
84
|
-
pq.push([priority, elem, dir_stat_array[index]], -priority)
|
84
|
+
pq.push([priority, elem, dir_stat_array[index][0]], -priority)
|
85
85
|
}
|
86
86
|
|
87
87
|
#init log4r
|
@@ -103,33 +103,46 @@ module FileMonitoring
|
|
103
103
|
file_outputter.level = Log4r::INFO
|
104
104
|
file_outputter.formatter = formatter
|
105
105
|
@log4r.outputters << file_outputter
|
106
|
-
|
106
|
+
::FileMonitoring::DirStat.set_log(@log4r)
|
107
107
|
|
108
108
|
while true do
|
109
109
|
# pull entry that should be checked next,
|
110
110
|
# according to its scan_period
|
111
|
-
time,
|
111
|
+
time, elem, dir_stat = pq.pop
|
112
112
|
# time remains to wait before directory should be checked
|
113
113
|
time_span = time - Time.now.to_i
|
114
114
|
if (time_span > 0)
|
115
115
|
sleep(time_span)
|
116
116
|
end
|
117
117
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
$testing_memory_log.info("Start monitor at :#{Time.now}")
|
124
|
-
puts "Start monitor at :#{Time.now}"
|
125
|
-
dir_stat.monitor
|
126
|
-
$testing_memory_log.info("End monitor at :#{Time.now}")
|
127
|
-
puts "End monitor at :#{Time.now}"
|
128
|
-
end
|
118
|
+
# Start monitor
|
119
|
+
Log.info("Start monitor path:%s ", dir_stat.path)
|
120
|
+
$testing_memory_log.info("Start monitor path:#{dir_stat.path}") if $testing_memory_active
|
121
|
+
::FileMonitoring.stable_state=elem['stable_state']
|
122
|
+
dir_stat.monitor
|
129
123
|
|
130
|
-
#
|
131
|
-
|
132
|
-
|
124
|
+
# Start index
|
125
|
+
Log.info("Start index path:%s ", dir_stat.path)
|
126
|
+
$testing_memory_log.info("Start index path:#{dir_stat.path}") if $testing_memory_active
|
127
|
+
dir_stat.index
|
128
|
+
|
129
|
+
# print number of indexed files
|
130
|
+
Log.debug1("indexed file count:%s", $indexed_file_count)
|
131
|
+
$testing_memory_log.info("indexed file count: #{$indexed_file_count}") if $testing_memory_active
|
132
|
+
|
133
|
+
# remove non existing (not marked) files\dirs
|
134
|
+
Log.info('Start remove non existing paths')
|
135
|
+
$testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
|
136
|
+
dir_stat.removed_unmarked_paths
|
137
|
+
Log.info('End monitor path and index')
|
138
|
+
$testing_memory_log.info('End monitor path and index') if $testing_memory_active
|
139
|
+
|
140
|
+
#flush content data if changed
|
141
|
+
ContentServer.flush_content_data
|
142
|
+
|
143
|
+
#Add back to queue
|
144
|
+
priority = (Time.now + elem['scan_period']).to_i
|
145
|
+
pq.push([priority, elem, dir_stat], -priority)
|
133
146
|
end
|
134
147
|
end
|
135
148
|
end
|
@@ -11,8 +11,6 @@ module FileMonitoring
|
|
11
11
|
# * <tt>CHANGED</tt> - State was changed between two checks
|
12
12
|
# * <tt>UNCHANGED</tt> - Opposite to CHANGED
|
13
13
|
# * <tt>STABLE</tt> - Entity is in the UNCHANGED state for a defined (by user) number of iterations
|
14
|
-
|
15
|
-
|
16
14
|
class FileStatEnum
|
17
15
|
NON_EXISTING = "NON_EXISTING"
|
18
16
|
NEW = "NEW"
|
@@ -21,76 +19,78 @@ module FileMonitoring
|
|
21
19
|
STABLE = "STABLE"
|
22
20
|
end
|
23
21
|
|
22
|
+
# Number of iterations to move state from UNCHANGED to STABLE (for index)
|
23
|
+
@@stable_state = 10
|
24
|
+
|
25
|
+
def self.stable_state=(stable_state)
|
26
|
+
@@stable_state = stable_state
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.stable_state
|
30
|
+
@@stable_state
|
31
|
+
end
|
32
|
+
|
33
|
+
public_class_method :stable_state, :stable_state=
|
34
|
+
|
24
35
|
# This class holds current state of file and methods to control and report changes
|
25
36
|
class FileStat
|
26
|
-
|
27
|
-
attr_accessor :state, :size, :modification_time
|
37
|
+
attr_accessor :path, :state, :size, :modification_time, :marked, :cycles
|
28
38
|
|
29
|
-
|
30
|
-
|
31
|
-
@@log = nil
|
39
|
+
@@digest = Digest::SHA1.new
|
32
40
|
|
33
41
|
# Initializes new file monitoring object
|
34
42
|
# ==== Arguments:
|
35
43
|
#
|
36
|
-
# * <tt>path</tt> - File
|
37
|
-
# * <tt>
|
38
|
-
# * <tt>
|
39
|
-
# * <tt>
|
40
|
-
|
41
|
-
|
42
|
-
@path
|
43
|
-
@size = nil
|
44
|
-
@modification_time = nil
|
45
|
-
@cycles = 0
|
46
|
-
@stable_state = stable_state
|
47
|
-
end
|
44
|
+
# * <tt>path</tt> - File\Dir path
|
45
|
+
# * <tt>state</tt> - state. see class FileStatEnum. Default is NEW
|
46
|
+
# * <tt>size</tt> - File size [Byte]. Default is -1 (will be set later during monitor) todo:used?
|
47
|
+
# * <tt>mod_time</tt> - file mod time [seconds]. Default is -1 (will be set later during monitor)
|
48
|
+
def initialize(path, state=FileStatEnum::NEW, size=-1, mod_time=-1, indexed=false)
|
49
|
+
# File\Dir path
|
50
|
+
@path = path
|
48
51
|
|
49
|
-
|
50
|
-
@
|
51
|
-
end
|
52
|
+
# File size
|
53
|
+
@size = size
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
# File modification time
|
56
|
+
@modification_time = mod_time
|
57
|
+
|
58
|
+
# File state. See class FileStatEnum for states.
|
59
|
+
@state = state
|
60
|
+
|
61
|
+
# indicates if path EXISTS in file system.
|
62
|
+
# If true, file will not be removed during removed_unmarked_paths phase.
|
63
|
+
@marked = false
|
64
|
+
|
65
|
+
# Number of times that file was monitored and not changed.
|
66
|
+
# When @cycles reaches ::FileMonitoring.stable_state, @state is set to STABLE and the file can be indexed.
|
67
|
+
@cycles = 0
|
68
|
+
|
69
|
+
# flag to indicate if file was indexed
|
70
|
+
@indexed = indexed
|
59
71
|
end
|
60
72
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
@size = file_stats.size
|
81
|
-
#@creation_time = file_stats.ctime.utc
|
82
|
-
@modification_time = file_stats.mtime.to_i
|
83
|
-
@cycles = 0
|
84
|
-
else
|
85
|
-
new_state = FileStatEnum::UNCHANGED
|
86
|
-
@cycles += 1
|
87
|
-
if @cycles >= @stable_state
|
88
|
-
new_state = FileStatEnum::STABLE
|
73
|
+
def index
|
74
|
+
if !@indexed and FileStatEnum::STABLE == @state
|
75
|
+
#index file
|
76
|
+
@@digest.reset
|
77
|
+
begin
|
78
|
+
File.open(@path, 'rb') { |f|
|
79
|
+
while buffer = f.read(16384) do
|
80
|
+
@@digest << buffer
|
81
|
+
end
|
82
|
+
}
|
83
|
+
$local_content_data_lock.synchronize{
|
84
|
+
$local_content_data.add_instance(@@digest.hexdigest.downcase, @size, Params['local_server_name'],
|
85
|
+
@path, @modification_time)
|
86
|
+
}
|
87
|
+
#$process_vars.inc('indexed_files')
|
88
|
+
$indexed_file_count += 1
|
89
|
+
@indexed = true
|
90
|
+
rescue
|
91
|
+
Log.warning("Indexed path'#{@path}' does not exist. Probably file changed")
|
89
92
|
end
|
90
93
|
end
|
91
|
-
|
92
|
-
# The assignment
|
93
|
-
set_state(new_state)
|
94
94
|
end
|
95
95
|
|
96
96
|
# Checks that stored file attributes are the same as file attributes taken from file system.
|
@@ -99,29 +99,9 @@ module FileMonitoring
|
|
99
99
|
(file_stats.mtime.to_i == @modification_time))
|
100
100
|
end
|
101
101
|
|
102
|
-
def set_event_queue(queue)
|
103
|
-
@event_queue = queue
|
104
|
-
end
|
105
|
-
|
106
|
-
# Sets and writes to the log a new state.
|
107
|
-
def set_state(new_state)
|
108
|
-
if (@state != new_state or @state == FileStatEnum::CHANGED)
|
109
|
-
@state = new_state
|
110
|
-
if (@@log)
|
111
|
-
@@log.info(state + ": " + path)
|
112
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
113
|
-
end
|
114
|
-
if @event_queue and FileStatEnum::NEW != @state # NEW state is ignored in indexer
|
115
|
-
Log.debug1("Writing to event queue [%s, %s]", @state, @path)
|
116
|
-
@event_queue.push([@state, self.instance_of?(DirStat), @path, @modification_time, @size])
|
117
|
-
$process_vars.set('monitor to index queue size', @event_queue.size)
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
102
|
# Checks whether the path is the same as that of the argument
|
123
103
|
def == (other)
|
124
|
-
@path == other.path
|
104
|
+
@path == other.path
|
125
105
|
end
|
126
106
|
|
127
107
|
# Returns path and state of the file with indentation
|
@@ -131,17 +111,31 @@ module FileMonitoring
|
|
131
111
|
end
|
132
112
|
|
133
113
|
# This class holds current state of directory and methods to control changes
|
134
|
-
class DirStat
|
114
|
+
class DirStat
|
115
|
+
attr_accessor :path, :marked
|
116
|
+
|
117
|
+
@@log = nil
|
118
|
+
|
119
|
+
def self.set_log (log)
|
120
|
+
@@log = log
|
121
|
+
end
|
122
|
+
|
123
|
+
public_class_method :set_log
|
124
|
+
|
135
125
|
# Initializes new directory monitoring object
|
136
126
|
# ==== Arguments:
|
137
127
|
#
|
138
|
-
# * <tt>path</tt> -
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
@
|
143
|
-
@files = nil # Hash: ["path" -> FileStat]
|
128
|
+
# * <tt>path</tt> - Dir location
|
129
|
+
def initialize(path)
|
130
|
+
@path = path
|
131
|
+
@dirs = {}
|
132
|
+
@files = {}
|
144
133
|
@non_utf8_paths = {} # Hash: ["path" -> true|false]
|
134
|
+
|
135
|
+
# indicates if path EXISTS in file system.
|
136
|
+
# If true, file will not be removed during removed_unmarked_paths phase.
|
137
|
+
@marked = false
|
138
|
+
|
145
139
|
end
|
146
140
|
|
147
141
|
# add instance while initializing tree using content data from file
|
@@ -163,20 +157,14 @@ module FileMonitoring
|
|
163
157
|
@files = {} unless @files
|
164
158
|
if sub_paths.size-1 == sub_paths_index
|
165
159
|
# Add File case - index points to last entry - leaf case.
|
166
|
-
file_stat = FileStat.new(sub_paths[sub_paths_index],
|
167
|
-
file_stat.set_event_queue(@event_queue)
|
168
|
-
file_stat.size = size
|
169
|
-
file_stat.modification_time = modification_time
|
170
|
-
file_stat.state = FileStatEnum::STABLE
|
160
|
+
file_stat = FileStat.new(sub_paths[sub_paths_index], FileStatEnum::STABLE, size, modification_time, true)
|
171
161
|
add_file(file_stat)
|
172
162
|
else
|
173
163
|
# Add Dir to tree if not present. index points to new dir path.
|
174
164
|
dir_stat = @dirs[sub_paths[sub_paths_index]]
|
175
165
|
#create new dir if not exist
|
176
166
|
unless dir_stat
|
177
|
-
dir_stat = DirStat.new(sub_paths[sub_paths_index]
|
178
|
-
dir_stat.state = FileStatEnum::STABLE
|
179
|
-
dir_stat.set_event_queue(@event_queue)
|
167
|
+
dir_stat = DirStat.new(sub_paths[sub_paths_index])
|
180
168
|
add_dir(dir_stat)
|
181
169
|
end
|
182
170
|
# continue recursive call on tree with next sub path index
|
@@ -228,111 +216,161 @@ module FileMonitoring
|
|
228
216
|
res
|
229
217
|
end
|
230
218
|
|
231
|
-
#
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
else
|
252
|
-
new_state = FileStatEnum::UNCHANGED
|
253
|
-
@cycles += 1
|
254
|
-
if @cycles >= @stable_state
|
255
|
-
new_state = FileStatEnum::STABLE
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
# The assignment
|
260
|
-
set_state(new_state)
|
261
|
-
end
|
262
|
-
|
263
|
-
# Updates the files and directories hashes and globs the directory for changes.
|
264
|
-
def update_dir
|
265
|
-
was_changed = false
|
266
|
-
|
267
|
-
# monitor existing and absent files
|
268
|
-
@files.each_value do |file|
|
269
|
-
file.monitor
|
270
|
-
|
271
|
-
if file.state == FileStatEnum::NON_EXISTING
|
272
|
-
was_changed = true
|
273
|
-
rm_file(file)
|
219
|
+
# Recursively, remove non existing files and dirs in Tree
|
220
|
+
def removed_unmarked_paths
|
221
|
+
#remove dirs
|
222
|
+
dirs_enum = @dirs.each_value
|
223
|
+
loop do
|
224
|
+
dir_stat = dirs_enum.next rescue break
|
225
|
+
if dir_stat.marked
|
226
|
+
dir_stat.marked = false # unset flag for next monitoring\index\remove phase
|
227
|
+
#recursive call
|
228
|
+
dir_stat.removed_unmarked_paths
|
229
|
+
else
|
230
|
+
# directory is not marked. Remove it, since it does not exist.
|
231
|
+
#Log.debug1("Non Existing dir: %s", file_stat.path)
|
232
|
+
@@log.info("NON_EXISTING dir: " + dir_stat.path)
|
233
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
234
|
+
# remove the non-existing directory from the local content data
|
235
|
+
$local_content_data_lock.synchronize{
|
236
|
+
$local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
|
237
|
+
}
|
238
|
+
rm_dir(dir_stat)
|
274
239
|
end
|
275
240
|
end
|
276
241
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
242
|
+
#remove files
|
243
|
+
files_enum = @files.each_value
|
244
|
+
loop do
|
245
|
+
file_stat = files_enum.next rescue break
|
246
|
+
if file_stat.marked
|
247
|
+
file_stat.marked = false # unset flag for next monitoring\index\remove phase
|
248
|
+
else
|
249
|
+
# file is not marked, meaning it no longer exists. Remove.
|
250
|
+
#Log.debug1("Non Existing file: %s", file_stat.path)
|
251
|
+
@@log.info("NON_EXISTING file: " + file_stat.path)
|
252
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
253
|
+
# remove the non-existing file from the local content data
|
254
|
+
$local_content_data_lock.synchronize{
|
255
|
+
$local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
|
256
|
+
}
|
257
|
+
rm_file(file_stat)
|
283
258
|
end
|
284
259
|
end
|
285
|
-
|
286
|
-
was_changed = was_changed || glob_me
|
287
|
-
|
288
|
-
return was_changed
|
289
260
|
end
|
290
261
|
|
291
|
-
#
|
292
|
-
|
293
|
-
|
294
|
-
|
262
|
+
# Recursively, read files and dirs from file system (using Glob)
|
263
|
+
# Handle new files\dirs.
|
264
|
+
# Change state for existing files\dirs
|
265
|
+
# Index stable files
|
266
|
+
# Removal of non-existing files\dirs is handled in method: removed_unmarked_paths
|
267
|
+
def monitor
|
295
268
|
|
296
|
-
#
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
269
|
+
# Algorithm:
|
270
|
+
# assume that current dir is present
|
271
|
+
# ls (glob) the dir path for child dirs and files
|
272
|
+
# if child file is not already present, add it as new, mark it and handle its state
|
273
|
+
# if file already present, mark it and handle its state.
|
274
|
+
# if child dir is not already present, add it as new, mark it and propagates
|
275
|
+
# the recursive call
|
276
|
+
# if child dir already present, mark it and handle its state
|
277
|
+
# marked files will not be removed in the next remove phase
|
278
|
+
|
279
|
+
# ls (glob) the dir path for child dirs and files
|
280
|
+
globed_paths_enum = Dir.glob(@path + "/*").to_enum
|
281
|
+
loop do
|
282
|
+
globed_path = globed_paths_enum.next rescue break
|
283
|
+
|
284
|
+
# UTF-8 - keep only files with names in valid UTF-8 encoding
|
285
|
+
next if @non_utf8_paths[globed_path]
|
286
|
+
check_utf_8_encoding_file = globed_path.clone
|
301
287
|
unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
|
302
288
|
Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
|
303
|
-
@non_utf8_paths[
|
289
|
+
@non_utf8_paths[globed_path]=true
|
290
|
+
check_utf_8_encoding_file=nil
|
304
291
|
next
|
305
292
|
end
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
293
|
+
|
294
|
+
# Get File \ Dir status
|
295
|
+
globed_path_stat = File.lstat(globed_path) rescue next # File or dir removed from OS file system
|
296
|
+
if globed_path_stat.file?
|
297
|
+
# File case
|
298
|
+
child_stat = @files[globed_path]
|
299
|
+
if child_stat
|
300
|
+
# file child exists in Tree
|
301
|
+
child_stat.marked = true
|
302
|
+
if child_stat.changed?(globed_path_stat)
|
303
|
+
# Update changed status
|
304
|
+
child_stat.state = FileStatEnum::CHANGED
|
305
|
+
child_stat.cycles = 0
|
306
|
+
child_stat.size = globed_path_stat.size
|
307
|
+
child_stat.modification_time = globed_path_stat.mtime.to_i
|
308
|
+
@@log.info("CHANGED file: " + globed_path)
|
309
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
310
|
+
#Log.debug1("CHANGED file: #{globed_path}")
|
311
|
+
# remove file with changed checksum. File will be added once indexed
|
312
|
+
$local_content_data_lock.synchronize{
|
313
|
+
$local_content_data.remove_instance(Params['local_server_name'], globed_path)
|
314
|
+
}
|
315
|
+
else
|
316
|
+
# File status is the same
|
317
|
+
if child_stat.state != FileStatEnum::STABLE
|
318
|
+
child_stat.state = FileStatEnum::UNCHANGED
|
319
|
+
child_stat.cycles += 1
|
320
|
+
if child_stat.cycles >= ::FileMonitoring.stable_state
|
321
|
+
child_stat.state = FileStatEnum::STABLE
|
322
|
+
@@log.info("STABLE file: " + globed_path)
|
323
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
324
|
+
else
|
325
|
+
@@log.info("UNCHANGED file: " + globed_path)
|
326
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
330
|
+
else
|
331
|
+
# new File child:
|
332
|
+
child_stat = FileStat.new(globed_path, FileStatEnum::NEW,
|
333
|
+
globed_path_stat.size, globed_path_stat.mtime.to_i)
|
334
|
+
@@log.info("NEW file: " + globed_path)
|
335
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
336
|
+
child_stat.marked = true
|
337
|
+
add_file(child_stat)
|
316
338
|
end
|
317
|
-
else
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
fs.monitor
|
327
|
-
add_file(fs)
|
339
|
+
else
|
340
|
+
# Dir
|
341
|
+
child_stat = @dirs[globed_path]
|
342
|
+
# Add Dir if not exists in Tree
|
343
|
+
unless child_stat
|
344
|
+
child_stat = DirStat.new(globed_path)
|
345
|
+
add_dir(child_stat)
|
346
|
+
@@log.info("NEW dir: " + globed_path)
|
347
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
328
348
|
end
|
349
|
+
child_stat.marked = true
|
350
|
+
#recursive call for dirs
|
351
|
+
child_stat.monitor
|
329
352
|
end
|
330
353
|
end
|
354
|
+
GC.start
|
355
|
+
end
|
356
|
+
|
357
|
+
def index
|
358
|
+
files_enum = @files.each_value
|
359
|
+
index_counter = $indexed_file_count # to check if files where actually indexed
|
360
|
+
loop do
|
361
|
+
file_stat = files_enum.next rescue break
|
362
|
+
file_stat.index # file index
|
363
|
+
end
|
364
|
+
GC.start if index_counter != $indexed_file_count # GC only if files where indexed
|
331
365
|
|
332
|
-
|
366
|
+
dirs_enum = @dirs.each_value
|
367
|
+
loop do
|
368
|
+
dir_stat = dirs_enum.next rescue break
|
369
|
+
dir_stat.index # dir recursive call
|
370
|
+
end
|
333
371
|
end
|
334
372
|
|
335
|
-
protected :add_dir, :add_file, :rm_dir, :rm_file
|
373
|
+
protected :add_dir, :add_file, :rm_dir, :rm_file
|
336
374
|
end
|
337
375
|
|
338
376
|
end
|