content_server 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/content_data/content_data.rb +194 -56
- data/lib/content_data/version.rb +1 -1
- data/lib/content_server/backup_server.rb +4 -27
- data/lib/content_server/content_server.rb +3 -27
- data/lib/content_server/file_streamer.rb +2 -0
- data/lib/content_server/remote_content.rb +1 -2
- data/lib/content_server/server.rb +23 -3
- data/lib/content_server/version.rb +1 -1
- data/lib/email/version.rb +1 -1
- data/lib/file_copy/version.rb +1 -1
- data/lib/file_indexing/index_agent.rb +1 -1
- data/lib/file_indexing/version.rb +1 -1
- data/lib/file_monitoring/file_monitoring.rb +45 -32
- data/lib/file_monitoring/monitor_path.rb +219 -181
- data/lib/file_monitoring/version.rb +1 -1
- data/lib/file_utils/file_generator/file_generator.rb +1 -1
- data/lib/file_utils/file_utils.rb +2 -2
- data/lib/file_utils/version.rb +1 -1
- data/lib/log/version.rb +1 -1
- data/lib/networking/version.rb +1 -1
- data/lib/params/version.rb +1 -1
- data/lib/process_monitoring/version.rb +1 -1
- data/lib/run_in_background/version.rb +1 -1
- data/lib/testing_memory/testing_memory.rb +1 -1
- data/lib/testing_server/testing_server.rb +1 -1
- data/lib/testing_server/version.rb +1 -1
- data/spec/content_data/validations_spec.rb +2 -2
- data/spec/content_server/file_streamer_spec.rb +5 -0
- data/spec/networking/tcp_spec.rb +1 -3
- data/spec/validations/index_validations_spec.rb +2 -2
- data/test/content_data/content_data_test.rb +8 -7
- data/test/file_generator/file_generator_spec.rb +3 -2
- data/test/file_monitoring/monitor_path_test.rb +38 -4
- data/test/file_utils/fileutil_mksymlink_test.rb +9 -0
- data/test/file_utils/time_modification_test.rb +6 -2
- data/test/run_in_background/test_app +17 -15
- metadata +2 -93
- data/lib/content_server/queue_indexer.rb +0 -86
- data/test/file_indexing/index_agent_test.rb +0 -51
- data/test/file_indexing/index_agent_test/New.txt +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libexslt.dll +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libxslt.dll +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/xsltproc.exe +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exslt.h +0 -102
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltconfig.h +0 -73
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltexports.h +0 -140
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/libexslt.h +0 -29
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/attributes.h +0 -38
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/documents.h +0 -93
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extensions.h +0 -262
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extra.h +0 -80
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/functions.h +0 -78
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/imports.h +0 -75
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/keys.h +0 -53
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/libxslt.h +0 -30
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/namespaces.h +0 -68
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/numbersInternals.h +0 -69
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/pattern.h +0 -81
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/preproc.h +0 -43
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/security.h +0 -104
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/templates.h +0 -77
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/transform.h +0 -207
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/trio.h +0 -216
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/triodef.h +0 -220
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/variables.h +0 -91
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/win32config.h +0 -101
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xslt.h +0 -103
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltInternals.h +0 -1967
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltconfig.h +0 -172
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltexports.h +0 -142
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltlocale.h +0 -57
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltutils.h +0 -309
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltwin32config.h +0 -105
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt_a.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt_a.lib +0 -0
- data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/readme.txt +0 -22
- data/test/file_indexing/index_agent_test/patterns.input +0 -3
- data/test/file_monitoring/file_monitoring_test.rb +0 -0
- data/test/file_monitoring/file_monitoring_test/conf.yml +0 -4
- data/test/file_monitoring/file_monitoring_test/conf_win32.yml +0 -5
- data/test/file_monitoring/file_monitoring_test/log +0 -56
|
@@ -190,11 +190,13 @@ module ContentServer
|
|
|
190
190
|
handle_new_stream(file_checksum, 0) if !@streams.key?(file_checksum)
|
|
191
191
|
# Finalize the file copy.
|
|
192
192
|
handle_last_chunk(file_checksum)
|
|
193
|
+
return true
|
|
193
194
|
else
|
|
194
195
|
Log.warning("Unexpected receive chuck message. file_checksum:#{file_checksum}, " \
|
|
195
196
|
"content.nil?:#{content.nil?}, content_checksum:#{content_checksum}")
|
|
196
197
|
return false
|
|
197
198
|
end
|
|
199
|
+
Log.error('Code should never reach this point')
|
|
198
200
|
end
|
|
199
201
|
|
|
200
202
|
# open new stream
|
|
@@ -23,7 +23,6 @@ module ContentServer
|
|
|
23
23
|
def initialize(host, port, local_backup_folder)
|
|
24
24
|
@remote_tcp = Networking::TCPClient.new(host, port, method(:receive_content))
|
|
25
25
|
@last_fetch_timestamp = nil
|
|
26
|
-
@last_save_timestamp = nil
|
|
27
26
|
@last_content_data_id = nil
|
|
28
27
|
@content_server_content_data_path = File.join(local_backup_folder, 'remote',
|
|
29
28
|
host + '_' + port.to_s)
|
|
@@ -40,7 +39,7 @@ module ContentServer
|
|
|
40
39
|
|
|
41
40
|
# Update remote content data and write to file if changed ContentData received
|
|
42
41
|
if(message.unique_id != @last_content_data_id)
|
|
43
|
-
path = File.join(@content_server_content_data_path, @
|
|
42
|
+
path = File.join(@content_server_content_data_path, @last_fetch_timestamp.to_s + '.cd')
|
|
44
43
|
FileUtils.makedirs(@content_server_content_data_path) unless \
|
|
45
44
|
File.directory?(@content_server_content_data_path)
|
|
46
45
|
$remote_content_data_lock.synchronize{
|
|
@@ -31,11 +31,12 @@ module ContentServer
|
|
|
31
31
|
$local_content_data_lock = nil
|
|
32
32
|
$remote_content_data_lock = nil
|
|
33
33
|
$remote_content_data = nil
|
|
34
|
+
$last_content_data_id = nil
|
|
34
35
|
end
|
|
35
36
|
|
|
36
37
|
def handle_program_termination(exception)
|
|
37
38
|
#Write exception message to console
|
|
38
|
-
message = "\nInterrupt or Exit happened in server:'
|
|
39
|
+
message = "\nInterrupt or Exit happened in server:''.\n" +
|
|
39
40
|
"Exception type:'#{exception.class}'.\n" +
|
|
40
41
|
"Exception message:'#{exception.message}'.\n" +
|
|
41
42
|
"Stopping process.\n" +
|
|
@@ -74,7 +75,7 @@ module ContentServer
|
|
|
74
75
|
current_objects_counters['DirStat'] = dir_count
|
|
75
76
|
file_count = ObjectSpace.each_object(FileMonitoring::FileStat).count
|
|
76
77
|
$process_vars.set('FileStat count', file_count-dir_count)
|
|
77
|
-
current_objects_counters['FileStat'] = file_count
|
|
78
|
+
current_objects_counters['FileStat'] = file_count
|
|
78
79
|
|
|
79
80
|
# Generate report and update global counters
|
|
80
81
|
report = ""
|
|
@@ -88,5 +89,24 @@ module ContentServer
|
|
|
88
89
|
end
|
|
89
90
|
end
|
|
90
91
|
|
|
91
|
-
|
|
92
|
+
def flush_content_data
|
|
93
|
+
Log.debug1('Start flush local content data to file.')
|
|
94
|
+
$testing_memory_log.info('Start flush content data to file') if $testing_memory_active
|
|
95
|
+
|
|
96
|
+
$local_content_data_lock.synchronize{
|
|
97
|
+
local_content_data_unique_id = $local_content_data.unique_id
|
|
98
|
+
if (local_content_data_unique_id != $last_content_data_id)
|
|
99
|
+
$last_content_data_id = local_content_data_unique_id
|
|
100
|
+
$local_content_data.to_file($tmp_content_data_file)
|
|
101
|
+
File.rename($tmp_content_data_file, Params['local_content_data_path'])
|
|
102
|
+
Log.debug1('End flush local content data to file.')
|
|
103
|
+
$testing_memory_log.info('End flush content data to file') if $testing_memory_active
|
|
104
|
+
else
|
|
105
|
+
Log.debug1('no need to flush. content data has not changed')
|
|
106
|
+
$testing_memory_log.info('no need to flush. content data has not changed') if $testing_memory_active
|
|
107
|
+
end
|
|
108
|
+
}
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
module_function :init_globals, :handle_program_termination, :monitor_general_process_vars, :flush_content_data
|
|
92
112
|
end
|
data/lib/email/version.rb
CHANGED
data/lib/file_copy/version.rb
CHANGED
|
@@ -126,7 +126,7 @@ module FileIndexing
|
|
|
126
126
|
# from further processing (save checksum calculation)
|
|
127
127
|
file_match = false
|
|
128
128
|
otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
|
129
|
-
if otherDB_updated.instance_exists(file, local_server_name
|
|
129
|
+
if otherDB_updated.instance_exists(file, local_server_name)
|
|
130
130
|
if size == file_stats.size and instance_mod_time == file_mtime.to_i
|
|
131
131
|
@indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
|
|
132
132
|
file_match = true
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
#todo: stable state not working. after 2 cycles...
|
|
2
|
+
|
|
1
3
|
require 'algorithms'
|
|
2
4
|
require 'fileutils'
|
|
3
5
|
require 'log4r'
|
|
@@ -9,12 +11,6 @@ module FileMonitoring
|
|
|
9
11
|
# Manages file monitoring of number of file system locations
|
|
10
12
|
class FileMonitoring
|
|
11
13
|
|
|
12
|
-
# Set event queue used for communication between different proceses.
|
|
13
|
-
# @param queue [Queue]
|
|
14
|
-
def set_event_queue(queue)
|
|
15
|
-
@event_queue = queue
|
|
16
|
-
end
|
|
17
|
-
|
|
18
14
|
# The main method. Loops on all paths, each time span and monitors them.
|
|
19
15
|
#
|
|
20
16
|
# =Algorithm:
|
|
@@ -32,16 +28,16 @@ module FileMonitoring
|
|
|
32
28
|
# create root dirs of monitoring
|
|
33
29
|
dir_stat_array = []
|
|
34
30
|
conf_array.each { |elem|
|
|
35
|
-
dir_stat = DirStat.new(File.expand_path(elem['path'])
|
|
36
|
-
|
|
37
|
-
dir_stat_array.push(dir_stat)
|
|
31
|
+
dir_stat = DirStat.new(File.expand_path(elem['path']))
|
|
32
|
+
dir_stat_array.push([dir_stat, elem['stable_state']])
|
|
38
33
|
}
|
|
39
34
|
|
|
40
35
|
#Look over loaded content data if not empty
|
|
41
36
|
# If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
|
|
42
37
|
# If file is NOT under monitoring path - skip (not a valid usage)
|
|
43
38
|
unless $local_content_data.empty?
|
|
44
|
-
Log.info("Start build data base from loaded file")
|
|
39
|
+
Log.info("Start build data base from loaded file. This could take several minutes")
|
|
40
|
+
inst_count = 0
|
|
45
41
|
$local_content_data.each_instance {
|
|
46
42
|
|_, size, _, mod_time, _, path|
|
|
47
43
|
# construct sub paths array from full file path:
|
|
@@ -63,15 +59,19 @@ module FileMonitoring
|
|
|
63
59
|
# if index is found then it is the monitor path
|
|
64
60
|
# the next index indicates the next sub path to insert to the tree
|
|
65
61
|
# the index will be raised at each recursive call down the tree
|
|
66
|
-
sub_paths_index = sub_paths.index(dir_stat.path)
|
|
62
|
+
sub_paths_index = sub_paths.index(dir_stat[0].path)
|
|
67
63
|
next if sub_paths_index.nil? # monitor path was not found. skip this instance.
|
|
68
|
-
|
|
69
|
-
|
|
64
|
+
|
|
65
|
+
# monitor path was found. Add to tree
|
|
70
66
|
# start the recursive call with next sub path index
|
|
71
|
-
|
|
67
|
+
::FileMonitoring.stable_state = dir_stat[1]
|
|
68
|
+
inst_count += 1
|
|
69
|
+
dir_stat[0].load_instance(sub_paths, sub_paths_index+1, size, mod_time)
|
|
70
|
+
break
|
|
72
71
|
}
|
|
73
72
|
}
|
|
74
|
-
Log.info("End build data base from loaded file")
|
|
73
|
+
Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
|
|
74
|
+
$last_content_data_id = $local_content_data.unique_id
|
|
75
75
|
end
|
|
76
76
|
|
|
77
77
|
# Directories states stored in the priority queue,
|
|
@@ -81,7 +81,7 @@ module FileMonitoring
|
|
|
81
81
|
conf_array.each_with_index { |elem, index|
|
|
82
82
|
priority = (Time.now + elem['scan_period']).to_i
|
|
83
83
|
#Log.info("File monitoring started for: #{elem}")
|
|
84
|
-
pq.push([priority, elem, dir_stat_array[index]], -priority)
|
|
84
|
+
pq.push([priority, elem, dir_stat_array[index][0]], -priority)
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
#init log4r
|
|
@@ -103,33 +103,46 @@ module FileMonitoring
|
|
|
103
103
|
file_outputter.level = Log4r::INFO
|
|
104
104
|
file_outputter.formatter = formatter
|
|
105
105
|
@log4r.outputters << file_outputter
|
|
106
|
-
|
|
106
|
+
::FileMonitoring::DirStat.set_log(@log4r)
|
|
107
107
|
|
|
108
108
|
while true do
|
|
109
109
|
# pull entry that should be checked next,
|
|
110
110
|
# according to it's scan_period
|
|
111
|
-
time,
|
|
111
|
+
time, elem, dir_stat = pq.pop
|
|
112
112
|
# time remains to wait before directory should be checked
|
|
113
113
|
time_span = time - Time.now.to_i
|
|
114
114
|
if (time_span > 0)
|
|
115
115
|
sleep(time_span)
|
|
116
116
|
end
|
|
117
117
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
$testing_memory_log.info("Start monitor at :#{Time.now}")
|
|
124
|
-
puts "Start monitor at :#{Time.now}"
|
|
125
|
-
dir_stat.monitor
|
|
126
|
-
$testing_memory_log.info("End monitor at :#{Time.now}")
|
|
127
|
-
puts "End monitor at :#{Time.now}"
|
|
128
|
-
end
|
|
118
|
+
# Start monitor
|
|
119
|
+
Log.info("Start monitor path:%s ", dir_stat.path)
|
|
120
|
+
$testing_memory_log.info("Start monitor path:#{dir_stat.path}") if $testing_memory_active
|
|
121
|
+
::FileMonitoring.stable_state=elem['stable_state']
|
|
122
|
+
dir_stat.monitor
|
|
129
123
|
|
|
130
|
-
#
|
|
131
|
-
|
|
132
|
-
|
|
124
|
+
# Start index
|
|
125
|
+
Log.info("Start index path:%s ", dir_stat.path)
|
|
126
|
+
$testing_memory_log.info("Start index path:#{dir_stat.path}") if $testing_memory_active
|
|
127
|
+
dir_stat.index
|
|
128
|
+
|
|
129
|
+
# print number of indexed files
|
|
130
|
+
Log.debug1("indexed file count:%s", $indexed_file_count)
|
|
131
|
+
$testing_memory_log.info("indexed file count: #{$indexed_file_count}") if $testing_memory_active
|
|
132
|
+
|
|
133
|
+
# remove non existing (not marked) files\dirs
|
|
134
|
+
Log.info('Start remove non existing paths')
|
|
135
|
+
$testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
|
|
136
|
+
dir_stat.removed_unmarked_paths
|
|
137
|
+
Log.info('End monitor path and index')
|
|
138
|
+
$testing_memory_log.info('End monitor path and index') if $testing_memory_active
|
|
139
|
+
|
|
140
|
+
#flush content data if changed
|
|
141
|
+
ContentServer.flush_content_data
|
|
142
|
+
|
|
143
|
+
#Add back to queue
|
|
144
|
+
priority = (Time.now + elem['scan_period']).to_i
|
|
145
|
+
pq.push([priority, elem, dir_stat], -priority)
|
|
133
146
|
end
|
|
134
147
|
end
|
|
135
148
|
end
|
|
@@ -11,8 +11,6 @@ module FileMonitoring
|
|
|
11
11
|
# * <tt>CHANGED</tt> - State was changed between two checks
|
|
12
12
|
# * <tt>UNCHANGED</tt> - Opposite to CHANGED
|
|
13
13
|
# * <tt>STABLE</tt> - Entity is in the UNCHANGED state for a defined (by user) number of iterations
|
|
14
|
-
|
|
15
|
-
|
|
16
14
|
class FileStatEnum
|
|
17
15
|
NON_EXISTING = "NON_EXISTING"
|
|
18
16
|
NEW = "NEW"
|
|
@@ -21,76 +19,78 @@ module FileMonitoring
|
|
|
21
19
|
STABLE = "STABLE"
|
|
22
20
|
end
|
|
23
21
|
|
|
22
|
+
# Number of iterations to move state from UNCHANGED to STABLE (for index)
|
|
23
|
+
@@stable_state = 10
|
|
24
|
+
|
|
25
|
+
def self.stable_state=(stable_state)
|
|
26
|
+
@@stable_state = stable_state
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def self.stable_state
|
|
30
|
+
@@stable_state
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
public_class_method :stable_state, :stable_state=
|
|
34
|
+
|
|
24
35
|
# This class holds current state of file and methods to control and report changes
|
|
25
36
|
class FileStat
|
|
26
|
-
|
|
27
|
-
attr_accessor :state, :size, :modification_time
|
|
37
|
+
attr_accessor :path, :state, :size, :modification_time, :marked, :cycles
|
|
28
38
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@@log = nil
|
|
39
|
+
@@digest = Digest::SHA1.new
|
|
32
40
|
|
|
33
41
|
# Initializes new file monitoring object
|
|
34
42
|
# ==== Arguments:
|
|
35
43
|
#
|
|
36
|
-
# * <tt>path</tt> - File
|
|
37
|
-
# * <tt>
|
|
38
|
-
# * <tt>
|
|
39
|
-
# * <tt>
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
@path
|
|
43
|
-
@size = nil
|
|
44
|
-
@modification_time = nil
|
|
45
|
-
@cycles = 0
|
|
46
|
-
@stable_state = stable_state
|
|
47
|
-
end
|
|
44
|
+
# * <tt>path</tt> - File\Dir path
|
|
45
|
+
# * <tt>state</tt> - state. see class FileStatEnum. Default is NEW
|
|
46
|
+
# * <tt>size</tt> - File size [Byte]. Default is -1 (will be set later during monitor) todo:used?
|
|
47
|
+
# * <tt>mod_time</tt> - file mod time [seconds]. Default is -1 (will be set later during monitor)
|
|
48
|
+
def initialize(path, state=FileStatEnum::NEW, size=-1, mod_time=-1, indexed=false)
|
|
49
|
+
# File\Dir path
|
|
50
|
+
@path = path
|
|
48
51
|
|
|
49
|
-
|
|
50
|
-
@
|
|
51
|
-
end
|
|
52
|
+
# File size
|
|
53
|
+
@size = size
|
|
52
54
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
55
|
+
# File modification time
|
|
56
|
+
@modification_time = mod_time
|
|
57
|
+
|
|
58
|
+
# File state. See class FileStatEnum for states.
|
|
59
|
+
@state = state
|
|
60
|
+
|
|
61
|
+
# indicates if path EXISTS in file system.
|
|
62
|
+
# If true, file will not be removed during removed_unmarked_paths phase.
|
|
63
|
+
@marked = false
|
|
64
|
+
|
|
65
|
+
# Number of times that file was monitored and not changed.
|
|
66
|
+
# When @cycles reaches ::FileMonitoring.stable_state, @state is set to STABLE and the file can be indexed.
|
|
67
|
+
@cycles = 0
|
|
68
|
+
|
|
69
|
+
# flag to indicate if file was indexed
|
|
70
|
+
@indexed = indexed
|
|
59
71
|
end
|
|
60
72
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
@size = file_stats.size
|
|
81
|
-
#@creation_time = file_stats.ctime.utc
|
|
82
|
-
@modification_time = file_stats.mtime.to_i
|
|
83
|
-
@cycles = 0
|
|
84
|
-
else
|
|
85
|
-
new_state = FileStatEnum::UNCHANGED
|
|
86
|
-
@cycles += 1
|
|
87
|
-
if @cycles >= @stable_state
|
|
88
|
-
new_state = FileStatEnum::STABLE
|
|
73
|
+
def index
|
|
74
|
+
if !@indexed and FileStatEnum::STABLE == @state
|
|
75
|
+
#index file
|
|
76
|
+
@@digest.reset
|
|
77
|
+
begin
|
|
78
|
+
File.open(@path, 'rb') { |f|
|
|
79
|
+
while buffer = f.read(16384) do
|
|
80
|
+
@@digest << buffer
|
|
81
|
+
end
|
|
82
|
+
}
|
|
83
|
+
$local_content_data_lock.synchronize{
|
|
84
|
+
$local_content_data.add_instance(@@digest.hexdigest.downcase, @size, Params['local_server_name'],
|
|
85
|
+
@path, @modification_time)
|
|
86
|
+
}
|
|
87
|
+
#$process_vars.inc('indexed_files')
|
|
88
|
+
$indexed_file_count += 1
|
|
89
|
+
@indexed = true
|
|
90
|
+
rescue
|
|
91
|
+
Log.warning("Indexed path'#{@path}' does not exist. Probably file changed")
|
|
89
92
|
end
|
|
90
93
|
end
|
|
91
|
-
|
|
92
|
-
# The assignment
|
|
93
|
-
set_state(new_state)
|
|
94
94
|
end
|
|
95
95
|
|
|
96
96
|
# Checks that stored file attributes are the same as file attributes taken from file system.
|
|
@@ -99,29 +99,9 @@ module FileMonitoring
|
|
|
99
99
|
(file_stats.mtime.to_i == @modification_time))
|
|
100
100
|
end
|
|
101
101
|
|
|
102
|
-
def set_event_queue(queue)
|
|
103
|
-
@event_queue = queue
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
# Sets and writes to the log a new state.
|
|
107
|
-
def set_state(new_state)
|
|
108
|
-
if (@state != new_state or @state == FileStatEnum::CHANGED)
|
|
109
|
-
@state = new_state
|
|
110
|
-
if (@@log)
|
|
111
|
-
@@log.info(state + ": " + path)
|
|
112
|
-
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
113
|
-
end
|
|
114
|
-
if @event_queue and FileStatEnum::NEW != @state # NEW state is ignored in indexer
|
|
115
|
-
Log.debug1("Writing to event queue [%s, %s]", @state, @path)
|
|
116
|
-
@event_queue.push([@state, self.instance_of?(DirStat), @path, @modification_time, @size])
|
|
117
|
-
$process_vars.set('monitor to index queue size', @event_queue.size)
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
102
|
# Checks whether the path is the same as that of the argument
|
|
123
103
|
def == (other)
|
|
124
|
-
@path == other.path
|
|
104
|
+
@path == other.path
|
|
125
105
|
end
|
|
126
106
|
|
|
127
107
|
# Returns path and state of the file with indentation
|
|
@@ -131,17 +111,31 @@ module FileMonitoring
|
|
|
131
111
|
end
|
|
132
112
|
|
|
133
113
|
# This class holds current state of directory and methods to control changes
|
|
134
|
-
class DirStat
|
|
114
|
+
class DirStat
|
|
115
|
+
attr_accessor :path, :marked
|
|
116
|
+
|
|
117
|
+
@@log = nil
|
|
118
|
+
|
|
119
|
+
def self.set_log (log)
|
|
120
|
+
@@log = log
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
public_class_method :set_log
|
|
124
|
+
|
|
135
125
|
# Initializes new directory monitoring object
|
|
136
126
|
# ==== Arguments:
|
|
137
127
|
#
|
|
138
|
-
# * <tt>path</tt> -
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
@
|
|
143
|
-
@files = nil # Hash: ["path" -> FileStat]
|
|
128
|
+
# * <tt>path</tt> - Dir location
|
|
129
|
+
def initialize(path)
|
|
130
|
+
@path = path
|
|
131
|
+
@dirs = {}
|
|
132
|
+
@files = {}
|
|
144
133
|
@non_utf8_paths = {} # Hash: ["path" -> true|false]
|
|
134
|
+
|
|
135
|
+
# indicates if path EXISTS in file system.
|
|
136
|
+
# If true, file will not be removed during removed_unmarked_paths phase.
|
|
137
|
+
@marked = false
|
|
138
|
+
|
|
145
139
|
end
|
|
146
140
|
|
|
147
141
|
# add instance while initializing tree using content data from file
|
|
@@ -163,20 +157,14 @@ module FileMonitoring
|
|
|
163
157
|
@files = {} unless @files
|
|
164
158
|
if sub_paths.size-1 == sub_paths_index
|
|
165
159
|
# Add File case - index points to last entry - leaf case.
|
|
166
|
-
file_stat = FileStat.new(sub_paths[sub_paths_index],
|
|
167
|
-
file_stat.set_event_queue(@event_queue)
|
|
168
|
-
file_stat.size = size
|
|
169
|
-
file_stat.modification_time = modification_time
|
|
170
|
-
file_stat.state = FileStatEnum::STABLE
|
|
160
|
+
file_stat = FileStat.new(sub_paths[sub_paths_index], FileStatEnum::STABLE, size, modification_time, true)
|
|
171
161
|
add_file(file_stat)
|
|
172
162
|
else
|
|
173
163
|
# Add Dir to tree if not present. index points to new dir path.
|
|
174
164
|
dir_stat = @dirs[sub_paths[sub_paths_index]]
|
|
175
165
|
#create new dir if not exist
|
|
176
166
|
unless dir_stat
|
|
177
|
-
dir_stat = DirStat.new(sub_paths[sub_paths_index]
|
|
178
|
-
dir_stat.state = FileStatEnum::STABLE
|
|
179
|
-
dir_stat.set_event_queue(@event_queue)
|
|
167
|
+
dir_stat = DirStat.new(sub_paths[sub_paths_index])
|
|
180
168
|
add_dir(dir_stat)
|
|
181
169
|
end
|
|
182
170
|
# continue recursive call on tree with next sub path index
|
|
@@ -228,111 +216,161 @@ module FileMonitoring
|
|
|
228
216
|
res
|
|
229
217
|
end
|
|
230
218
|
|
|
231
|
-
#
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
else
|
|
252
|
-
new_state = FileStatEnum::UNCHANGED
|
|
253
|
-
@cycles += 1
|
|
254
|
-
if @cycles >= @stable_state
|
|
255
|
-
new_state = FileStatEnum::STABLE
|
|
256
|
-
end
|
|
257
|
-
end
|
|
258
|
-
|
|
259
|
-
# The assignment
|
|
260
|
-
set_state(new_state)
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
# Updates the files and directories hashes and globs the directory for changes.
|
|
264
|
-
def update_dir
|
|
265
|
-
was_changed = false
|
|
266
|
-
|
|
267
|
-
# monitor existing and absent files
|
|
268
|
-
@files.each_value do |file|
|
|
269
|
-
file.monitor
|
|
270
|
-
|
|
271
|
-
if file.state == FileStatEnum::NON_EXISTING
|
|
272
|
-
was_changed = true
|
|
273
|
-
rm_file(file)
|
|
219
|
+
# Recursively, remove non existing files and dirs in Tree
|
|
220
|
+
def removed_unmarked_paths
|
|
221
|
+
#remove dirs
|
|
222
|
+
dirs_enum = @dirs.each_value
|
|
223
|
+
loop do
|
|
224
|
+
dir_stat = dirs_enum.next rescue break
|
|
225
|
+
if dir_stat.marked
|
|
226
|
+
dir_stat.marked = false # unset flag for next monitoring\index\remove phase
|
|
227
|
+
#recursive call
|
|
228
|
+
dir_stat.removed_unmarked_paths
|
|
229
|
+
else
|
|
230
|
+
# directory is not marked. Remove it, since it does not exist.
|
|
231
|
+
#Log.debug1("Non Existing dir: %s", file_stat.path)
|
|
232
|
+
@@log.info("NON_EXISTING dir: " + dir_stat.path)
|
|
233
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
234
|
+
# remove the non-existing directory from the local content data
|
|
235
|
+
$local_content_data_lock.synchronize{
|
|
236
|
+
$local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
|
|
237
|
+
}
|
|
238
|
+
rm_dir(dir_stat)
|
|
274
239
|
end
|
|
275
240
|
end
|
|
276
241
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
242
|
+
#remove files
|
|
243
|
+
files_enum = @files.each_value
|
|
244
|
+
loop do
|
|
245
|
+
file_stat = files_enum.next rescue break
|
|
246
|
+
if file_stat.marked
|
|
247
|
+
file_stat.marked = false # unset flag for next monitoring\index\remove phase
|
|
248
|
+
else
|
|
249
|
+
# file is not marked, meaning it no longer exists. Remove.
|
|
250
|
+
#Log.debug1("Non Existing file: %s", file_stat.path)
|
|
251
|
+
@@log.info("NON_EXISTING file: " + file_stat.path)
|
|
252
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
253
|
+
# remove the non-existing file instance from the local content data
|
|
254
|
+
$local_content_data_lock.synchronize{
|
|
255
|
+
$local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
|
|
256
|
+
}
|
|
257
|
+
rm_file(file_stat)
|
|
283
258
|
end
|
|
284
259
|
end
|
|
285
|
-
|
|
286
|
-
was_changed = was_changed || glob_me
|
|
287
|
-
|
|
288
|
-
return was_changed
|
|
289
260
|
end
|
|
290
261
|
|
|
291
|
-
#
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
262
|
+
# Recursively, read files and dirs from file system (using Glob)
|
|
263
|
+
# Handle new files\dirs.
|
|
264
|
+
# Change state for existing files\dirs
|
|
265
|
+
# Index stable files
|
|
266
|
+
# Removal of non existing files\dirs is handled in method: removed_unmarked_paths
|
|
267
|
+
def monitor
|
|
295
268
|
|
|
296
|
-
#
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
269
|
+
# Algorithm:
|
|
270
|
+
# assume that current dir is present
|
|
271
|
+
# ls (glob) the dir path for child dirs and files
|
|
272
|
+
# if child file is not already present, add it as new, mark it and handle its state
|
|
273
|
+
# if file already present, mark it and handle its state.
|
|
274
|
+
# if child dir is not already present, add it as new, mark it and propagates
|
|
275
|
+
# the recursive call
|
|
276
|
+
# if child dir already present, mark it and handle its state
|
|
277
|
+
# marked files will not be removed in next remove phase
|
|
278
|
+
|
|
279
|
+
# ls (glob) the dir path for child dirs and files
|
|
280
|
+
globed_paths_enum = Dir.glob(@path + "/*").to_enum
|
|
281
|
+
loop do
|
|
282
|
+
globed_path = globed_paths_enum.next rescue break
|
|
283
|
+
|
|
284
|
+
# UTF-8 - keep only files with names in
|
|
285
|
+
next if @non_utf8_paths[globed_path]
|
|
286
|
+
check_utf_8_encoding_file = globed_path.clone
|
|
301
287
|
unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
|
|
302
288
|
Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
|
|
303
|
-
@non_utf8_paths[
|
|
289
|
+
@non_utf8_paths[globed_path]=true
|
|
290
|
+
check_utf_8_encoding_file=nil
|
|
304
291
|
next
|
|
305
292
|
end
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
293
|
+
|
|
294
|
+
# Get File \ Dir status
|
|
295
|
+
globed_path_stat = File.lstat(globed_path) rescue next # File or dir removed from OS file system
|
|
296
|
+
if globed_path_stat.file?
|
|
297
|
+
# File case
|
|
298
|
+
child_stat = @files[globed_path]
|
|
299
|
+
if child_stat
|
|
300
|
+
# file child exists in Tree
|
|
301
|
+
child_stat.marked = true
|
|
302
|
+
if child_stat.changed?(globed_path_stat)
|
|
303
|
+
# Update changed status
|
|
304
|
+
child_stat.state = FileStatEnum::CHANGED
|
|
305
|
+
child_stat.cycles = 0
|
|
306
|
+
child_stat.size = globed_path_stat.size
|
|
307
|
+
child_stat.modification_time = globed_path_stat.mtime.to_i
|
|
308
|
+
@@log.info("CHANGED file: " + globed_path)
|
|
309
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
310
|
+
#Log.debug1("CHANGED file: #{globed_path}")
|
|
311
|
+
# remove file with changed checksum. File will be added once indexed
|
|
312
|
+
$local_content_data_lock.synchronize{
|
|
313
|
+
$local_content_data.remove_instance(Params['local_server_name'], globed_path)
|
|
314
|
+
}
|
|
315
|
+
else
|
|
316
|
+
# File status is the same
|
|
317
|
+
if child_stat.state != FileStatEnum::STABLE
|
|
318
|
+
child_stat.state = FileStatEnum::UNCHANGED
|
|
319
|
+
child_stat.cycles += 1
|
|
320
|
+
if child_stat.cycles >= ::FileMonitoring.stable_state
|
|
321
|
+
child_stat.state = FileStatEnum::STABLE
|
|
322
|
+
@@log.info("STABLE file: " + globed_path)
|
|
323
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
324
|
+
else
|
|
325
|
+
@@log.info("UNCHANGED file: " + globed_path)
|
|
326
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
327
|
+
end
|
|
328
|
+
end
|
|
329
|
+
end
|
|
330
|
+
else
|
|
331
|
+
# new File child:
|
|
332
|
+
child_stat = FileStat.new(globed_path, FileStatEnum::NEW,
|
|
333
|
+
globed_path_stat.size, globed_path_stat.mtime.to_i)
|
|
334
|
+
@@log.info("NEW file: " + globed_path)
|
|
335
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
336
|
+
child_stat.marked = true
|
|
337
|
+
add_file(child_stat)
|
|
316
338
|
end
|
|
317
|
-
else
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
fs.monitor
|
|
327
|
-
add_file(fs)
|
|
339
|
+
else
|
|
340
|
+
# Dir
|
|
341
|
+
child_stat = @dirs[globed_path]
|
|
342
|
+
# Add Dir if not exists in Tree
|
|
343
|
+
unless child_stat
|
|
344
|
+
child_stat = DirStat.new(globed_path)
|
|
345
|
+
add_dir(child_stat)
|
|
346
|
+
@@log.info("NEW dir: " + globed_path)
|
|
347
|
+
@@log.outputters[0].flush if Params['log_flush_each_message']
|
|
328
348
|
end
|
|
349
|
+
child_stat.marked = true
|
|
350
|
+
#recursive call for dirs
|
|
351
|
+
child_stat.monitor
|
|
329
352
|
end
|
|
330
353
|
end
|
|
354
|
+
GC.start
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
def index
|
|
358
|
+
files_enum = @files.each_value
|
|
359
|
+
index_counter = $indexed_file_count # to check if files where actually indexed
|
|
360
|
+
loop do
|
|
361
|
+
file_stat = files_enum.next rescue break
|
|
362
|
+
file_stat.index # file index
|
|
363
|
+
end
|
|
364
|
+
GC.start if index_counter != $indexed_file_count # GC only if files where indexed
|
|
331
365
|
|
|
332
|
-
|
|
366
|
+
dirs_enum = @dirs.each_value
|
|
367
|
+
loop do
|
|
368
|
+
dir_stat = dirs_enum.next rescue break
|
|
369
|
+
dir_stat.index # dir recursive call
|
|
370
|
+
end
|
|
333
371
|
end
|
|
334
372
|
|
|
335
|
-
protected :add_dir, :add_file, :rm_dir, :rm_file
|
|
373
|
+
protected :add_dir, :add_file, :rm_dir, :rm_file
|
|
336
374
|
end
|
|
337
375
|
|
|
338
376
|
end
|