content_server 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. data/lib/content_data/content_data.rb +194 -56
  2. data/lib/content_data/version.rb +1 -1
  3. data/lib/content_server/backup_server.rb +4 -27
  4. data/lib/content_server/content_server.rb +3 -27
  5. data/lib/content_server/file_streamer.rb +2 -0
  6. data/lib/content_server/remote_content.rb +1 -2
  7. data/lib/content_server/server.rb +23 -3
  8. data/lib/content_server/version.rb +1 -1
  9. data/lib/email/version.rb +1 -1
  10. data/lib/file_copy/version.rb +1 -1
  11. data/lib/file_indexing/index_agent.rb +1 -1
  12. data/lib/file_indexing/version.rb +1 -1
  13. data/lib/file_monitoring/file_monitoring.rb +45 -32
  14. data/lib/file_monitoring/monitor_path.rb +219 -181
  15. data/lib/file_monitoring/version.rb +1 -1
  16. data/lib/file_utils/file_generator/file_generator.rb +1 -1
  17. data/lib/file_utils/file_utils.rb +2 -2
  18. data/lib/file_utils/version.rb +1 -1
  19. data/lib/log/version.rb +1 -1
  20. data/lib/networking/version.rb +1 -1
  21. data/lib/params/version.rb +1 -1
  22. data/lib/process_monitoring/version.rb +1 -1
  23. data/lib/run_in_background/version.rb +1 -1
  24. data/lib/testing_memory/testing_memory.rb +1 -1
  25. data/lib/testing_server/testing_server.rb +1 -1
  26. data/lib/testing_server/version.rb +1 -1
  27. data/spec/content_data/validations_spec.rb +2 -2
  28. data/spec/content_server/file_streamer_spec.rb +5 -0
  29. data/spec/networking/tcp_spec.rb +1 -3
  30. data/spec/validations/index_validations_spec.rb +2 -2
  31. data/test/content_data/content_data_test.rb +8 -7
  32. data/test/file_generator/file_generator_spec.rb +3 -2
  33. data/test/file_monitoring/monitor_path_test.rb +38 -4
  34. data/test/file_utils/fileutil_mksymlink_test.rb +9 -0
  35. data/test/file_utils/time_modification_test.rb +6 -2
  36. data/test/run_in_background/test_app +17 -15
  37. metadata +2 -93
  38. data/lib/content_server/queue_indexer.rb +0 -86
  39. data/test/file_indexing/index_agent_test.rb +0 -51
  40. data/test/file_indexing/index_agent_test/New.txt +0 -0
  41. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libexslt.dll +0 -0
  42. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libxslt.dll +0 -0
  43. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/xsltproc.exe +0 -0
  44. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exslt.h +0 -102
  45. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltconfig.h +0 -73
  46. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltexports.h +0 -140
  47. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/libexslt.h +0 -29
  48. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/attributes.h +0 -38
  49. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/documents.h +0 -93
  50. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extensions.h +0 -262
  51. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extra.h +0 -80
  52. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/functions.h +0 -78
  53. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/imports.h +0 -75
  54. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/keys.h +0 -53
  55. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/libxslt.h +0 -30
  56. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/namespaces.h +0 -68
  57. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/numbersInternals.h +0 -69
  58. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/pattern.h +0 -81
  59. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/preproc.h +0 -43
  60. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/security.h +0 -104
  61. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/templates.h +0 -77
  62. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/transform.h +0 -207
  63. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/trio.h +0 -216
  64. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/triodef.h +0 -220
  65. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/variables.h +0 -91
  66. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/win32config.h +0 -101
  67. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xslt.h +0 -103
  68. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltInternals.h +0 -1967
  69. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltconfig.h +0 -172
  70. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltexports.h +0 -142
  71. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltlocale.h +0 -57
  72. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltutils.h +0 -309
  73. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltwin32config.h +0 -105
  74. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt.lib +0 -0
  75. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt_a.lib +0 -0
  76. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt.lib +0 -0
  77. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt_a.lib +0 -0
  78. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/readme.txt +0 -22
  79. data/test/file_indexing/index_agent_test/patterns.input +0 -3
  80. data/test/file_monitoring/file_monitoring_test.rb +0 -0
  81. data/test/file_monitoring/file_monitoring_test/conf.yml +0 -4
  82. data/test/file_monitoring/file_monitoring_test/conf_win32.yml +0 -5
  83. data/test/file_monitoring/file_monitoring_test/log +0 -56
@@ -190,11 +190,13 @@ module ContentServer
190
190
  handle_new_stream(file_checksum, 0) if !@streams.key?(file_checksum)
191
191
  # Finalize the file copy.
192
192
  handle_last_chunk(file_checksum)
193
+ return true
193
194
  else
194
195
  Log.warning("Unexpected receive chuck message. file_checksum:#{file_checksum}, " \
195
196
  "content.nil?:#{content.nil?}, content_checksum:#{content_checksum}")
196
197
  return false
197
198
  end
199
+ Log.error('Code should never reach this point')
198
200
  end
199
201
 
200
202
  # open new stream
@@ -23,7 +23,6 @@ module ContentServer
23
23
  def initialize(host, port, local_backup_folder)
24
24
  @remote_tcp = Networking::TCPClient.new(host, port, method(:receive_content))
25
25
  @last_fetch_timestamp = nil
26
- @last_save_timestamp = nil
27
26
  @last_content_data_id = nil
28
27
  @content_server_content_data_path = File.join(local_backup_folder, 'remote',
29
28
  host + '_' + port.to_s)
@@ -40,7 +39,7 @@ module ContentServer
40
39
 
41
40
  # Update remote content data and write to file if changed ContentData received
42
41
  if(message.unique_id != @last_content_data_id)
43
- path = File.join(@content_server_content_data_path, @last_save_timestamp.to_s + '.cd')
42
+ path = File.join(@content_server_content_data_path, @last_fetch_timestamp.to_s + '.cd')
44
43
  FileUtils.makedirs(@content_server_content_data_path) unless \
45
44
  File.directory?(@content_server_content_data_path)
46
45
  $remote_content_data_lock.synchronize{
@@ -31,11 +31,12 @@ module ContentServer
31
31
  $local_content_data_lock = nil
32
32
  $remote_content_data_lock = nil
33
33
  $remote_content_data = nil
34
+ $last_content_data_id = nil
34
35
  end
35
36
 
36
37
  def handle_program_termination(exception)
37
38
  #Write exception message to console
38
- message = "\nInterrupt or Exit happened in server:'#{Params['service_name']}'.\n" +
39
+ message = "\nInterrupt or Exit happened in server:''.\n" +
39
40
  "Exception type:'#{exception.class}'.\n" +
40
41
  "Exception message:'#{exception.message}'.\n" +
41
42
  "Stopping process.\n" +
@@ -74,7 +75,7 @@ module ContentServer
74
75
  current_objects_counters['DirStat'] = dir_count
75
76
  file_count = ObjectSpace.each_object(FileMonitoring::FileStat).count
76
77
  $process_vars.set('FileStat count', file_count-dir_count)
77
- current_objects_counters['FileStat'] = file_count-dir_count
78
+ current_objects_counters['FileStat'] = file_count
78
79
 
79
80
  # Generate report and update global counters
80
81
  report = ""
@@ -88,5 +89,24 @@ module ContentServer
88
89
  end
89
90
  end
90
91
 
91
- module_function :init_globals, :handle_program_termination, :monitor_general_process_vars
92
+ def flush_content_data
93
+ Log.debug1('Start flush local content data to file.')
94
+ $testing_memory_log.info('Start flush content data to file') if $testing_memory_active
95
+
96
+ $local_content_data_lock.synchronize{
97
+ local_content_data_unique_id = $local_content_data.unique_id
98
+ if (local_content_data_unique_id != $last_content_data_id)
99
+ $last_content_data_id = local_content_data_unique_id
100
+ $local_content_data.to_file($tmp_content_data_file)
101
+ File.rename($tmp_content_data_file, Params['local_content_data_path'])
102
+ Log.debug1('End flush local content data to file.')
103
+ $testing_memory_log.info('End flush content data to file') if $testing_memory_active
104
+ else
105
+ Log.debug1('no need to flush. content data has not changed')
106
+ $testing_memory_log.info('no need to flush. content data has not changed') if $testing_memory_active
107
+ end
108
+ }
109
+ end
110
+
111
+ module_function :init_globals, :handle_program_termination, :monitor_general_process_vars, :flush_content_data
92
112
  end
@@ -1,3 +1,3 @@
1
1
  module ContentServer
2
- VERSION = "1.3.1"
2
+ VERSION = "1.4.0"
3
3
  end
@@ -1,3 +1,3 @@
1
1
  module SendEmail
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -1,4 +1,4 @@
1
1
  module FileCopy
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
4
4
 
@@ -126,7 +126,7 @@ module FileIndexing
126
126
  # from further processing (save checksum calculation)
127
127
  file_match = false
128
128
  otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
129
- if otherDB_updated.instance_exists(file, local_server_name, checksum)
129
+ if otherDB_updated.instance_exists(file, local_server_name)
130
130
  if size == file_stats.size and instance_mod_time == file_mtime.to_i
131
131
  @indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
132
132
  file_match = true
@@ -1,3 +1,3 @@
1
1
  module FileIndexing
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -1,3 +1,5 @@
1
+ #todo: stable state not working. after 2 cycles...
2
+
1
3
  require 'algorithms'
2
4
  require 'fileutils'
3
5
  require 'log4r'
@@ -9,12 +11,6 @@ module FileMonitoring
9
11
  # Manages file monitoring of number of file system locations
10
12
  class FileMonitoring
11
13
 
12
- # Set event queue used for communication between different proceses.
13
- # @param queue [Queue]
14
- def set_event_queue(queue)
15
- @event_queue = queue
16
- end
17
-
18
14
  # The main method. Loops on all paths, each time span and monitors them.
19
15
  #
20
16
  # =Algorithm:
@@ -32,16 +28,16 @@ module FileMonitoring
32
28
  # create root dirs of monitoring
33
29
  dir_stat_array = []
34
30
  conf_array.each { |elem|
35
- dir_stat = DirStat.new(File.expand_path(elem['path']), elem['stable_state'])
36
- dir_stat.set_event_queue(@event_queue) if @event_queue
37
- dir_stat_array.push(dir_stat)
31
+ dir_stat = DirStat.new(File.expand_path(elem['path']))
32
+ dir_stat_array.push([dir_stat, elem['stable_state']])
38
33
  }
39
34
 
40
35
  #Look over loaded content data if not empty
41
36
  # If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
42
37
  # If file is NOT under monitoring path - skip (not a valid usage)
43
38
  unless $local_content_data.empty?
44
- Log.info("Start build data base from loaded file")
39
+ Log.info("Start build data base from loaded file. This could take several minutes")
40
+ inst_count = 0
45
41
  $local_content_data.each_instance {
46
42
  |_, size, _, mod_time, _, path|
47
43
  # construct sub paths array from full file path:
@@ -63,15 +59,19 @@ module FileMonitoring
63
59
  # if index is found then it is the monitor path
64
60
  # the next index indicates the next sub path to insert to the tree
65
61
  # the index will be raised at each recursive call down the tree
66
- sub_paths_index = sub_paths.index(dir_stat.path)
62
+ sub_paths_index = sub_paths.index(dir_stat[0].path)
67
63
  next if sub_paths_index.nil? # monitor path was not found. skip this instance.
68
- # monitor path was found. Add to tree as stable.
69
- dir_stat.state = FileStatEnum::STABLE
64
+
65
+ # monitor path was found. Add to tree
70
66
  # start the recursive call with next sub path index
71
- dir_stat.load_instance(sub_paths, sub_paths_index+1, size, mod_time)
67
+ ::FileMonitoring.stable_state = dir_stat[1]
68
+ inst_count += 1
69
+ dir_stat[0].load_instance(sub_paths, sub_paths_index+1, size, mod_time)
70
+ break
72
71
  }
73
72
  }
74
- Log.info("End build data base from loaded file")
73
+ Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
74
+ $last_content_data_id = $local_content_data.unique_id
75
75
  end
76
76
 
77
77
  # Directories states stored in the priority queue,
@@ -81,7 +81,7 @@ module FileMonitoring
81
81
  conf_array.each_with_index { |elem, index|
82
82
  priority = (Time.now + elem['scan_period']).to_i
83
83
  #Log.info("File monitoring started for: #{elem}")
84
- pq.push([priority, elem, dir_stat_array[index]], -priority)
84
+ pq.push([priority, elem, dir_stat_array[index][0]], -priority)
85
85
  }
86
86
 
87
87
  #init log4r
@@ -103,33 +103,46 @@ module FileMonitoring
103
103
  file_outputter.level = Log4r::INFO
104
104
  file_outputter.formatter = formatter
105
105
  @log4r.outputters << file_outputter
106
- FileStat.set_log(@log4r)
106
+ ::FileMonitoring::DirStat.set_log(@log4r)
107
107
 
108
108
  while true do
109
109
  # pull entry that should be checked next,
110
110
  # according to its scan_period
111
- time, conf, dir_stat = pq.pop
111
+ time, elem, dir_stat = pq.pop
112
112
  # time remains to wait before directory should be checked
113
113
  time_span = time - Time.now.to_i
114
114
  if (time_span > 0)
115
115
  sleep(time_span)
116
116
  end
117
117
 
118
- unless $testing_memory_active
119
- Log.info("Start monitor for dir:%s", dir_stat.path)
120
- dir_stat.monitor
121
- Log.info("End monitor for dir:%s", dir_stat.path)
122
- else
123
- $testing_memory_log.info("Start monitor at :#{Time.now}")
124
- puts "Start monitor at :#{Time.now}"
125
- dir_stat.monitor
126
- $testing_memory_log.info("End monitor at :#{Time.now}")
127
- puts "End monitor at :#{Time.now}"
128
- end
118
+ # Start monitor
119
+ Log.info("Start monitor path:%s ", dir_stat.path)
120
+ $testing_memory_log.info("Start monitor path:#{dir_stat.path}") if $testing_memory_active
121
+ ::FileMonitoring.stable_state=elem['stable_state']
122
+ dir_stat.monitor
129
123
 
130
- # push entry with new a next time it should be checked as a priority key
131
- priority = (Time.now + conf['scan_period']).to_i
132
- pq.push([priority, conf, dir_stat], -priority)
124
+ # Start index
125
+ Log.info("Start index path:%s ", dir_stat.path)
126
+ $testing_memory_log.info("Start index path:#{dir_stat.path}") if $testing_memory_active
127
+ dir_stat.index
128
+
129
+ # print number of indexed files
130
+ Log.debug1("indexed file count:%s", $indexed_file_count)
131
+ $testing_memory_log.info("indexed file count: #{$indexed_file_count}") if $testing_memory_active
132
+
133
+ # remove non existing (not marked) files\dirs
134
+ Log.info('Start remove non existing paths')
135
+ $testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
136
+ dir_stat.removed_unmarked_paths
137
+ Log.info('End monitor path and index')
138
+ $testing_memory_log.info('End monitor path and index') if $testing_memory_active
139
+
140
+ #flush content data if changed
141
+ ContentServer.flush_content_data
142
+
143
+ #Add back to queue
144
+ priority = (Time.now + elem['scan_period']).to_i
145
+ pq.push([priority, elem, dir_stat], -priority)
133
146
  end
134
147
  end
135
148
  end
@@ -11,8 +11,6 @@ module FileMonitoring
11
11
  # * <tt>CHANGED</tt> - State was changed between two checks
12
12
  # * <tt>UNCHANGED</tt> - Opposite to CHANGED
13
13
  # * <tt>STABLE</tt> - Entity is in the UNCHANGED state for a defined (by user) number of iterations
14
-
15
-
16
14
  class FileStatEnum
17
15
  NON_EXISTING = "NON_EXISTING"
18
16
  NEW = "NEW"
@@ -21,76 +19,78 @@ module FileMonitoring
21
19
  STABLE = "STABLE"
22
20
  end
23
21
 
22
+ # Number of iterations to move state from UNCHANGED to STABLE (for index)
23
+ @@stable_state = 10
24
+
25
+ def self.stable_state=(stable_state)
26
+ @@stable_state = stable_state
27
+ end
28
+
29
+ def self.stable_state
30
+ @@stable_state
31
+ end
32
+
33
+ public_class_method :stable_state, :stable_state=
34
+
24
35
  # This class holds current state of file and methods to control and report changes
25
36
  class FileStat
26
- attr_reader :cycles, :path, :stable_state
27
- attr_accessor :state, :size, :modification_time
37
+ attr_accessor :path, :state, :size, :modification_time, :marked, :cycles
28
38
 
29
- DEFAULT_STABLE_STATE = 10
30
-
31
- @@log = nil
39
+ @@digest = Digest::SHA1.new
32
40
 
33
41
  # Initializes new file monitoring object
34
42
  # ==== Arguments:
35
43
  #
36
- # * <tt>path</tt> - File location
37
- # * <tt>size</tt> - File size [Byte]
38
- # * <tt>modification_time</tt> - file mod time [seconds]
39
- # * <tt>cycles</tt> - # number of iterations from the last state change.
40
- # * <tt>stable_state</tt> - Number of iterations to move unchanged file to stable state
41
- def initialize(path, stable_state = DEFAULT_STABLE_STATE)
42
- @path ||= path
43
- @size = nil
44
- @modification_time = nil
45
- @cycles = 0
46
- @stable_state = stable_state
47
- end
44
+ # * <tt>path</tt> - File\Dir path
45
+ # * <tt>state</tt> - state. see class FileStatEnum. Default is NEW
46
+ # * <tt>size</tt> - File size [Byte]. Default is -1 (will be set later during monitor) todo:used?
47
+ # * <tt>mod_time</tt> - file mod time [seconds]. Default is -1 (will be set later during monitor)
48
+ def initialize(path, state=FileStatEnum::NEW, size=-1, mod_time=-1, indexed=false)
49
+ # File\Dir path
50
+ @path = path
48
51
 
49
- def set_output_queue(event_queue)
50
- @event_queue = event_queue
51
- end
52
+ # File size
53
+ @size = size
52
54
 
53
- # Sets a log file to report changes
54
- # ==== Arguments:
55
- #
56
- # * <tt>log</tt> - already opened ruby File object
57
- def self.set_log (log)
58
- @@log = log
55
+ # File modification time
56
+ @modification_time = mod_time
57
+
58
+ # File state. See class FileStatEnum for states.
59
+ @state = state
60
+
61
+ # indicates if path EXISTS in file system.
62
+ # If true, file will not be removed during removed_unmarked_paths phase.
63
+ @marked = false
64
+
65
+ # Number of times that file was monitored and not changed.
66
+ # When @cycles reaches ::FileMonitoring.stable_state, @state is set to STABLE and the file can be indexed.
67
+ @cycles = 0
68
+
69
+ # flag to indicate if file was indexed
70
+ @indexed = indexed
59
71
  end
60
72
 
61
- # Checks whether file was changed from the last iteration.
62
- # For files, size and modification time are checked.
63
- def monitor
64
- file_stats = File.lstat(@path) rescue nil
65
- new_state = nil
66
- if file_stats.nil?
67
- new_state = FileStatEnum::NON_EXISTING
68
- @size = nil
69
- #@creation_time = nil
70
- @modification_time = nil
71
- @cycles = 0
72
- elsif @size.nil?
73
- new_state = FileStatEnum::NEW
74
- @size = file_stats.size
75
- #@creation_time = file_stats.ctime.utc
76
- @modification_time = file_stats.mtime.to_i
77
- @cycles = 0
78
- elsif changed?(file_stats)
79
- new_state = FileStatEnum::CHANGED
80
- @size = file_stats.size
81
- #@creation_time = file_stats.ctime.utc
82
- @modification_time = file_stats.mtime.to_i
83
- @cycles = 0
84
- else
85
- new_state = FileStatEnum::UNCHANGED
86
- @cycles += 1
87
- if @cycles >= @stable_state
88
- new_state = FileStatEnum::STABLE
73
+ def index
74
+ if !@indexed and FileStatEnum::STABLE == @state
75
+ #index file
76
+ @@digest.reset
77
+ begin
78
+ File.open(@path, 'rb') { |f|
79
+ while buffer = f.read(16384) do
80
+ @@digest << buffer
81
+ end
82
+ }
83
+ $local_content_data_lock.synchronize{
84
+ $local_content_data.add_instance(@@digest.hexdigest.downcase, @size, Params['local_server_name'],
85
+ @path, @modification_time)
86
+ }
87
+ #$process_vars.inc('indexed_files')
88
+ $indexed_file_count += 1
89
+ @indexed = true
90
+ rescue
91
+ Log.warning("Indexed path'#{@path}' does not exist. Probably file changed")
89
92
  end
90
93
  end
91
-
92
- # The assignment
93
- set_state(new_state)
94
94
  end
95
95
 
96
96
  # Checks that stored file attributes are the same as file attributes taken from file system.
@@ -99,29 +99,9 @@ module FileMonitoring
99
99
  (file_stats.mtime.to_i == @modification_time))
100
100
  end
101
101
 
102
- def set_event_queue(queue)
103
- @event_queue = queue
104
- end
105
-
106
- # Sets and writes to the log a new state.
107
- def set_state(new_state)
108
- if (@state != new_state or @state == FileStatEnum::CHANGED)
109
- @state = new_state
110
- if (@@log)
111
- @@log.info(state + ": " + path)
112
- @@log.outputters[0].flush if Params['log_flush_each_message']
113
- end
114
- if @event_queue and FileStatEnum::NEW != @state # NEW state is ignored in indexer
115
- Log.debug1("Writing to event queue [%s, %s]", @state, @path)
116
- @event_queue.push([@state, self.instance_of?(DirStat), @path, @modification_time, @size])
117
- $process_vars.set('monitor to index queue size', @event_queue.size)
118
- end
119
- end
120
- end
121
-
122
102
  # Checks whether path and state are the same as of the argument
123
103
  def == (other)
124
- @path == other.path and @stable_state == other.stable_state
104
+ @path == other.path
125
105
  end
126
106
 
127
107
  # Returns path and state of the file with indentation
@@ -131,17 +111,31 @@ module FileMonitoring
131
111
  end
132
112
 
133
113
  # This class holds current state of directory and methods to control changes
134
- class DirStat < FileStat
114
+ class DirStat
115
+ attr_accessor :path, :marked
116
+
117
+ @@log = nil
118
+
119
+ def self.set_log (log)
120
+ @@log = log
121
+ end
122
+
123
+ public_class_method :set_log
124
+
135
125
  # Initializes new directory monitoring object
136
126
  # ==== Arguments:
137
127
  #
138
- # * <tt>path</tt> - File location
139
- # * <tt>stable_state</tt> - Number of iterations to move unchanged directory to stable state
140
- def initialize(path, stable_state = DEFAULT_STABLE_STATE)
141
- super
142
- @dirs = nil # Hash: ["path" -> DirStat]
143
- @files = nil # Hash: ["path" -> FileStat]
128
+ # * <tt>path</tt> - Dir location
129
+ def initialize(path)
130
+ @path = path
131
+ @dirs = {}
132
+ @files = {}
144
133
  @non_utf8_paths = {} # Hash: ["path" -> true|false]
134
+
135
+ # indicates if path EXISTS in file system.
136
+ # If true, file will not be removed during removed_unmarked_paths phase.
137
+ @marked = false
138
+
145
139
  end
146
140
 
147
141
  # add instance while initializing tree using content data from file
@@ -163,20 +157,14 @@ module FileMonitoring
163
157
  @files = {} unless @files
164
158
  if sub_paths.size-1 == sub_paths_index
165
159
  # Add File case - index points to last entry - leaf case.
166
- file_stat = FileStat.new(sub_paths[sub_paths_index], @stable_state)
167
- file_stat.set_event_queue(@event_queue)
168
- file_stat.size = size
169
- file_stat.modification_time = modification_time
170
- file_stat.state = FileStatEnum::STABLE
160
+ file_stat = FileStat.new(sub_paths[sub_paths_index], FileStatEnum::STABLE, size, modification_time, true)
171
161
  add_file(file_stat)
172
162
  else
173
163
  # Add Dir to tree if not present. index points to new dir path.
174
164
  dir_stat = @dirs[sub_paths[sub_paths_index]]
175
165
  #create new dir if not exist
176
166
  unless dir_stat
177
- dir_stat = DirStat.new(sub_paths[sub_paths_index], @stable_state)
178
- dir_stat.state = FileStatEnum::STABLE
179
- dir_stat.set_event_queue(@event_queue)
167
+ dir_stat = DirStat.new(sub_paths[sub_paths_index])
180
168
  add_dir(dir_stat)
181
169
  end
182
170
  # continue recursive call on tree with next sub path index
@@ -228,111 +216,161 @@ module FileMonitoring
228
216
  res
229
217
  end
230
218
 
231
- # Checks that directory structure (i.e. files and directories located directly under this directory)
232
- # wasn't changed since the last iteration.
233
- def monitor
234
- was_changed = false
235
- new_state = nil
236
- self_stat = File.lstat(@path) rescue nil
237
- if self_stat == nil
238
- new_state = FileStatEnum::NON_EXISTING
239
- @files = nil
240
- @dirs = nil
241
- @cycles = 0
242
- elsif @files == nil
243
- new_state = FileStatEnum::NEW
244
- @files = Hash.new
245
- @dirs = Hash.new
246
- @cycles = 0
247
- update_dir
248
- elsif update_dir
249
- new_state = FileStatEnum::CHANGED
250
- @cycles = 0
251
- else
252
- new_state = FileStatEnum::UNCHANGED
253
- @cycles += 1
254
- if @cycles >= @stable_state
255
- new_state = FileStatEnum::STABLE
256
- end
257
- end
258
-
259
- # The assignment
260
- set_state(new_state)
261
- end
262
-
263
- # Updates the files and directories hashes and globs the directory for changes.
264
- def update_dir
265
- was_changed = false
266
-
267
- # monitor existing and absent files
268
- @files.each_value do |file|
269
- file.monitor
270
-
271
- if file.state == FileStatEnum::NON_EXISTING
272
- was_changed = true
273
- rm_file(file)
219
+ # Recursively, remove non existing files and dirs in Tree
220
+ def removed_unmarked_paths
221
+ #remove dirs
222
+ dirs_enum = @dirs.each_value
223
+ loop do
224
+ dir_stat = dirs_enum.next rescue break
225
+ if dir_stat.marked
226
+ dir_stat.marked = false # unset flag for next monitoring\index\remove phase
227
+ #recursive call
228
+ dir_stat.removed_unmarked_paths
229
+ else
230
+ # directory is not marked. Remove it, since it does not exist.
231
+ #Log.debug1("Non Existing dir: %s", file_stat.path)
232
+ @@log.info("NON_EXISTING dir: " + dir_stat.path)
233
+ @@log.outputters[0].flush if Params['log_flush_each_message']
234
+ # remove the non-existing directory from the local content data
235
+ $local_content_data_lock.synchronize{
236
+ $local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
237
+ }
238
+ rm_dir(dir_stat)
274
239
  end
275
240
  end
276
241
 
277
- @dirs.each_value do |dir|
278
- dir.monitor
279
-
280
- if dir.state == FileStatEnum::NON_EXISTING
281
- was_changed = true
282
- rm_dir(dir)
242
+ #remove files
243
+ files_enum = @files.each_value
244
+ loop do
245
+ file_stat = files_enum.next rescue break
246
+ if file_stat.marked
247
+ file_stat.marked = false # unset flag for next monitoring\index\remove phase
248
+ else
249
+ # File is not marked, meaning it no longer exists. Remove it.
250
+ #Log.debug1("Non Existing file: %s", file_stat.path)
251
+ @@log.info("NON_EXISTING file: " + file_stat.path)
252
+ @@log.outputters[0].flush if Params['log_flush_each_message']
253
+ # remove the non-existing file instance from the local content data
254
+ $local_content_data_lock.synchronize{
255
+ $local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
256
+ }
257
+ rm_file(file_stat)
283
258
  end
284
259
  end
285
-
286
- was_changed = was_changed || glob_me
287
-
288
- return was_changed
289
260
  end
290
261
 
291
- # Globs the directory for new files and directories
292
- def glob_me
293
- was_changed = false
294
- files = Dir.glob(path + "/*")
262
+ # Recursively, read files and dirs from file system (using Glob)
263
+ # Handle new files\dirs.
264
+ # Change state for existing files\dirs
265
+ # Index stable files
266
+ # Removal of non-existing files\dirs is handled in method: removed_unmarked_paths
267
+ def monitor
295
268
 
296
- # add and monitor new files and directories
297
- files.each do |file|
298
- # keep only files with names in UTF-8
299
- next if @non_utf8_paths[file]
300
- check_utf_8_encoding_file = file.clone
269
+ # Algorithm:
270
+ # assume that current dir is present
271
+ # ls (glob) the dir path for child dirs and files
272
+ # if child file is not already present, add it as new, mark it and handle its state
273
+ # if file already present, mark it and handle its state.
274
+ # if child dir is not already present, add it as new, mark it and propagate
275
+ # the recursive call
276
+ # if child dir already present, mark it and handle its state
277
+ # marked files will not be remove in next remove phase
278
+
279
+ # ls (glob) the dir path for child dirs and files
280
+ globed_paths_enum = Dir.glob(@path + "/*").to_enum
281
+ loop do
282
+ globed_path = globed_paths_enum.next rescue break
283
+
284
+ # Keep only files with names in UTF-8
285
+ next if @non_utf8_paths[globed_path]
286
+ check_utf_8_encoding_file = globed_path.clone
301
287
  unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
302
288
  Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
303
- @non_utf8_paths[file]=true
289
+ @non_utf8_paths[globed_path]=true
290
+ check_utf_8_encoding_file=nil
304
291
  next
305
292
  end
306
- file_stat = File.lstat(file) rescue nil
307
- if (file_stat.directory?)
308
- unless (has_dir?(file)) # new directory
309
- # change state only for existing directories
310
- # newly added directories have to remain with NEW state
311
- was_changed = true
312
- ds = DirStat.new(file, self.stable_state)
313
- ds.set_event_queue(@event_queue) unless @event_queue.nil?
314
- ds.monitor
315
- add_dir(ds)
293
+
294
+ # Get File \ Dir status
295
+ globed_path_stat = File.lstat(globed_path) rescue next # File or dir removed from OS file system
296
+ if globed_path_stat.file?
297
+ # File case
298
+ child_stat = @files[globed_path]
299
+ if child_stat
300
+ # file child exists in Tree
301
+ child_stat.marked = true
302
+ if child_stat.changed?(globed_path_stat)
303
+ # Update changed status
304
+ child_stat.state = FileStatEnum::CHANGED
305
+ child_stat.cycles = 0
306
+ child_stat.size = globed_path_stat.size
307
+ child_stat.modification_time = globed_path_stat.mtime.to_i
308
+ @@log.info("CHANGED file: " + globed_path)
309
+ @@log.outputters[0].flush if Params['log_flush_each_message']
310
+ #Log.debug1("CHANGED file: #{globed_path}")
311
+ # remove file with changed checksum. File will be added once indexed
312
+ $local_content_data_lock.synchronize{
313
+ $local_content_data.remove_instance(Params['local_server_name'], globed_path)
314
+ }
315
+ else
316
+ # File status is the same
317
+ if child_stat.state != FileStatEnum::STABLE
318
+ child_stat.state = FileStatEnum::UNCHANGED
319
+ child_stat.cycles += 1
320
+ if child_stat.cycles >= ::FileMonitoring.stable_state
321
+ child_stat.state = FileStatEnum::STABLE
322
+ @@log.info("STABLE file: " + globed_path)
323
+ @@log.outputters[0].flush if Params['log_flush_each_message']
324
+ else
325
+ @@log.info("UNCHANGED file: " + globed_path)
326
+ @@log.outputters[0].flush if Params['log_flush_each_message']
327
+ end
328
+ end
329
+ end
330
+ else
331
+ # new File child:
332
+ child_stat = FileStat.new(globed_path, FileStatEnum::NEW,
333
+ globed_path_stat.size, globed_path_stat.mtime.to_i)
334
+ @@log.info("NEW file: " + globed_path)
335
+ @@log.outputters[0].flush if Params['log_flush_each_message']
336
+ child_stat.marked = true
337
+ add_file(child_stat)
316
338
  end
317
- else # it is a file
318
- unless(has_file?(file)) # new file
319
- # change state only for existing directories
320
- # newly added directories have to remain with NEW state
321
- was_changed = true
322
- # check if file exist in content data cache - set state to STABLE
323
- file_state = FileStatEnum::NON_EXISTING
324
- fs = FileStat.new(file, self.stable_state)
325
- fs.set_event_queue(@event_queue) unless @event_queue.nil?
326
- fs.monitor
327
- add_file(fs)
339
+ else
340
+ # Dir
341
+ child_stat = @dirs[globed_path]
342
+ # Add Dir if not exists in Tree
343
+ unless child_stat
344
+ child_stat = DirStat.new(globed_path)
345
+ add_dir(child_stat)
346
+ @@log.info("NEW dir: " + globed_path)
347
+ @@log.outputters[0].flush if Params['log_flush_each_message']
328
348
  end
349
+ child_stat.marked = true
350
+ #recursive call for dirs
351
+ child_stat.monitor
329
352
  end
330
353
  end
354
+ GC.start
355
+ end
356
+
357
+ def index
358
+ files_enum = @files.each_value
359
+ index_counter = $indexed_file_count # to check if files where actually indexed
360
+ loop do
361
+ file_stat = files_enum.next rescue break
362
+ file_stat.index # file index
363
+ end
364
+ GC.start if index_counter != $indexed_file_count # GC only if files where indexed
331
365
 
332
- return was_changed
366
+ dirs_enum = @dirs.each_value
367
+ loop do
368
+ dir_stat = dirs_enum.next rescue break
369
+ dir_stat.index # dir recursive call
370
+ end
333
371
  end
334
372
 
335
- protected :add_dir, :add_file, :rm_dir, :rm_file, :update_dir, :glob_me
373
+ protected :add_dir, :add_file, :rm_dir, :rm_file
336
374
  end
337
375
 
338
376
  end