content_server 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. data/lib/content_data/content_data.rb +194 -56
  2. data/lib/content_data/version.rb +1 -1
  3. data/lib/content_server/backup_server.rb +4 -27
  4. data/lib/content_server/content_server.rb +3 -27
  5. data/lib/content_server/file_streamer.rb +2 -0
  6. data/lib/content_server/remote_content.rb +1 -2
  7. data/lib/content_server/server.rb +23 -3
  8. data/lib/content_server/version.rb +1 -1
  9. data/lib/email/version.rb +1 -1
  10. data/lib/file_copy/version.rb +1 -1
  11. data/lib/file_indexing/index_agent.rb +1 -1
  12. data/lib/file_indexing/version.rb +1 -1
  13. data/lib/file_monitoring/file_monitoring.rb +45 -32
  14. data/lib/file_monitoring/monitor_path.rb +219 -181
  15. data/lib/file_monitoring/version.rb +1 -1
  16. data/lib/file_utils/file_generator/file_generator.rb +1 -1
  17. data/lib/file_utils/file_utils.rb +2 -2
  18. data/lib/file_utils/version.rb +1 -1
  19. data/lib/log/version.rb +1 -1
  20. data/lib/networking/version.rb +1 -1
  21. data/lib/params/version.rb +1 -1
  22. data/lib/process_monitoring/version.rb +1 -1
  23. data/lib/run_in_background/version.rb +1 -1
  24. data/lib/testing_memory/testing_memory.rb +1 -1
  25. data/lib/testing_server/testing_server.rb +1 -1
  26. data/lib/testing_server/version.rb +1 -1
  27. data/spec/content_data/validations_spec.rb +2 -2
  28. data/spec/content_server/file_streamer_spec.rb +5 -0
  29. data/spec/networking/tcp_spec.rb +1 -3
  30. data/spec/validations/index_validations_spec.rb +2 -2
  31. data/test/content_data/content_data_test.rb +8 -7
  32. data/test/file_generator/file_generator_spec.rb +3 -2
  33. data/test/file_monitoring/monitor_path_test.rb +38 -4
  34. data/test/file_utils/fileutil_mksymlink_test.rb +9 -0
  35. data/test/file_utils/time_modification_test.rb +6 -2
  36. data/test/run_in_background/test_app +17 -15
  37. metadata +2 -93
  38. data/lib/content_server/queue_indexer.rb +0 -86
  39. data/test/file_indexing/index_agent_test.rb +0 -51
  40. data/test/file_indexing/index_agent_test/New.txt +0 -0
  41. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libexslt.dll +0 -0
  42. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/libxslt.dll +0 -0
  43. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/bin/xsltproc.exe +0 -0
  44. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exslt.h +0 -102
  45. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltconfig.h +0 -73
  46. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/exsltexports.h +0 -140
  47. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libexslt/libexslt.h +0 -29
  48. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/attributes.h +0 -38
  49. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/documents.h +0 -93
  50. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extensions.h +0 -262
  51. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/extra.h +0 -80
  52. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/functions.h +0 -78
  53. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/imports.h +0 -75
  54. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/keys.h +0 -53
  55. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/libxslt.h +0 -30
  56. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/namespaces.h +0 -68
  57. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/numbersInternals.h +0 -69
  58. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/pattern.h +0 -81
  59. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/preproc.h +0 -43
  60. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/security.h +0 -104
  61. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/templates.h +0 -77
  62. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/transform.h +0 -207
  63. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/trio.h +0 -216
  64. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/triodef.h +0 -220
  65. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/variables.h +0 -91
  66. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/win32config.h +0 -101
  67. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xslt.h +0 -103
  68. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltInternals.h +0 -1967
  69. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltconfig.h +0 -172
  70. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltexports.h +0 -142
  71. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltlocale.h +0 -57
  72. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltutils.h +0 -309
  73. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/include/libxslt/xsltwin32config.h +0 -105
  74. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt.lib +0 -0
  75. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libexslt_a.lib +0 -0
  76. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt.lib +0 -0
  77. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/lib/libxslt_a.lib +0 -0
  78. data/test/file_indexing/index_agent_test/libxslt-1.1.26.win32/readme.txt +0 -22
  79. data/test/file_indexing/index_agent_test/patterns.input +0 -3
  80. data/test/file_monitoring/file_monitoring_test.rb +0 -0
  81. data/test/file_monitoring/file_monitoring_test/conf.yml +0 -4
  82. data/test/file_monitoring/file_monitoring_test/conf_win32.yml +0 -5
  83. data/test/file_monitoring/file_monitoring_test/log +0 -56
@@ -190,11 +190,13 @@ module ContentServer
190
190
  handle_new_stream(file_checksum, 0) if !@streams.key?(file_checksum)
191
191
  # Finalize the file copy.
192
192
  handle_last_chunk(file_checksum)
193
+ return true
193
194
  else
194
195
  Log.warning("Unexpected receive chuck message. file_checksum:#{file_checksum}, " \
195
196
  "content.nil?:#{content.nil?}, content_checksum:#{content_checksum}")
196
197
  return false
197
198
  end
199
+ Log.error('Code should never reach this point')
198
200
  end
199
201
 
200
202
  # open new stream
@@ -23,7 +23,6 @@ module ContentServer
23
23
  def initialize(host, port, local_backup_folder)
24
24
  @remote_tcp = Networking::TCPClient.new(host, port, method(:receive_content))
25
25
  @last_fetch_timestamp = nil
26
- @last_save_timestamp = nil
27
26
  @last_content_data_id = nil
28
27
  @content_server_content_data_path = File.join(local_backup_folder, 'remote',
29
28
  host + '_' + port.to_s)
@@ -40,7 +39,7 @@ module ContentServer
40
39
 
41
40
  # Update remote content data and write to file if changed ContentData received
42
41
  if(message.unique_id != @last_content_data_id)
43
- path = File.join(@content_server_content_data_path, @last_save_timestamp.to_s + '.cd')
42
+ path = File.join(@content_server_content_data_path, @last_fetch_timestamp.to_s + '.cd')
44
43
  FileUtils.makedirs(@content_server_content_data_path) unless \
45
44
  File.directory?(@content_server_content_data_path)
46
45
  $remote_content_data_lock.synchronize{
@@ -31,11 +31,12 @@ module ContentServer
31
31
  $local_content_data_lock = nil
32
32
  $remote_content_data_lock = nil
33
33
  $remote_content_data = nil
34
+ $last_content_data_id = nil
34
35
  end
35
36
 
36
37
  def handle_program_termination(exception)
37
38
  #Write exception message to console
38
- message = "\nInterrupt or Exit happened in server:'#{Params['service_name']}'.\n" +
39
+ message = "\nInterrupt or Exit happened in server:''.\n" +
39
40
  "Exception type:'#{exception.class}'.\n" +
40
41
  "Exception message:'#{exception.message}'.\n" +
41
42
  "Stopping process.\n" +
@@ -74,7 +75,7 @@ module ContentServer
74
75
  current_objects_counters['DirStat'] = dir_count
75
76
  file_count = ObjectSpace.each_object(FileMonitoring::FileStat).count
76
77
  $process_vars.set('FileStat count', file_count-dir_count)
77
- current_objects_counters['FileStat'] = file_count-dir_count
78
+ current_objects_counters['FileStat'] = file_count
78
79
 
79
80
  # Generate report and update global counters
80
81
  report = ""
@@ -88,5 +89,24 @@ module ContentServer
88
89
  end
89
90
  end
90
91
 
91
- module_function :init_globals, :handle_program_termination, :monitor_general_process_vars
92
+ def flush_content_data
93
+ Log.debug1('Start flush local content data to file.')
94
+ $testing_memory_log.info('Start flush content data to file') if $testing_memory_active
95
+
96
+ $local_content_data_lock.synchronize{
97
+ local_content_data_unique_id = $local_content_data.unique_id
98
+ if (local_content_data_unique_id != $last_content_data_id)
99
+ $last_content_data_id = local_content_data_unique_id
100
+ $local_content_data.to_file($tmp_content_data_file)
101
+ File.rename($tmp_content_data_file, Params['local_content_data_path'])
102
+ Log.debug1('End flush local content data to file.')
103
+ $testing_memory_log.info('End flush content data to file') if $testing_memory_active
104
+ else
105
+ Log.debug1('no need to flush. content data has not changed')
106
+ $testing_memory_log.info('no need to flush. content data has not changed') if $testing_memory_active
107
+ end
108
+ }
109
+ end
110
+
111
+ module_function :init_globals, :handle_program_termination, :monitor_general_process_vars, :flush_content_data
92
112
  end
@@ -1,3 +1,3 @@
1
1
  module ContentServer
2
- VERSION = "1.3.1"
2
+ VERSION = "1.4.0"
3
3
  end
@@ -1,3 +1,3 @@
1
1
  module SendEmail
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -1,4 +1,4 @@
1
1
  module FileCopy
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
4
4
 
@@ -126,7 +126,7 @@ module FileIndexing
126
126
  # from further processing (save checksum calculation)
127
127
  file_match = false
128
128
  otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
129
- if otherDB_updated.instance_exists(file, local_server_name, checksum)
129
+ if otherDB_updated.instance_exists(file, local_server_name)
130
130
  if size == file_stats.size and instance_mod_time == file_mtime.to_i
131
131
  @indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
132
132
  file_match = true
@@ -1,3 +1,3 @@
1
1
  module FileIndexing
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -1,3 +1,5 @@
1
+ #todo: stable state not working. after 2 cycles...
2
+
1
3
  require 'algorithms'
2
4
  require 'fileutils'
3
5
  require 'log4r'
@@ -9,12 +11,6 @@ module FileMonitoring
9
11
  # Manages file monitoring of number of file system locations
10
12
  class FileMonitoring
11
13
 
12
- # Set event queue used for communication between different proceses.
13
- # @param queue [Queue]
14
- def set_event_queue(queue)
15
- @event_queue = queue
16
- end
17
-
18
14
  # The main method. Loops on all paths, each time span and monitors them.
19
15
  #
20
16
  # =Algorithm:
@@ -32,16 +28,16 @@ module FileMonitoring
32
28
  # create root dirs of monitoring
33
29
  dir_stat_array = []
34
30
  conf_array.each { |elem|
35
- dir_stat = DirStat.new(File.expand_path(elem['path']), elem['stable_state'])
36
- dir_stat.set_event_queue(@event_queue) if @event_queue
37
- dir_stat_array.push(dir_stat)
31
+ dir_stat = DirStat.new(File.expand_path(elem['path']))
32
+ dir_stat_array.push([dir_stat, elem['stable_state']])
38
33
  }
39
34
 
40
35
  #Look over loaded content data if not empty
41
36
  # If file is under monitoring path - Add to DirStat tree as stable with path,size,mod_time read from file
42
37
  # If file is NOT under monitoring path - skip (not a valid usage)
43
38
  unless $local_content_data.empty?
44
- Log.info("Start build data base from loaded file")
39
+ Log.info("Start build data base from loaded file. This could take several minutes")
40
+ inst_count = 0
45
41
  $local_content_data.each_instance {
46
42
  |_, size, _, mod_time, _, path|
47
43
  # construct sub paths array from full file path:
@@ -63,15 +59,19 @@ module FileMonitoring
63
59
  # if index is found then it the monitor path
64
60
  # the next index indicates the next sub path to insert to the tree
65
61
  # the index will be raised at each recursive call down the tree
66
- sub_paths_index = sub_paths.index(dir_stat.path)
62
+ sub_paths_index = sub_paths.index(dir_stat[0].path)
67
63
  next if sub_paths_index.nil? # monitor path was not found. skip this instance.
68
- # monitor path was found. Add to tree as stable.
69
- dir_stat.state = FileStatEnum::STABLE
64
+
65
+ # monitor path was found. Add to tree
70
66
  # start the recursive call with next sub path index
71
- dir_stat.load_instance(sub_paths, sub_paths_index+1, size, mod_time)
67
+ ::FileMonitoring.stable_state = dir_stat[1]
68
+ inst_count += 1
69
+ dir_stat[0].load_instance(sub_paths, sub_paths_index+1, size, mod_time)
70
+ break
72
71
  }
73
72
  }
74
- Log.info("End build data base from loaded file")
73
+ Log.info("End build data base from loaded file. loaded instances:#{inst_count}")
74
+ $last_content_data_id = $local_content_data.unique_id
75
75
  end
76
76
 
77
77
  # Directories states stored in the priority queue,
@@ -81,7 +81,7 @@ module FileMonitoring
81
81
  conf_array.each_with_index { |elem, index|
82
82
  priority = (Time.now + elem['scan_period']).to_i
83
83
  #Log.info("File monitoring started for: #{elem}")
84
- pq.push([priority, elem, dir_stat_array[index]], -priority)
84
+ pq.push([priority, elem, dir_stat_array[index][0]], -priority)
85
85
  }
86
86
 
87
87
  #init log4r
@@ -103,33 +103,46 @@ module FileMonitoring
103
103
  file_outputter.level = Log4r::INFO
104
104
  file_outputter.formatter = formatter
105
105
  @log4r.outputters << file_outputter
106
- FileStat.set_log(@log4r)
106
+ ::FileMonitoring::DirStat.set_log(@log4r)
107
107
 
108
108
  while true do
109
109
  # pull entry that should be checked next,
110
110
  # according to it's scan_period
111
- time, conf, dir_stat = pq.pop
111
+ time, elem, dir_stat = pq.pop
112
112
  # time remains to wait before directory should be checked
113
113
  time_span = time - Time.now.to_i
114
114
  if (time_span > 0)
115
115
  sleep(time_span)
116
116
  end
117
117
 
118
- unless $testing_memory_active
119
- Log.info("Start monitor for dir:%s", dir_stat.path)
120
- dir_stat.monitor
121
- Log.info("End monitor for dir:%s", dir_stat.path)
122
- else
123
- $testing_memory_log.info("Start monitor at :#{Time.now}")
124
- puts "Start monitor at :#{Time.now}"
125
- dir_stat.monitor
126
- $testing_memory_log.info("End monitor at :#{Time.now}")
127
- puts "End monitor at :#{Time.now}"
128
- end
118
+ # Start monitor
119
+ Log.info("Start monitor path:%s ", dir_stat.path)
120
+ $testing_memory_log.info("Start monitor path:#{dir_stat.path}") if $testing_memory_active
121
+ ::FileMonitoring.stable_state=elem['stable_state']
122
+ dir_stat.monitor
129
123
 
130
- # push entry with new a next time it should be checked as a priority key
131
- priority = (Time.now + conf['scan_period']).to_i
132
- pq.push([priority, conf, dir_stat], -priority)
124
+ # Start index
125
+ Log.info("Start index path:%s ", dir_stat.path)
126
+ $testing_memory_log.info("Start index path:#{dir_stat.path}") if $testing_memory_active
127
+ dir_stat.index
128
+
129
+ # print number of indexed files
130
+ Log.debug1("indexed file count:%s", $indexed_file_count)
131
+ $testing_memory_log.info("indexed file count: #{$indexed_file_count}") if $testing_memory_active
132
+
133
+ # remove non existing (not marked) files\dirs
134
+ Log.info('Start remove non existing paths')
135
+ $testing_memory_log.info('Start remove non existing paths') if $testing_memory_active
136
+ dir_stat.removed_unmarked_paths
137
+ Log.info('End monitor path and index')
138
+ $testing_memory_log.info('End monitor path and index') if $testing_memory_active
139
+
140
+ #flush content data if changed
141
+ ContentServer.flush_content_data
142
+
143
+ #Add back to queue
144
+ priority = (Time.now + elem['scan_period']).to_i
145
+ pq.push([priority, elem, dir_stat], -priority)
133
146
  end
134
147
  end
135
148
  end
@@ -11,8 +11,6 @@ module FileMonitoring
11
11
  # * <tt>CHANGED</tt> - State was changed between two checks
12
12
  # * <tt>UNCHANGED</tt> - Opposite to CHANGED
13
13
  # * <tt>STABLE</tt> - Entity is in the UNCHANGED state for a defined (by user) number of iterations
14
-
15
-
16
14
  class FileStatEnum
17
15
  NON_EXISTING = "NON_EXISTING"
18
16
  NEW = "NEW"
@@ -21,76 +19,78 @@ module FileMonitoring
21
19
  STABLE = "STABLE"
22
20
  end
23
21
 
22
+ # Number of iterations to move state from UNCHANGED to STABLE (for index)
23
+ @@stable_state = 10
24
+
25
+ def self.stable_state=(stable_state)
26
+ @@stable_state = stable_state
27
+ end
28
+
29
+ def self.stable_state
30
+ @@stable_state
31
+ end
32
+
33
+ public_class_method :stable_state, :stable_state=
34
+
24
35
  # This class holds current state of file and methods to control and report changes
25
36
  class FileStat
26
- attr_reader :cycles, :path, :stable_state
27
- attr_accessor :state, :size, :modification_time
37
+ attr_accessor :path, :state, :size, :modification_time, :marked, :cycles
28
38
 
29
- DEFAULT_STABLE_STATE = 10
30
-
31
- @@log = nil
39
+ @@digest = Digest::SHA1.new
32
40
 
33
41
  # Initializes new file monitoring object
34
42
  # ==== Arguments:
35
43
  #
36
- # * <tt>path</tt> - File location
37
- # * <tt>size</tt> - File size [Byte]
38
- # * <tt>modification_time</tt> - file mod time [seconds]
39
- # * <tt>cycles</tt> - # number of iterations from the last state change.
40
- # * <tt>stable_state</tt> - Number of iterations to move unchanged file to stable state
41
- def initialize(path, stable_state = DEFAULT_STABLE_STATE)
42
- @path ||= path
43
- @size = nil
44
- @modification_time = nil
45
- @cycles = 0
46
- @stable_state = stable_state
47
- end
44
+ # * <tt>path</tt> - File\Dir path
45
+ # * <tt>state</tt> - state. see class FileStatEnum. Default is NEW
46
+ # * <tt>size</tt> - File size [Byte]. Default is -1 (will be set later during monitor) todo:used?
47
+ # * <tt>mod_time</tt> - file mod time [seconds]. Default is -1 (will be set later during monitor)
48
+ def initialize(path, state=FileStatEnum::NEW, size=-1, mod_time=-1, indexed=false)
49
+ # File\Dir path
50
+ @path = path
48
51
 
49
- def set_output_queue(event_queue)
50
- @event_queue = event_queue
51
- end
52
+ # File size
53
+ @size = size
52
54
 
53
- # Sets a log file to report changes
54
- # ==== Arguments:
55
- #
56
- # * <tt>log</tt> - already opened ruby File object
57
- def self.set_log (log)
58
- @@log = log
55
+ # File modification time
56
+ @modification_time = mod_time
57
+
58
+ # File state. See class FileStatEnum for states.
59
+ @state = state
60
+
61
+ # indicates if path EXISTS in file system.
62
+ # If true, file will not be removed during removed_unmarked_paths phase.
63
+ @marked = false
64
+
65
+ # Number of times that file was monitored and not changed.
66
+ # When @cycles exceeds ::FileMonitoring::stable_state, @state is set to Stable and can be indexed.
67
+ @cycles = 0
68
+
69
+ # flag to indicate if file was indexed
70
+ @indexed = indexed
59
71
  end
60
72
 
61
- # Checks whether file was changed from the last iteration.
62
- # For files, size and modification time are checked.
63
- def monitor
64
- file_stats = File.lstat(@path) rescue nil
65
- new_state = nil
66
- if file_stats.nil?
67
- new_state = FileStatEnum::NON_EXISTING
68
- @size = nil
69
- #@creation_time = nil
70
- @modification_time = nil
71
- @cycles = 0
72
- elsif @size.nil?
73
- new_state = FileStatEnum::NEW
74
- @size = file_stats.size
75
- #@creation_time = file_stats.ctime.utc
76
- @modification_time = file_stats.mtime.to_i
77
- @cycles = 0
78
- elsif changed?(file_stats)
79
- new_state = FileStatEnum::CHANGED
80
- @size = file_stats.size
81
- #@creation_time = file_stats.ctime.utc
82
- @modification_time = file_stats.mtime.to_i
83
- @cycles = 0
84
- else
85
- new_state = FileStatEnum::UNCHANGED
86
- @cycles += 1
87
- if @cycles >= @stable_state
88
- new_state = FileStatEnum::STABLE
73
+ def index
74
+ if !@indexed and FileStatEnum::STABLE == @state
75
+ #index file
76
+ @@digest.reset
77
+ begin
78
+ File.open(@path, 'rb') { |f|
79
+ while buffer = f.read(16384) do
80
+ @@digest << buffer
81
+ end
82
+ }
83
+ $local_content_data_lock.synchronize{
84
+ $local_content_data.add_instance(@@digest.hexdigest.downcase, @size, Params['local_server_name'],
85
+ @path, @modification_time)
86
+ }
87
+ #$process_vars.inc('indexed_files')
88
+ $indexed_file_count += 1
89
+ @indexed = true
90
+ rescue
91
+ Log.warning("Indexed path'#{@path}' does not exist. Probably file changed")
89
92
  end
90
93
  end
91
-
92
- # The assignment
93
- set_state(new_state)
94
94
  end
95
95
 
96
96
  # Checks that stored file attributes are the same as file attributes taken from file system.
@@ -99,29 +99,9 @@ module FileMonitoring
99
99
  (file_stats.mtime.to_i == @modification_time))
100
100
  end
101
101
 
102
- def set_event_queue(queue)
103
- @event_queue = queue
104
- end
105
-
106
- # Sets and writes to the log a new state.
107
- def set_state(new_state)
108
- if (@state != new_state or @state == FileStatEnum::CHANGED)
109
- @state = new_state
110
- if (@@log)
111
- @@log.info(state + ": " + path)
112
- @@log.outputters[0].flush if Params['log_flush_each_message']
113
- end
114
- if @event_queue and FileStatEnum::NEW != @state # NEW state is ignored in indexer
115
- Log.debug1("Writing to event queue [%s, %s]", @state, @path)
116
- @event_queue.push([@state, self.instance_of?(DirStat), @path, @modification_time, @size])
117
- $process_vars.set('monitor to index queue size', @event_queue.size)
118
- end
119
- end
120
- end
121
-
122
102
  # Checks whether path and state are the same as of the argument
123
103
  def == (other)
124
- @path == other.path and @stable_state == other.stable_state
104
+ @path == other.path
125
105
  end
126
106
 
127
107
  # Returns path and state of the file with indentation
@@ -131,17 +111,31 @@ module FileMonitoring
131
111
  end
132
112
 
133
113
  # This class holds current state of directory and methods to control changes
134
- class DirStat < FileStat
114
+ class DirStat
115
+ attr_accessor :path, :marked
116
+
117
+ @@log = nil
118
+
119
+ def self.set_log (log)
120
+ @@log = log
121
+ end
122
+
123
+ public_class_method :set_log
124
+
135
125
  # Initializes new directory monitoring object
136
126
  # ==== Arguments:
137
127
  #
138
- # * <tt>path</tt> - File location
139
- # * <tt>stable_state</tt> - Number of iterations to move unchanged directory to stable state
140
- def initialize(path, stable_state = DEFAULT_STABLE_STATE)
141
- super
142
- @dirs = nil # Hash: ["path" -> DirStat]
143
- @files = nil # Hash: ["path" -> FileStat]
128
+ # * <tt>path</tt> - Dir location
129
+ def initialize(path)
130
+ @path = path
131
+ @dirs = {}
132
+ @files = {}
144
133
  @non_utf8_paths = {} # Hash: ["path" -> true|false]
134
+
135
+ # indicates if path EXISTS in file system.
136
+ # If true, file will not be removed during removed_unmarked_paths phase.
137
+ @marked = false
138
+
145
139
  end
146
140
 
147
141
  # add instance while initializing tree using content data from file
@@ -163,20 +157,14 @@ module FileMonitoring
163
157
  @files = {} unless @files
164
158
  if sub_paths.size-1 == sub_paths_index
165
159
  # Add File case - index points to last entry - leaf case.
166
- file_stat = FileStat.new(sub_paths[sub_paths_index], @stable_state)
167
- file_stat.set_event_queue(@event_queue)
168
- file_stat.size = size
169
- file_stat.modification_time = modification_time
170
- file_stat.state = FileStatEnum::STABLE
160
+ file_stat = FileStat.new(sub_paths[sub_paths_index], FileStatEnum::STABLE, size, modification_time, true)
171
161
  add_file(file_stat)
172
162
  else
173
163
  # Add Dir to tree if not present. index points to new dir path.
174
164
  dir_stat = @dirs[sub_paths[sub_paths_index]]
175
165
  #create new dir if not exist
176
166
  unless dir_stat
177
- dir_stat = DirStat.new(sub_paths[sub_paths_index], @stable_state)
178
- dir_stat.state = FileStatEnum::STABLE
179
- dir_stat.set_event_queue(@event_queue)
167
+ dir_stat = DirStat.new(sub_paths[sub_paths_index])
180
168
  add_dir(dir_stat)
181
169
  end
182
170
  # continue recursive call on tree with next sub path index
@@ -228,111 +216,161 @@ module FileMonitoring
228
216
  res
229
217
  end
230
218
 
231
- # Checks that directory structure (i.e. files and directories located directly under this directory)
232
- # wasn't changed since the last iteration.
233
- def monitor
234
- was_changed = false
235
- new_state = nil
236
- self_stat = File.lstat(@path) rescue nil
237
- if self_stat == nil
238
- new_state = FileStatEnum::NON_EXISTING
239
- @files = nil
240
- @dirs = nil
241
- @cycles = 0
242
- elsif @files == nil
243
- new_state = FileStatEnum::NEW
244
- @files = Hash.new
245
- @dirs = Hash.new
246
- @cycles = 0
247
- update_dir
248
- elsif update_dir
249
- new_state = FileStatEnum::CHANGED
250
- @cycles = 0
251
- else
252
- new_state = FileStatEnum::UNCHANGED
253
- @cycles += 1
254
- if @cycles >= @stable_state
255
- new_state = FileStatEnum::STABLE
256
- end
257
- end
258
-
259
- # The assignment
260
- set_state(new_state)
261
- end
262
-
263
- # Updates the files and directories hashes and globs the directory for changes.
264
- def update_dir
265
- was_changed = false
266
-
267
- # monitor existing and absent files
268
- @files.each_value do |file|
269
- file.monitor
270
-
271
- if file.state == FileStatEnum::NON_EXISTING
272
- was_changed = true
273
- rm_file(file)
219
+ # Recursively, remove non existing files and dirs in Tree
220
+ def removed_unmarked_paths
221
+ #remove dirs
222
+ dirs_enum = @dirs.each_value
223
+ loop do
224
+ dir_stat = dirs_enum.next rescue break
225
+ if dir_stat.marked
226
+ dir_stat.marked = false # unset flag for next monitoring\index\remove phase
227
+ #recursive call
228
+ dir_stat.removed_unmarked_paths
229
+ else
230
+ # directory is not marked. Remove it, since it does not exist.
231
+ #Log.debug1("Non Existing dir: %s", file_stat.path)
232
+ @@log.info("NON_EXISTING dir: " + dir_stat.path)
233
+ @@log.outputters[0].flush if Params['log_flush_each_message']
234
+ # remove file with changed checksum
235
+ $local_content_data_lock.synchronize{
236
+ $local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
237
+ }
238
+ rm_dir(dir_stat)
274
239
  end
275
240
  end
276
241
 
277
- @dirs.each_value do |dir|
278
- dir.monitor
279
-
280
- if dir.state == FileStatEnum::NON_EXISTING
281
- was_changed = true
282
- rm_dir(dir)
242
+ #remove files
243
+ files_enum = @files.each_value
244
+ loop do
245
+ file_stat = files_enum.next rescue break
246
+ if file_stat.marked
247
+ file_stat.marked = false # unset flag for next monitoring\index\remove phase
248
+ else
249
+ # file not marked meaning it is no longer exist. Remove.
250
+ #Log.debug1("Non Existing file: %s", file_stat.path)
251
+ @@log.info("NON_EXISTING file: " + file_stat.path)
252
+ @@log.outputters[0].flush if Params['log_flush_each_message']
253
+ # remove file with changed checksum
254
+ $local_content_data_lock.synchronize{
255
+ $local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
256
+ }
257
+ rm_file(file_stat)
283
258
  end
284
259
  end
285
-
286
- was_changed = was_changed || glob_me
287
-
288
- return was_changed
289
260
  end
290
261
 
291
- # Globs the directory for new files and directories
292
- def glob_me
293
- was_changed = false
294
- files = Dir.glob(path + "/*")
262
+ # Recursively, read files and dirs from file system (using Glob)
263
+ # Handle new files\dirs.
264
+ # Change state for existing files\dirs
265
+ # Index stable files
266
+ # Removal of non-existing files\dirs is handled in method: removed_unmarked_paths
267
+ def monitor
295
268
 
296
- # add and monitor new files and directories
297
- files.each do |file|
298
- # keep only files with names in UTF-8
299
- next if @non_utf8_paths[file]
300
- check_utf_8_encoding_file = file.clone
269
+ # Algorithm:
270
+ # assume that current dir is present
271
+ # ls (glob) the dir path for child dirs and files
272
+ # if child file is not already present, add it as new, mark it and handle its state
273
+ # if file already present, mark it and handle its state.
274
+ # if child dir is not already present, add it as new, mark it and propagates
275
+ # the recursive call
276
+ # if child dir already present, mark it and handle its state
277
+ # marked files will not be remove in next remove phase
278
+
279
+ # ls (glob) the dir path for child dirs and files
280
+ globed_paths_enum = Dir.glob(@path + "/*").to_enum
281
+ loop do
282
+ globed_path = globed_paths_enum.next rescue break
283
+
284
+ # Keep only files with names in UTF-8
285
+ next if @non_utf8_paths[globed_path]
286
+ check_utf_8_encoding_file = globed_path.clone
301
287
  unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
302
288
  Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
303
- @non_utf8_paths[file]=true
289
+ @non_utf8_paths[globed_path]=true
290
+ check_utf_8_encoding_file=nil
304
291
  next
305
292
  end
306
- file_stat = File.lstat(file) rescue nil
307
- if (file_stat.directory?)
308
- unless (has_dir?(file)) # new directory
309
- # change state only for existing directories
310
- # newly added directories have to remain with NEW state
311
- was_changed = true
312
- ds = DirStat.new(file, self.stable_state)
313
- ds.set_event_queue(@event_queue) unless @event_queue.nil?
314
- ds.monitor
315
- add_dir(ds)
293
+
294
+ # Get File \ Dir status
295
+ globed_path_stat = File.lstat(globed_path) rescue next # File or dir removed from OS file system
296
+ if globed_path_stat.file?
297
+ # File case
298
+ child_stat = @files[globed_path]
299
+ if child_stat
300
+ # file child exists in Tree
301
+ child_stat.marked = true
302
+ if child_stat.changed?(globed_path_stat)
303
+ # Update changed status
304
+ child_stat.state = FileStatEnum::CHANGED
305
+ child_stat.cycles = 0
306
+ child_stat.size = globed_path_stat.size
307
+ child_stat.modification_time = globed_path_stat.mtime.to_i
308
+ @@log.info("CHANGED file: " + globed_path)
309
+ @@log.outputters[0].flush if Params['log_flush_each_message']
310
+ #Log.debug1("CHANGED file: #{globed_path}")
311
+ # remove file with changed checksum. File will be added once indexed
312
+ $local_content_data_lock.synchronize{
313
+ $local_content_data.remove_instance(Params['local_server_name'], globed_path)
314
+ }
315
+ else
316
+ # File status is the same
317
+ if child_stat.state != FileStatEnum::STABLE
318
+ child_stat.state = FileStatEnum::UNCHANGED
319
+ child_stat.cycles += 1
320
+ if child_stat.cycles >= ::FileMonitoring.stable_state
321
+ child_stat.state = FileStatEnum::STABLE
322
+ @@log.info("STABLE file: " + globed_path)
323
+ @@log.outputters[0].flush if Params['log_flush_each_message']
324
+ else
325
+ @@log.info("UNCHANGED file: " + globed_path)
326
+ @@log.outputters[0].flush if Params['log_flush_each_message']
327
+ end
328
+ end
329
+ end
330
+ else
331
+ # new File child:
332
+ child_stat = FileStat.new(globed_path, FileStatEnum::NEW,
333
+ globed_path_stat.size, globed_path_stat.mtime.to_i)
334
+ @@log.info("NEW file: " + globed_path)
335
+ @@log.outputters[0].flush if Params['log_flush_each_message']
336
+ child_stat.marked = true
337
+ add_file(child_stat)
316
338
  end
317
- else # it is a file
318
- unless(has_file?(file)) # new file
319
- # change state only for existing directories
320
- # newly added directories have to remain with NEW state
321
- was_changed = true
322
- # check if file exist in content data cache - set state to STABLE
323
- file_state = FileStatEnum::NON_EXISTING
324
- fs = FileStat.new(file, self.stable_state)
325
- fs.set_event_queue(@event_queue) unless @event_queue.nil?
326
- fs.monitor
327
- add_file(fs)
339
+ else
340
+ # Dir
341
+ child_stat = @dirs[globed_path]
342
+ # Add Dir if not exists in Tree
343
+ unless child_stat
344
+ child_stat = DirStat.new(globed_path)
345
+ add_dir(child_stat)
346
+ @@log.info("NEW dir: " + globed_path)
347
+ @@log.outputters[0].flush if Params['log_flush_each_message']
328
348
  end
349
+ child_stat.marked = true
350
+ #recursive call for dirs
351
+ child_stat.monitor
329
352
  end
330
353
  end
354
+ GC.start
355
+ end
356
+
357
+ def index
358
+ files_enum = @files.each_value
359
+ index_counter = $indexed_file_count # to check if files were actually indexed
360
+ loop do
361
+ file_stat = files_enum.next rescue break
362
+ file_stat.index # file index
363
+ end
364
+ GC.start if index_counter != $indexed_file_count # GC only if files were indexed
331
365
 
332
- return was_changed
366
+ dirs_enum = @dirs.each_value
367
+ loop do
368
+ dir_stat = dirs_enum.next rescue break
369
+ dir_stat.index # dir recursive call
370
+ end
333
371
  end
334
372
 
335
- protected :add_dir, :add_file, :rm_dir, :rm_file, :update_dir, :glob_me
373
+ protected :add_dir, :add_file, :rm_dir, :rm_file
336
374
  end
337
375
 
338
376
  end