cantemo-portal-agent 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -35,6 +35,7 @@ module Envoi::Mam::Cantemo
35
35
  end
36
36
 
37
37
  def add_to_ignore(wf, file)
38
+ logger.debug { "Adding path to ignore cache: '#{file.path}'" }
38
39
  @ignored_file_paths_by_watch_folder[wf] << file.path
39
40
  end
40
41
 
@@ -64,8 +65,9 @@ module Envoi::Mam::Cantemo
64
65
  if exclude
65
66
  next if ignored_files.include?(file.path)
66
67
  if [*exclude].find { |ep| File.fnmatch(ep, file.path) }
67
- logger.debug { "Adding File to Ignore Cache: '#{file.path}'"}
68
- ignored_files << file.path
68
+ add_to_ignore(wf, file)
69
+ # logger.debug { "Adding File to Ignore Cache: '#{file.path}'"}
70
+ # ignored_files << file.path
69
71
  next
70
72
  end
71
73
  end
@@ -93,6 +95,7 @@ module Envoi::Mam::Cantemo
93
95
  AWF.run_once(watch_folders) { |wf| process_watch_folder(wf) }
94
96
  end
95
97
 
98
+ # Initialize then run
96
99
  def self.run(args)
97
100
  w = self.new(args)
98
101
  w.run
@@ -0,0 +1,426 @@
1
+ require 'envoi/mam/cantemo/agent'
2
+ require 'envoi/aspera/watch_service/watch_folder'
3
+ require 'envoi/watch_folder_utility/watch_folder/handler/listen'
4
+
5
+ module Envoi::Mam::Cantemo
6
+
7
+ class Agent
8
+
9
+ class WatchFolderManager
10
+
11
+ # AWF = Envoi::Aspera::WatchService::WatchFolder # Aspera Watch Folder
12
+ LWF = Envoi::WatchFolderUtility::WatchFolder::Handler::Listen # Listen Watch Folder
13
+
14
+ DEFAULT_WATCH_FOLDER_PROCESSOR_LIMIT = 10
15
+
16
+ attr_accessor :logger, :agent, :config, :watch_folder_defs, :watch_folders
17
+
18
# Builds the manager: sets up logging, loads the Cantemo Portal agent from its
# config file and turns the configured watch folder definitions into watch
# folder instances (see #process_watch_folder_defs).
#
# @param [Hash] args Handed to #initialize_logger and to
#   Envoi::Mam::Cantemo::Agent.load_from_config_file. The hash is updated in
#   place (:logger, :log_level and :default_preserve_file_path are written).
# @option args [Boolean] :default_preserve_file_path (false)
# @option args [Boolean] :threaded (true) Process files in separate threads
# @raise [ArgumentError] If the agent configuration has no `cantemo` section
def initialize(args = {})
  initialize_logger(args)

  logger.debug { 'Initializing Agent Watch Folder Manager.' }

  logger.debug { 'Initializing Cantemo Portal Agent.' }
  args[:default_preserve_file_path] = args.fetch(:default_preserve_file_path, false)
  @agent = Envoi::Mam::Cantemo::Agent.load_from_config_file(args)
  logger.debug { 'Cantemo Portal Agent Initialized.' }

  @config = agent.config
  # The section may be keyed by symbol or by string depending on how the config was parsed.
  cantemo_config = config && (config[:cantemo] || config['cantemo'])
  unless cantemo_config
    # Fail with a readable message instead of a NoMethodError on nil below.
    raise ArgumentError, "Agent configuration is missing the 'cantemo' section."
  end
  @watch_folder_defs = cantemo_config[:watch_folders] || cantemo_config['watch_folders']

  # Per watch folder list of file paths that should be skipped; guarded by the mutex.
  @ignored_file_paths_by_watch_folder = Hash.new { |h, k| h[k] = [] }
  @ignored_file_paths_lock = Mutex.new

  @threaded = args.fetch(:threaded, true)

  @default_maximum_active_processors = DEFAULT_WATCH_FOLDER_PROCESSOR_LIMIT
  # Per watch folder map of file => processor thread.
  @processors_by_watch_folder = Hash.new { |h, k| h[k] = {} }

  process_watch_folder_defs
end
42
+
43
# Creates (or adopts) the logger used by the manager and writes it back into
# `args` so that objects built from the same hash share it.
#
# @param [Hash] args Updated in place: :logger and :log_level are set
# @option args [Logger] :logger An existing logger to use
# @option args [String, IO] :log_to (STDOUT) Target for a newly created logger
# @option args [Integer] :log_level (Logger::DEBUG) Level applied to the logger
# @return [Logger]
def initialize_logger(args = {})
  @logger = args[:logger] ||= Logger.new(args[:log_to] || STDOUT)
  # Honour a caller supplied level; only fall back to DEBUG when none was given.
  @logger.level = (args[:log_level] ||= Logger::DEBUG)
  @logger
end
52
+
53
# @TODO Move most of this code into a watch folder class
#
# Normalises a single watch folder definition in place (singular keys and
# aliases are folded into their canonical keys) and builds a Listen watch
# folder (LWF) from it.
#
# @param [Hash] watch_folder_def String keyed definition; modified in place
# @option watch_folder_def [String] path
# @option watch_folder_def [String] upload_to_storage_id (alias: storage_id)
# @option watch_folder_def [String] name (path)
# @option watch_folder_def [Array<String>] paths ([path])
# @option watch_folder_def [String] exclude ('**/.*')
# @option watch_folder_def [Array<String>] excludes ([exclude])
# @option watch_folder_def [String] include
# @option watch_folder_def [Array<String>] includes ([include])
# @option watch_folder_def [String] quarantine_directory_path (alias: quarantine_path)
# @option watch_folder_def [String] completed_directory_path (alias: completed_path)
# @option watch_folder_def [Integer|False] maximum_active_processors (@default_maximum_active_processors)
# @option watch_folder_def [Hash] logging Optional hash holding log_to / log_level;
#   when absent those keys are read from the definition itself
# @return [Object, false] The LWF instance, or false when no path could be determined
def process_watch_folder_def(watch_folder_def)
  logger.debug { "Initializing Watch Folder #{watch_folder_def.inspect}" }

  logger.debug { "Initializing parameter 'paths'." }
  name = watch_folder_def['name']

  # `paths` may be a single String or an Array; `path` is merged into it.
  paths = [*watch_folder_def['paths'], *watch_folder_def['path']].map { |p| File.expand_path(p) }
  if paths.empty? && name
    # No explicit path: fall back to the name when it is an existing directory.
    name_as_path = File.expand_path(name.to_s)
    paths << name_as_path if Dir.exist?(name_as_path)
  end
  paths.uniq!
  watch_folder_def['paths'] = paths
  # watch_folder_def['path'] ||= paths.first if paths.length == 1
  watch_folder_def.delete('path')

  if paths.empty?
    logger.error { "Failed to initialize watch folder. No path found in watch folder definition." }
    return false
  end
  logger.debug { "Parameter 'paths' initialized." }

  logger.debug { "Initializing parameter 'includes'." }
  includes = [*watch_folder_def['includes'], *watch_folder_def['include']].uniq
  includes.map! { |e| Regexp.try_convert(e) || e }
  watch_folder_def['includes'] = includes
  watch_folder_def.delete('include')
  logger.debug { "Parameter `includes` initialized." }

  logger.debug { "Initializing parameter 'excludes'." }
  # The default pattern is added whenever no singular `exclude` is given.
  exclude = watch_folder_def['exclude'] || '**/.*'
  excludes = [*watch_folder_def['excludes'], *exclude].uniq
  excludes.map! { |e| Regexp.try_convert(e) || e }
  watch_folder_def['excludes'] = excludes
  watch_folder_def.delete('exclude')
  logger.debug { "Parameter `excludes` initialized." }

  logger.debug { "Initializing parameter `quarantine directory path`." }
  quarantine_directory_path = watch_folder_def['quarantine_directory_path'] || watch_folder_def['quarantine_path']
  if quarantine_directory_path
    quarantine_directory_path = File.expand_path(quarantine_directory_path)
    watch_folder_def['quarantine_directory_path'] = quarantine_directory_path

    unless Dir.exist?(quarantine_directory_path)
      logger.warn { "Quarantine directory path '#{quarantine_directory_path}' does not exist. Files will be ignored instead." }
    end
  end
  watch_folder_def.delete('quarantine_path')
  logger.debug { "Parameter `quarantine directory path` initialized." }

  logger.debug { "Initializing parameter 'completed directory path'." }
  completed_directory_path = watch_folder_def['completed_directory_path'] || watch_folder_def['completed_path']
  if completed_directory_path
    completed_directory_path = File.expand_path(completed_directory_path)
    watch_folder_def['completed_directory_path'] = completed_directory_path

    unless Dir.exist?(completed_directory_path)
      logger.warn { "Completed directory path '#{completed_directory_path}' does not exist. File will be ignored instead." }
    end
  end
  watch_folder_def.delete('completed_path')
  logger.debug { "Parameter 'completed directory path' initialized." }

  logger.debug { "Initializing parameter `upload to storage id`." }
  storage_id = watch_folder_def['upload_to_storage_id'] || watch_folder_def['storage_id']
  watch_folder_def['upload_to_storage_id'] = storage_id
  watch_folder_def.delete('storage_id')
  unless storage_id
    logger.warn { "No `upload to storage id` specified. Uploading will be skipped for this watch folder." }
  end
  logger.debug { "Parameter 'upload to storage id' initialized." }

  # Only nil selects the default; an explicit false is kept as given.
  if watch_folder_def['maximum_active_processors'].nil?
    watch_folder_def['maximum_active_processors'] = @default_maximum_active_processors
  end

  args_out = {}
  logging = watch_folder_def['logging'] || watch_folder_def
  log_to = logging['log_to']
  log_level = logging['log_level']
  # Values may be Strings or, for the level, Integers; treat nil/false/empty as "not set".
  unset = ->(value) { !value || (value.respond_to?(:empty?) && value.empty?) }
  args_out[:log_to] = log_to unless unset.call(log_to)
  args_out[:log_level] = log_level unless unset.call(log_level)
  # Without a usable log target the watch folder gets a copy of the manager's logger.
  args_out[:logger] = logger.dup unless args_out.key?(:log_to)
  args_out[:definition] = watch_folder_def

  logger.debug { "Initializing Watch Folder Instance from Def. #{watch_folder_def}" }
  watch_folder = LWF.new(args_out)
  logger.debug { "Watch Folder Instance Created." }
  watch_folder
end
173
+
174
# Builds @watch_folders from #watch_folder_defs by running every definition
# through #process_watch_folder_def. Definitions may come as an Array, or as a
# Hash whose key is used as the definition's name when it has none. Entries
# for which no watch folder could be built (falsy results) are dropped.
#
# @raise [RuntimeError] When watch_folder_defs is neither an Array nor a Hash
def process_watch_folder_defs
  logger.debug { 'Processing watch folder definitions.' }
  built =
    case watch_folder_defs
    when Array
      watch_folder_defs.map { |definition| process_watch_folder_def(definition) }
    when Hash
      watch_folder_defs.map do |key, definition|
        definition['name'] ||= key
        process_watch_folder_def(definition)
      end
    else
      raise "Unhandled format: #{watch_folder_defs.class.name}"
    end
  # process_watch_folder_def answers false on failure, so filter on truthiness.
  @watch_folders = built.select { |folder| folder }
  logger.debug { 'Processing of watch folder definitions completed.' }
end
191
+
192
# Records the file's path in the watch folder's ignore list and, when the file
# object offers an #ignore method, calls it. Both happen while holding the
# ignore list lock.
#
# @param [Object] watch_folder Key of the ignore list to extend
# @param [Object] file Must respond to #path
def add_to_ignore(watch_folder, file)
  logger.debug { "Adding File to Ignore Cache: '#{file.path}'" }
  @ignored_file_paths_lock.synchronize do
    ignored_paths = @ignored_file_paths_by_watch_folder[watch_folder]
    ignored_paths.push(file.path)
    file.ignore if file.respond_to?(:ignore)
  end
end
201
+
202
# Uploads one file through the agent and then disposes of it:
# * upload succeeded: the file is moved to the completed directory when one is
#   configured and exists (ignored when configured but missing), otherwise it
#   is deleted;
# * upload failed: the file is moved to the quarantine directory when one is
#   configured and exists, otherwise it is added to the ignore list.
#
# `file.processing` is true for the duration of the call; `file.processed` is
# set once the upload outcome has been handled.
#
# @param [Object] watch_folder Must respond to #definition (String keyed Hash)
# @param [Object] file Must respond to #name, #path, #processing=, #processed= and #exception=
# @return [Hash] The upload response; `{ success: true|false }` when the agent
#   answered with a bare boolean or nil, and
#   `{ success: false, message: 'Missing storage ID.' }` when no storage id is configured
# @raise [StandardError] Any error is stored in `file.exception` and re-raised
def process_file(watch_folder, file)
  file.processing = true
  file_name = file.name || file.path
  logger.debug { "Processing File '#{file_name}'" }

  definition = watch_folder.definition
  storage_id = definition['upload_to_storage_id']
  quarantine_directory_path = definition['quarantine_directory_path']
  completed_directory_path = definition['completed_directory_path']

  # full_file_path = File.join(watch_folder.path, file.path)
  full_file_path = file.path
  unless storage_id
    logger.warn { "Skipping processing of file because of missing storage ID." }
    return { success: false, message: 'Missing storage ID.' }
  end

  response = agent.upload(file_path: full_file_path, storage_id: storage_id)
  # Normalise bare booleans, and a missing answer, into the hash form used below.
  response = { success: response ? true : false } if [true, false, nil].include?(response)

  if response[:success]
    if completed_directory_path
      if Dir.exist?(completed_directory_path)
        logger.debug { "Moving '#{full_file_path}' to completed directory path '#{completed_directory_path}'" }
        FileUtils.mv full_file_path, completed_directory_path
      else
        logger.warn { "Completed directory path not found: '#{completed_directory_path}'" }
        add_to_ignore(watch_folder, file)
      end
    else
      # No completed directory configured: the uploaded source file is removed.
      FileUtils.rm full_file_path
    end
  elsif quarantine_directory_path && Dir.exist?(quarantine_directory_path)
    logger.warn { "Moving '#{full_file_path}' to quarantine directory path '#{quarantine_directory_path}'" }
    FileUtils.mv full_file_path, quarantine_directory_path
  else
    logger.warn { "Adding '#{full_file_path}' to the temporary ignore list." }
    add_to_ignore(watch_folder, file)
  end

  file.processed = true

  response
rescue => e
  file.exception = e
  raise
ensure
  file.processing = false
end
255
+
256
# Finds the first pattern that matches the file's path. Regexp patterns are
# matched with Regexp#match, everything else is treated as a glob and checked
# with File.fnmatch. Each comparison is logged at debug level.
#
# @param [Array<Regexp, String>] patterns
# @param [Object] file Must respond to #path
# @return [Regexp, String, nil] The matching pattern, or nil when none matched
def find_in_patterns(patterns, file)
  patterns.find do |candidate|
    hit =
      if candidate.is_a?(Regexp)
        candidate.match(file.path)
      else
        File.fnmatch(candidate, file.path)
      end
    logger.debug { "#{candidate} #{hit ? 'matched' : "didn't match"} #{file.path}" }
    hit
  end
end
264
+
265
# Decides what to do with each stable file reported by a watch folder.
#
# Files that are already ignored, in progress or processed are skipped. Files
# that fail the include patterns (when any are configured) or hit an exclude
# pattern are added to the ignore list. Everything else is handed to
# #process_file, either inline or, when @threaded is set, in a background
# thread per file.
#
# In threaded mode at most `maximum_active_processors` threads run per watch
# folder (a falsy value means no limit); files over the limit are left for a
# later poll. Threads are not joined here, so the call returns while uploads
# are still running.
#
# @param [Object] wf Watch folder; must respond to #definition, #name and #paths
# @param [Array<Object>] stable_files Files responding to #path, #processing(=) and #processed
# @return [Array<Object>] stable_files
def process_watch_folder_stable_files(wf, stable_files)
  active_processors = @processors_by_watch_folder[wf]
  maximum_active_processors = wf.definition['maximum_active_processors']

  includes = wf.definition['includes'] || []
  excludes = wf.definition['excludes'] || []

  stable_files.each do |file|
    next if file.processing || file.processed

    # The ignore list is written from processor threads, so read it under the lock.
    # The lock is released before add_to_ignore (which takes it again) is called.
    ignored = @ignored_file_paths_lock.synchronize do
      @ignored_file_paths_by_watch_folder[wf].include?(file.path)
    end
    next if ignored

    if !includes.empty? && !find_in_patterns(includes, file)
      add_to_ignore(wf, file)
      next
    end

    if find_in_patterns(excludes, file)
      add_to_ignore(wf, file)
      next
    end

    unless @threaded
      process_file(wf, file)
      next
    end

    # Forget processors that have finished before checking the limit.
    active_processors.keep_if { |_file, thread| thread.alive? }
    if maximum_active_processors && active_processors.length >= maximum_active_processors
      logger.debug { "Maximum number of active processors reached for watch folder. #{wf.name || wf.paths}" }
      next
    end

    # Flag the file before the thread starts so the next poll cannot pick it up again.
    file.processing = true
    active_processors[file] = Thread.new(wf, file) do |folder, stable_file|
      begin
        process_file(folder, stable_file)
      rescue => e
        # Nobody joins this thread, so the error is reported here rather than re-raised.
        logger.error { "Exception '#{e.message}' in thread for `#{folder.name || folder.paths}` `#{stable_file.path}`. " }
      ensure
        stable_file.processing = false
      end
    end
  end
end
317
+
318
# @TODO Thread this
#
# Polls every watch folder that has never been polled or whose poll interval
# has elapsed, then hands the stable files of each polled folder to
# #process_watch_folder_stable_files. All due folders are polled before any
# file is processed.
#
# @return [Hash] Polled watch folders mapped to the stable files they reported
def poll
  polled = watch_folders.each_with_object({}) do |folder, results|
    due = !folder.last_poll_time || (Time.now - folder.last_poll_time) >= folder.poll_interval
    next unless due

    results[folder] = folder.poll
  end

  polled.each do |folder, files|
    process_watch_folder_stable_files(folder, files)
  end
end
331
+
332
# The main execution method.
#
# Starts every watch folder that responds to #run, then polls once a second
# until an Interrupt or SystemExit is received. Any other error is logged and
# re-raised. Watch folders that respond to #stop are stopped on the way out,
# whatever the reason for leaving.
def run
  # AWF.run_once(watch_folders) { |wf| pp wf }
  # AWF.run(watch_folders) { |wf| process_watch_folder(wf) }

  logger.info { 'Running...' }
  watch_folders.each do |folder|
    folder.run if folder.respond_to?(:run)
  end

  loop do
    begin
      poll
      sleep 1
    rescue Interrupt, SystemExit => signal
      logger.debug { "Received Signal: #{signal.class.name}" }
      break
    end
  end

  logger.info { 'Exiting...' }
rescue => error
  logger.error { "An error occurred.\n#{error.message}\n#{error.backtrace.join("\n")}\n#{error.message}" }
  raise error
ensure
  watch_folders.each do |folder|
    folder.stop if folder.respond_to?(:stop)
  end
end
355
+
356
# Placeholder for a single pass over the watch folders; it currently does
# nothing and returns nil.
def run_once
  # AWF.run_once(watch_folders) { |wf| process_watch_folder(wf) }
  nil
end
359
+
360
# Initialize then run. Any StandardError raised while building or running the
# manager terminates the process through Kernel#abort, printing the error
# message followed by its backtrace.
#
# @param [Hash] args Passed to .new
def self.run(args)
  w = self.new(args)
  w.run
rescue => e
  # Print the message once, then where it came from.
  abort("An error occurred.\n#{e.message}\n#{Array(e.backtrace).join("\n")}")
end
366
+
367
# Wrapper around Daemons.run_proc that first looks at the pid files Daemons
# already knows about. When applications are found but none of them reports
# running (or there is a single one whose pid is 0) the pid files are treated
# as stale: they are zapped and the :force option is set on the controller
# and its application group. Afterwards Daemons.run_proc is invoked.
#
# @param [Hash] options Daemons options; :mode and :proc are set when a block is given
# @yield The code to run as the daemon
def self.daemons_run_proc_with_cleanup(options, &block)
  options.merge!(mode: :proc, proc: block) if block_given?

  controller = Daemons::Controller.new(options, ARGV)
  _command, controller_argv, app_argv = controller.class.split_argv(ARGV)

  group = Daemons::ApplicationGroup.new(controller.app_name, controller.options)
  group.controller_argv = controller_argv
  group.app_argv = app_argv

  group.setup
  existing = group.applications

  running = existing.find { |app| app.running? }
  unless existing.empty?
    puts "Found #{existing.length} existing pid file(s) #{existing.map { |a| a.pid.pid }}"
    if !running || (existing.length == 1 && existing.first.pid.pid == 0)
      warn "Found stale pid file(s)"
      group.zap_all
      group.options[:force] = true
      # group.applications = []

      controller.options[:force] = true
    end
  end

  Daemons.run_proc(options[:app_name], options, &block)
  # controller.catch_exceptions do
  #   controller.run
  # end
end
403
+
404
# Runs the manager as a daemon named 'cantemo-portal-watch-folders' using the
# daemons gem. Pid files are kept in the directory of this source file.
#
# @param [Hash] args Passed to .run inside the daemon
# @param [Hash] options Daemons options; :app_name, :mode, :dir_mode and :dir are overwritten
def self.run_as_daemon(args, options = {})
  # ARGV.unshift 'run' unless %w(start stop restart run zap killall status).include? ARGV.first
  require 'daemons'
  # Daemons.run_proc('cantemo-portal-watch-folders', { force: true }) { self.run(args) }
  runner = Proc.new { self.run(args) }

  options.merge!(
    app_name: 'cantemo-portal-watch-folders',
    mode: :proc,
    dir_mode: :normal,
    dir: File.split(__FILE__).first
  )

  daemons_run_proc_with_cleanup(options, &runner)

  # app_controller = Daemons::Controller.new(options, ARGV)
  # Daemons.run_proc(app_name, options, &proc)
end
421
+
422
+ end
423
+
424
+ end
425
+
426
+ end