cantemo-portal-agent 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,6 +35,7 @@ module Envoi::Mam::Cantemo
35
35
  end
36
36
 
37
37
  def add_to_ignore(wf, file)
38
+ logger.debug { "Adding path to ignore cache: '#{file.path}'" }
38
39
  @ignored_file_paths_by_watch_folder[wf] << file.path
39
40
  end
40
41
 
@@ -64,8 +65,9 @@ module Envoi::Mam::Cantemo
64
65
  if exclude
65
66
  next if ignored_files.include?(file.path)
66
67
  if [*exclude].find { |ep| File.fnmatch(ep, file.path) }
67
- logger.debug { "Adding File to Ignore Cache: '#{file.path}'"}
68
- ignored_files << file.path
68
+ add_to_ignore(wf, file)
69
+ # logger.debug { "Adding File to Ignore Cache: '#{file.path}'"}
70
+ # ignored_files << file.path
69
71
  next
70
72
  end
71
73
  end
@@ -93,6 +95,7 @@ module Envoi::Mam::Cantemo
93
95
  AWF.run_once(watch_folders) { |wf| process_watch_folder(wf) }
94
96
  end
95
97
 
98
+ # Initialize then run
96
99
  def self.run(args)
97
100
  w = self.new(args)
98
101
  w.run
@@ -0,0 +1,426 @@
1
+ require 'envoi/mam/cantemo/agent'
2
+ require 'envoi/aspera/watch_service/watch_folder'
3
+ require 'envoi/watch_folder_utility/watch_folder/handler/listen'
4
+
5
+ module Envoi::Mam::Cantemo
6
+
7
+ class Agent
8
+
9
+ class WatchFolderManager
10
+
11
+ # AWF = Envoi::Aspera::WatchService::WatchFolder # Aspera Watch Folder
12
+ LWF = Envoi::WatchFolderUtility::WatchFolder::Handler::Listen # Listen Watch Folder
13
+
14
+ DEFAULT_WATCH_FOLDER_PROCESSOR_LIMIT = 10
15
+
16
+ attr_accessor :logger, :agent, :config, :watch_folder_defs, :watch_folders
17
+
18
# Builds the manager: sets up logging, loads the Cantemo agent from its config
# file, reads the watch folder definitions from the agent's config and turns
# them into watch folder instances.
#
# @param [Hash] args also handed to initialize_logger and to
#   Envoi::Mam::Cantemo::Agent.load_from_config_file; it is mutated
# @option args [Boolean] :default_preserve_file_path (false)
# @option args [Boolean] :threaded (true)
def initialize(args = {})
  initialize_logger(args)
  logger.debug { 'Initializing Agent Watch Folder Manager.' }

  logger.debug { 'Initializing Cantemo Portal Agent.' }
  args[:default_preserve_file_path] = false unless args.key?(:default_preserve_file_path)
  @agent = Envoi::Mam::Cantemo::Agent.load_from_config_file(args)
  logger.debug { 'Cantemo Portal Agent Initialized.' }

  # The watch folder definitions live under the 'cantemo' section of the
  # agent config; both symbol and string keys are accepted.
  @config = agent.config
  cantemo_section = config[:cantemo] || config['cantemo']
  @watch_folder_defs = cantemo_section[:watch_folders] || cantemo_section['watch_folders']

  # Per-watch-folder bookkeeping, created lazily on first access.
  @ignored_file_paths_lock = Mutex.new
  @ignored_file_paths_by_watch_folder = Hash.new { |cache, folder| cache[folder] = [] }
  @processors_by_watch_folder = Hash.new { |cache, folder| cache[folder] = {} }

  @threaded = args.fetch(:threaded, true)
  @default_maximum_active_processors = DEFAULT_WATCH_FOLDER_PROCESSOR_LIMIT

  process_watch_folder_defs
end
42
+
43
# Sets up @logger and its level.
#
# args is mutated so that the resolved logger and log level are visible to
# whoever receives the same hash afterwards.
#
# @param [Hash] args
# @option args [Logger] :logger an existing logger to reuse
# @option args [String, IO] :log_to (STDOUT) target used when a logger has to be created
# @option args [Integer] :log_level (Logger::DEBUG)
# @return [Logger]
def initialize_logger(args = {})
  @logger = args[:logger] ||= Logger.new(args[:log_to] || STDOUT)
  # Honour a caller-supplied level; DEBUG is only the fallback. Previously the
  # supplied level was unconditionally overwritten with DEBUG.
  @logger.level = (args[:log_level] ||= Logger::DEBUG)
  @logger
end
52
+
53
# @TODO Move most of this code into a watch folder class
#
# Normalizes a watch folder definition in place and builds a Listen watch
# folder (LWF) from it. Singular and alias keys ('path', 'include', 'exclude',
# 'quarantine_path', 'completed_path', 'storage_id') are folded into their
# canonical keys and removed from the definition.
#
# @param [Hash] watch_folder_def
# @option watch_folder_def [String] path
# @option watch_folder_def [String] upload_to_storage_id
# @option watch_folder_def [String] name (path)
# @option watch_folder_def [Array<String>] paths ([path])
# @option watch_folder_def [String] exclude ('**/.*')
# @option watch_folder_def [Array<String>] excludes ([exclude])
# @option watch_folder_def [String] include
# @option watch_folder_def [Array<String>] includes ([include])
# @option watch_folder_def [String] quarantine_directory_path
# @option watch_folder_def [String] completed_directory_path
# @option watch_folder_def [Integer|False] maximum_active_processors (@default_maximum_active_processors)
# @option watch_folder_def [Hash] logging
# @return [Object, false] the watch folder instance, or false when no path
#   could be determined from the definition
def process_watch_folder_def(watch_folder_def)
  logger.debug { "Initializing Watch Folder #{watch_folder_def.inspect}" }

  logger.debug { "Initializing parameter 'paths'." }
  name = watch_folder_def['name']

  # Array() accepts nil, a single String or an Array alike.
  paths = Array(watch_folder_def['paths']) + Array(watch_folder_def['path'])
  paths.map! { |p| File.expand_path(p) }
  if paths.empty? && name && !name.to_s.empty?
    # No explicit path: fall back to the name if it denotes a directory.
    name_as_path = File.expand_path(name.to_s)
    # Array#concat only takes arrays; a single path has to be appended with <<.
    paths << name_as_path if Dir.exist?(name_as_path)
  end
  paths.uniq!
  watch_folder_def['paths'] = paths
  # watch_folder_def['path'] ||= paths.first if paths.length == 1
  watch_folder_def.delete('path')

  if paths.empty?
    logger.error { "Failed to initialize watch folder. No path found in watch folder definition." }
    return false
  end
  logger.debug { "Parameter 'paths' initialized." }

  logger.debug { "Initializing parameter 'includes'." }
  includes = Array(watch_folder_def['includes']) + Array(watch_folder_def['include'])
  includes.uniq!
  includes.map! { |e| Regexp.try_convert(e) || e }
  watch_folder_def['includes'] = includes
  watch_folder_def.delete('include')
  logger.debug { "Parameter `includes` initialized." }

  logger.debug { "Initializing parameter 'excludes'." }
  # The default pattern is added whenever no singular 'exclude' is given,
  # even if 'excludes' is present.
  exclude = watch_folder_def['exclude'] || '**/.*'
  excludes = Array(watch_folder_def['excludes']) + Array(exclude)
  excludes.uniq!
  excludes.map! { |e| Regexp.try_convert(e) || e }
  watch_folder_def['excludes'] = excludes
  watch_folder_def.delete('exclude')
  logger.debug { "Parameter `excludes` initialized." }

  logger.debug { "Initializing parameter `quarantine directory path`." }
  quarantine_directory_path = watch_folder_def['quarantine_directory_path'] || watch_folder_def['quarantine_path']
  if quarantine_directory_path
    quarantine_directory_path = File.expand_path(quarantine_directory_path)
    watch_folder_def['quarantine_directory_path'] = quarantine_directory_path

    unless Dir.exist?(quarantine_directory_path)
      logger.warn { "Quarantine directory path '#{quarantine_directory_path}' does not exist. Files will be ignored instead." }
    end
  end
  watch_folder_def.delete('quarantine_path')
  logger.debug { "Parameter `quarantine directory path` initialized." }

  logger.debug { "Initializing parameter 'completed directory path'." }
  completed_directory_path = watch_folder_def['completed_directory_path'] || watch_folder_def['completed_path']
  if completed_directory_path
    completed_directory_path = File.expand_path(completed_directory_path)
    watch_folder_def['completed_directory_path'] = completed_directory_path

    unless Dir.exist?(completed_directory_path)
      logger.warn { "Completed directory path '#{completed_directory_path}' does not exist. File will be ignored instead." }
    end
  end
  watch_folder_def.delete('completed_path')
  logger.debug { "Parameter 'completed directory path' initialized." }

  logger.debug { "Initializing parameter `upload to storage id`." }
  storage_id = watch_folder_def['upload_to_storage_id'] || watch_folder_def['storage_id']
  watch_folder_def['upload_to_storage_id'] ||= storage_id
  watch_folder_def.delete('storage_id')
  unless storage_id
    logger.warn { "No `upload to storage id` specified. Uploading will be skipped for this watch folder." }
  end
  logger.debug { "Parameter 'upload to storage id' initialized." }

  if watch_folder_def['maximum_active_processors'].nil?
    watch_folder_def['maximum_active_processors'] = @default_maximum_active_processors
  end

  # Logging options may sit under a 'logging' key or directly on the definition.
  # Values are only checked with #empty? when they support it, so an Integer
  # log level or an IO log target does not raise.
  blank = ->(value) { value.nil? || (value.respond_to?(:empty?) && value.empty?) }
  logging = watch_folder_def['logging'] || watch_folder_def
  log_to = logging['log_to']
  log_level = logging['log_level']

  args_out = {}
  args_out[:log_to] = log_to unless blank.call(log_to)
  args_out[:log_level] = log_level unless blank.call(log_level)
  args_out[:logger] = logger.dup unless log_to
  args_out[:definition] = watch_folder_def

  logger.debug { "Initializing Watch Folder Instance from Def. #{watch_folder_def}" }
  watch_folder = LWF.new(args_out)
  logger.debug { "Watch Folder Instance Created." }
  watch_folder
end
173
+
174
# Turns watch_folder_defs into watch folder instances and stores them in
# @watch_folders. Definitions may be given as an Array, or as a Hash keyed by
# name (the key is used as the definition's 'name' unless one is already set).
# Definitions that fail to initialize (falsy results) are dropped.
#
# @raise [RuntimeError] when watch_folder_defs is neither an Array nor a Hash
def process_watch_folder_defs
  logger.debug { 'Processing watch folder definitions.' }
  definitions = watch_folder_defs
  built =
    case definitions
    when Array
      definitions.map { |definition| process_watch_folder_def(definition) }
    when Hash
      definitions.map do |key, definition|
        definition['name'] ||= key
        process_watch_folder_def(definition)
      end
    else
      raise "Unhandled format: #{definitions.class.name}"
    end
  # Failed definitions come back as false, so nil-only filtering is not enough.
  @watch_folders = built.select { |watch_folder| watch_folder }
  logger.debug { 'Processing of watch folder definitions completed.' }
end
191
+
192
# Adds the file's path to the watch folder's ignore list while holding the
# ignore-list lock, and calls file.ignore when the file supports it.
#
# @param [Object] watch_folder key into the per-watch-folder ignore cache
# @param [Object] file responds to #path and optionally #ignore
# @return [Object, nil] the result of file.ignore, or nil when file has no #ignore
def add_to_ignore(watch_folder, file)
  logger.debug { "Adding File to Ignore Cache: '#{file.path}'" }
  @ignored_file_paths_lock.synchronize do
    @ignored_file_paths_by_watch_folder[watch_folder].push(file.path)
    file.respond_to?(:ignore) ? file.ignore : nil
  end
end
201
+
202
# Uploads a file through the agent and then disposes of it.
#
# On a successful upload the file is moved to the watch folder's completed
# directory when one is configured and exists, added to the ignore list when
# one is configured but missing, and deleted when none is configured. On a
# failed upload the file is moved to the quarantine directory when that is
# configured and exists, otherwise it is added to the ignore list.
#
# file.processing is true for the duration of the call and always reset to
# false afterwards; file.processed is set once the file has been disposed of.
#
# @param [Object] watch_folder responds to #definition (a Hash with string keys)
# @param [Object] file responds to #path, #name, #processing=, #processed= and #exception=
# @return [Hash] the upload response; a bare true/false/nil result from the
#   agent is wrapped as { success: true|false }. When no storage ID is
#   configured, { success: false, message: 'Missing storage ID.' }.
# @raise [StandardError] any error raised while processing, after storing it in file.exception
def process_file(watch_folder, file)
  file.processing = true
  file_name = file.name || file.path
  logger.debug { "Processing File '#{file_name}'" }

  definition = watch_folder.definition
  storage_id = definition['upload_to_storage_id']
  quarantine_directory_path = definition['quarantine_directory_path']
  completed_directory_path = definition['completed_directory_path']

  # full_file_path = File.join(watch_folder.path, file.path)
  full_file_path = file.path
  unless storage_id
    logger.warn { "Skipping processing of file because of missing storage ID." }
    return { success: false, message: 'Missing storage ID.' }
  end

  _response = agent.upload(file_path: full_file_path, storage_id: storage_id)
  # Wrap bare results. nil is treated as a failed upload instead of blowing up
  # on the [:success] lookup below.
  if _response.nil? || _response == true || _response == false
    _response = { success: _response ? true : false }
  end

  if _response[:success]
    if completed_directory_path
      if Dir.exist?(completed_directory_path)
        logger.debug { "Moving '#{full_file_path}' to completed directory path '#{completed_directory_path}'" }
        FileUtils.mv full_file_path, completed_directory_path
      else
        logger.warn { "Completed directory path not found: '#{completed_directory_path}'" }
        add_to_ignore(watch_folder, file)
      end
    else
      # No completed directory configured: the uploaded source file is removed.
      FileUtils.rm full_file_path
    end
  elsif quarantine_directory_path && Dir.exist?(quarantine_directory_path)
    logger.warn { "Moving '#{full_file_path}' to quarantine directory path '#{quarantine_directory_path}'" }
    FileUtils.mv full_file_path, quarantine_directory_path
  else
    logger.warn { "Adding '#{full_file_path}' to the temporary ignore list." }
    add_to_ignore(watch_folder, file)
  end

  file.processed = true

  _response
rescue => e
  file.exception = e
  raise
ensure
  file.processing = false
end
255
+
256
# Returns the first pattern that matches the file's path. Regexp patterns are
# matched with Regexp#match, everything else with File.fnmatch.
#
# @param [Array<Regexp, String>] patterns
# @param [Object] file responds to #path
# @return [Regexp, String, nil] the matching pattern, or nil when none matches
def find_in_patterns(patterns, file)
  patterns.find do |candidate|
    hit = case candidate
          when Regexp then candidate.match(file.path)
          else File.fnmatch(candidate, file.path)
          end
    logger.debug { "#{candidate} #{hit ? 'matched' : "didn't match"} #{file.path}" }
    hit
  end
end
264
+
265
# Filters a watch folder's stable files and processes the ones that qualify.
#
# A file is skipped when it is already on the ignore list, is being processed,
# or has been processed. A file that matches none of the include patterns
# (when any are configured) or matches an exclude pattern is added to the
# ignore list. Remaining files are handed to process_file, inside a thread
# when @threaded is set.
#
# @param [Object] wf watch folder; responds to #definition, #name and #paths
# @param [Enumerable] stable_files files responding to #path, #processing and #processed
# @return [Enumerable] stable_files
def process_watch_folder_stable_files(wf, stable_files)
  active_processors = @processors_by_watch_folder[wf]
  definition = wf.definition
  maximum_active_processors = definition['maximum_active_processors']
  includes = definition['includes']
  excludes = definition['excludes']

  ignored_file_paths = @ignored_file_paths_by_watch_folder[wf]
  stable_files.each do |file|
    next if ignored_file_paths.include?(file.path)
    next if file.processing
    next if file.processed

    if includes && !includes.empty? && !find_in_patterns(includes, file)
      add_to_ignore(wf, file)
      next
    end

    if excludes && find_in_patterns(excludes, file)
      add_to_ignore(wf, file)
      next
    end

    unless @threaded
      process_file(wf, file)
      next
    end

    # Forget processors whose file is no longer being processed.
    active_processors.keep_if { |active_file, _thread| active_file.processing }

    # A false (or missing) limit means "no limit"; comparing an Integer with
    # false/nil would raise ArgumentError.
    # NOTE(review): `>` lets one more processor than the limit start; confirm
    # whether `>=` is intended.
    if maximum_active_processors && active_processors.length > maximum_active_processors
      logger.debug { "Maximum number of active processors reached for watch folder. #{wf.name || wf.paths}" }
      next
    end

    worker = Thread.new(wf, file) do |thread_wf, thread_file|
      begin
        process_file(thread_wf, thread_file)
      rescue => e
        logger.error { "Exception '#{e.message}' in thread for `#{thread_wf.name || thread_wf.paths}` `#{thread_file.path}`. " }
        raise
      ensure
        thread_file.processing = false rescue nil
      end
    end
    # NOTE(review): joining right away makes processing effectively serial and
    # re-raises the thread's exception here; it also means the registration
    # below only happens if the file is still flagged as processing after the
    # thread has finished. Kept as-is to preserve current behaviour.
    worker.join
    active_processors[file] = worker if file.processing
  end
end
317
+
318
# @TODO Thread this
#
# Polls every watch folder that has never been polled or whose poll interval
# has elapsed, then processes the stable files each of those polls returned.
# All due folders are polled before any file is processed.
#
# @return [Hash] the polled watch folders mapped to their stable files
def poll
  pending = watch_folders.each_with_object({}) do |folder, collected|
    previous_poll = folder.last_poll_time
    next if previous_poll && (Time.now - previous_poll) < folder.poll_interval

    collected[folder] = folder.poll
  end

  pending.each { |folder, files| process_watch_folder_stable_files(folder, files) }
end
331
+
332
# The main execution method.
#
# Starts every watch folder that responds to #run, then polls once a second
# until an Interrupt or SystemExit is received. Any other error is logged and
# re-raised. Watch folders that respond to #stop are always stopped on the
# way out.
def run
  # AWF.run_once(watch_folders) { |wf| pp wf }
  # AWF.run(watch_folders) { |wf| process_watch_folder(wf) }

  logger.info { 'Running...' }
  watch_folders.each { |folder| folder.run if folder.respond_to?(:run) }

  begin
    loop do
      poll
      sleep 1
    end
  rescue Interrupt, SystemExit => signal
    logger.debug { "Received Signal: #{signal.class.name}" }
  end

  logger.info { 'Exiting...' }
rescue => e
  logger.error { "An error occurred.\n#{e.message}\n#{e.backtrace.join("\n")}\n#{e.message}" }
  raise
ensure
  watch_folders.each { |folder| folder.stop if folder.respond_to?(:stop) }
end
355
+
356
# Placeholder for a single-pass run. Currently a no-op that returns nil; the
# commented-out AWF call is kept for reference.
def run_once
  # AWF.run_once(watch_folders) { |wf| process_watch_folder(wf) }
  nil
end
359
+
360
# Builds a manager from args and runs it. Any StandardError raised while
# building or running aborts the process with the error message.
#
# @param [Hash] args passed to .new
def self.run(args)
  new(args).run
rescue => e
  # The message used to be interpolated twice; print it once.
  abort("An error occurred.\n#{e.message}")
end
366
+
367
# Runs the given block through Daemons.run_proc, first checking for existing
# pid files and zapping them when they look stale.
#
# Pid files are considered stale when no discovered application reports
# running?, or when the only application has pid 0. In that case zap_all is
# called on the application group and :force is set on both the group's and
# the controller's options.
# NOTE(review): relies on Daemons::Controller / Daemons::ApplicationGroup
# internals; presumably zap_all clears the pid files - confirm against the
# daemons gem version in use.
#
# @param [Hash] options Daemons options; :mode and :proc are set when a block is given
def self.daemons_run_proc_with_cleanup(options, &block)
  if block_given?
    options[:mode] = :proc
    options[:proc] = block
  end

  controller = Daemons::Controller.new(options, ARGV)
  _command, controller_argv, app_argv = controller.class.split_argv(ARGV)

  group = Daemons::ApplicationGroup.new(controller.app_name, controller.options)
  group.controller_argv = controller_argv
  group.app_argv = app_argv
  group.setup

  existing = group.applications
  none_running = existing.none? { |app| app.running? }
  unless existing.empty?
    puts "Found #{existing.length} existing pid file(s) #{existing.map { |a| a.pid.pid }}"
    stale = none_running || (existing.length == 1 && existing.first.pid.pid == 0)
    if stale
      warn "Found stale pid file(s)"
      group.zap_all
      group.options[:force] = true
      # controller_group.applications = []

      controller.options[:force] = true
    end
  end

  Daemons.run_proc(options[:app_name], options, &block)
  # controller.catch_exceptions do
  #   controller.run
  # end
end
403
+
404
# Runs the manager as a daemon named 'cantemo-portal-watch-folders'.
#
# The daemons gem is loaded lazily here. The passed options hash is updated
# in place with the app name, :proc mode and a :normal dir mode pointing at
# the directory containing this file.
#
# @param [Hash] args passed to .run inside the daemonized block
# @param [Hash] options Daemons options
def self.run_as_daemon(args, options = {})
  # ARGV.unshift 'run' unless %w(start stop restart run zap killall status).include? ARGV.first
  require 'daemons'

  options.update(
    app_name: 'cantemo-portal-watch-folders',
    mode: :proc,
    dir_mode: :normal,
    dir: File.dirname(__FILE__)
  )

  daemons_run_proc_with_cleanup(options) { self.run(args) }
end
421
+
422
+ end
423
+
424
+ end
425
+
426
+ end