magellan-gcs-proxy 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ # coding: utf-8
2
+ require 'magellan/gcs/proxy'
3
+
4
+ require 'yaml'
5
+ require 'erb'
6
+
7
+ module Magellan
8
+ module Gcs
9
+ module Proxy
10
# Proxy configuration read from a YAML file that is rendered through ERB
# before being parsed. Top-level entries are looked up with #[].
class Config
  # Spellings of ENV['VERBOSE'] that enable verbose mode. The pattern is
  # anchored so values that merely contain one of the words (for example
  # "none" or "10") do not switch verbose mode on by accident.
  VERBOSE_PATTERN = /\A(?:true|yes|on|1)\z/i

  # @return [String] path of the configuration file
  attr_reader :path

  # @param path [String] ERB-enabled YAML file to read
  def initialize(path = './config.yml')
    @path = path
  end

  # @return [Object] the parsed configuration, loaded on first use and memoized
  def data
    @data ||= load_file
  end

  # Forgets the memoized configuration so the next #data call reloads the file.
  def reset
    @data = nil
  end

  # Renders the file through ERB (trim mode '-') and parses the result as YAML.
  # The rendered text is echoed to stdout only in verbose mode, because ERB
  # templates can interpolate arbitrary values into it.
  #
  # @return [Object] parsed YAML; an empty Hash when the document is empty
  def load_file
    rendered = render(File.read(path))
    dump(rendered) if verbose?
    # NOTE(review): YAML.load is kept as in the original; only point this at
    # trusted configuration files.
    YAML.load(rendered) || {}
  end

  # @param key [#to_s] top-level configuration key
  # @return [Object, nil] the value stored under the stringified key
  def [](key)
    data[key.to_s]
  end

  # @return [Boolean] true when ENV['VERBOSE'] is exactly true/yes/on/1
  #   (case-insensitive, surrounding whitespace ignored)
  def verbose?
    !(VERBOSE_PATTERN =~ ENV['VERBOSE'].to_s.strip).nil?
  end

  private

  # Evaluates +source+ as an ERB template, reporting errors against #path.
  def render(source)
    erb =
      if ERB.instance_method(:initialize).parameters.assoc(:key)
        # Newer ERB takes trim_mode as a keyword; positional form is deprecated.
        ERB.new(source, trim_mode: '-')
      else
        ERB.new(source, nil, '-')
      end
    erb.filename = path
    erb.result
  end

  # Prints the rendered configuration between two separator lines.
  def dump(text)
    puts '=' * 100
    puts text
    puts '-' * 100
  end
end
42
+ end
43
+ end
44
+ end
@@ -1,6 +1,6 @@
1
1
  # coding: utf-8
2
- require "magellan/gcs/proxy"
3
- require "magellan/gcs/proxy/log"
2
+ require 'magellan/gcs/proxy'
3
+ require 'magellan/gcs/proxy/log'
4
4
 
5
5
  require 'fileutils'
6
6
  require 'uri'
@@ -10,30 +10,27 @@ module Magellan
10
10
  module Proxy
11
11
  class Context
12
12
  include Log
13
+ include Proxy::ProgressNotification
13
14
 
14
- attr_reader :workspace, :remote_download_files
15
- def initialize(workspace, remote_download_files)
16
- @workspace = workspace
17
- @remote_download_files = remote_download_files
15
+ attr_reader :message, :workspace, :remote_download_files
16
# Builds a context around +message+.
#
# The 'download_files' attribute of the message is decoded with parse_json
# and kept as the remote download file list; the workspace stays nil until
# one is assigned later.
def initialize(message)
  @message = message
  download_files_json = message.attributes['download_files']
  @remote_download_files = parse_json(download_files_json)
  @workspace = nil
end
19
21
 
20
- KEYS = [
21
- :workspace,
22
- :downloads_dir, :uploads_dir,
23
- :download_files,
24
- :local_download_files,
25
- :remote_download_files,
26
- ].freeze
27
-
28
- def [](key)
29
- case key.to_sym
30
- when *KEYS then send(key)
31
- else nil
22
# Runs the caller's block inside a temporary workspace directory.
#
# A directory prefixed with 'workspace' is created through the block form of
# Dir.mktmpdir, recorded as the workspace, populated by setup_dirs, and the
# block is then yielded to from within PubsubSustainer.run for the current
# message.
def setup
  Dir.mktmpdir('workspace') do |tmp_dir|
    @workspace = tmp_dir
    setup_dirs
    PubsubSustainer.run(message) { yield }
  end
end
34
31
 
35
- def include?(key)
36
- KEYS.include?(key)
32
# Formats +hash+ as a single LTSV line: "label:value" fields joined by tabs.
def ltsv(hash)
  fields = hash.each_with_object([]) do |(label, value), acc|
    acc << "#{label}:#{value}"
  end
  fields.join("\t")
end
38
35
 
39
36
  def downloads_dir
@@ -47,16 +44,12 @@ module Magellan
47
44
  def local_download_files
48
45
  @local_download_files ||= build_local_files_obj(remote_download_files, download_mapping)
49
46
  end
50
- alias_method :download_files, :local_download_files
47
+ alias download_files local_download_files
51
48
 
52
49
  def uploads_dir
53
50
  File.join(workspace, 'uploads')
54
51
  end
55
52
 
56
- def setup
57
- setup_dirs
58
- end
59
-
60
53
  def download
61
54
  download_mapping.each do |url, path|
62
55
  FileUtils.mkdir_p File.dirname(path)
@@ -64,7 +57,7 @@ module Magellan
64
57
  uri = parse_uri(url)
65
58
  @last_bucket_name = uri.host
66
59
  bucket = GCP.storage.bucket(@last_bucket_name)
67
- file = bucket.file uri.path.sub(/\A\//, '')
60
+ file = bucket.file uri.path.sub(%r{\A/}, '')
68
61
  file.download(path)
69
62
  logger.info("Download OK: #{url} to #{path}")
70
63
  end
@@ -73,7 +66,7 @@ module Magellan
73
66
  def upload
74
67
  Dir.chdir(uploads_dir) do
75
68
  Dir.glob('**/*') do |path|
76
- next if File.directory?(path)
69
+ next if directory?(path)
77
70
  url = "gs://#{@last_bucket_name}/#{path}"
78
71
  logger.info("Uploading: #{path} to #{url}")
79
72
  bucket = GCP.storage.bucket(@last_bucket_name)
@@ -83,8 +76,12 @@ module Magellan
83
76
  end
84
77
  end
85
78
 
79
# Tells whether +path+ names a directory; a thin wrapper around
# File.directory? that gives the check its own overridable method.
def directory?(path)
  is_directory = File.directory?(path)
  is_directory
end
82
+
86
83
  def setup_dirs
87
- [:downloads_dir, :uploads_dir].each{|k| Dir.mkdir(send(k))}
84
+ [:downloads_dir, :uploads_dir].each { |k| FileUtils.mkdir_p(send(k)) }
88
85
  end
89
86
 
90
87
  def build_mapping(base_dir, obj)
@@ -98,11 +95,16 @@ module Magellan
98
95
  case obj
99
96
  when nil then []
100
97
  when Hash then flatten_values(obj.values)
101
- when Array then obj.map{|i| flatten_values(i) }
98
+ when Array then obj.map { |i| flatten_values(i) }
102
99
  else obj
103
100
  end
104
101
  end
105
102
 
103
# Decodes a JSON document.
#
# @param str [String, nil] JSON text
# @return [Object, nil] the decoded value, or nil when +str+ is nil or empty
def parse_json(str)
  blank = str.nil? || str.empty?
  blank ? nil : JSON.parse(str)
end
107
+
106
108
  def parse_uri(str)
107
109
  uri = URI.parse(str)
108
110
  raise "Unsupported scheme #{uri.scheme.inspect} of #{str}" unless uri.scheme == 'gs'
@@ -111,13 +113,12 @@ module Magellan
111
113
 
112
114
  def build_local_files_obj(obj, mapping)
113
115
  case obj
114
- when Hash then obj.each_with_object({}){|(k,v), d| d[k] = build_local_files_obj(v, mapping)}
115
- when Array then obj.map{|i| build_local_files_obj(i, mapping)}
116
+ when Hash then obj.each_with_object({}) { |(k, v), d| d[k] = build_local_files_obj(v, mapping) }
117
+ when Array then obj.map { |i| build_local_files_obj(i, mapping) }
116
118
  when String then mapping[obj]
117
119
  else obj
118
120
  end
119
121
  end
120
-
121
122
  end
122
123
  end
123
124
  end
@@ -1,53 +1,45 @@
1
- require "magellan/gcs/proxy"
1
+ require 'magellan/gcs/proxy'
2
2
 
3
3
  module Magellan
4
4
  module Gcs
5
5
  module Proxy
6
6
  module ExpandVariable
7
-
8
7
  class InvalidReferenceError < StandardError
9
8
  end
10
9
 
11
10
  module_function
12
11
 
13
12
  def dig_variables(variable_ref, data)
14
- vars = variable_ref.split(".").map{|i| (/\A\d+\z/.match(i)) ? i.to_i : i }
15
- value = vars.inject(data) do |tmp, v|
16
- case v
17
- when String
18
- if tmp.respond_to?(:[]) && tmp.respond_to?(:include?)
19
- if tmp.include?(v)
20
- tmp[v]
21
- else
22
- raise InvalidReferenceError, variable_ref
23
- end
24
- else
25
- raise InvalidReferenceError, variable_ref
26
- end
27
- when Integer
28
- case tmp
29
- when Array
30
- if tmp.size > v
31
- tmp[v]
32
- else
33
- raise InvalidReferenceError, variable_ref
34
- end
35
- else
36
- raise InvalidReferenceError, variable_ref
37
- end
13
+ vars = variable_ref.split('.').map { |i| /\A\d+\z/ =~ i ? i.to_i : i }
14
+ vars.inject(data) do |tmp, v|
15
+ dig_variable(tmp, v, variable_ref)
16
+ end
17
+ end
18
+
19
# Resolves one segment of a variable reference against the current value.
#
# @param tmp [Object] value reached so far
# @param v [String, Integer] segment: a key name or an array index
# @param variable_ref [String] full reference, used as the error message
# @return [Object] the value found under +v+
# @raise [InvalidReferenceError] when +v+ cannot be resolved on +tmp+
def dig_variable(tmp, v, variable_ref)
  case v
  when String
    # Arrays and Strings respond to [] and include? too, but a name is not a
    # valid index for them (Array#[] would raise TypeError, String#[] would
    # return a substring), so only keyed containers are looked into.
    keyed = !tmp.is_a?(Array) && !tmp.is_a?(String) &&
            tmp.respond_to?(:[]) && tmp.respond_to?(:include?)
    return tmp[v] if keyed && tmp.include?(v)
  when Integer
    return tmp[v] if tmp.is_a?(Array) && tmp.size > v
  end
  raise InvalidReferenceError, variable_ref
end
41
33
 
42
34
  def expand_variables(str, data, quote_string: false)
43
35
  data ||= {}
44
- str.gsub(/\%\{\s*([\w.]+)\s*\}/) do |m|
36
+ str.gsub(/\%\{\s*([\w.]+)\s*\}/) do |_m|
45
37
  var = Regexp.last_match(1)
46
38
  value =
47
39
  begin
48
40
  dig_variables(var, data)
49
41
  rescue InvalidReferenceError
50
- ""
42
+ ''
51
43
  end
52
44
 
53
45
  case value
@@ -58,7 +50,6 @@ module Magellan
58
50
  end
59
51
  end
60
52
  end
61
-
62
53
  end
63
54
  end
64
55
  end
@@ -1,8 +1,10 @@
1
1
  # coding: utf-8
2
2
  require 'magellan/gcs/proxy'
3
3
 
4
- require "google/cloud/pubsub"
5
- require "google/cloud/storage"
4
+ require 'google/cloud/logging'
5
+ require 'google/cloud/logging/version'
6
+ require 'google/cloud/pubsub'
7
+ require 'google/cloud/storage'
6
8
  require 'net/http'
7
9
 
8
10
  module Magellan
@@ -24,7 +26,7 @@ module Magellan
24
26
 
25
27
  METADATA_HOST = 'metadata.google.internal'.freeze
26
28
  METADATA_PATH_BASE = '/computeMetadata/v1/'.freeze
27
- METADATA_HEADER = {"Metadata-Flavor" => "Google"}.freeze
29
+ METADATA_HEADER = { 'Metadata-Flavor' => 'Google' }.freeze
28
30
 
29
31
  def retrieve_metadata(key)
30
32
  http = Net::HTTP.new(METADATA_HOST)
@@ -51,8 +53,12 @@ module Magellan
51
53
  @subscription
52
54
  end
53
55
 
56
# Google::Cloud::Logging client for project_id, created on first use and
# reused afterwards.
def logging
  return @logging if @logging
  @logging = Google::Cloud::Logging.new(project: project_id)
end
59
+
54
60
  def reset
55
- instance_variables.each {|ivar| instance_variable_set(ivar, nil)}
61
+ instance_variables.each { |ivar| instance_variable_set(ivar, nil) }
56
62
  end
57
63
  end
58
64
  end
@@ -1,10 +1,76 @@
1
+ # coding: utf-8
1
2
  require 'logger'
3
+
2
4
  module Magellan
3
5
  module Gcs
4
6
  module Proxy
5
7
  module Log
8
+ module_function
9
+
10
# Logs +msg+ at debug level, but only when the proxy configuration reports
# verbose mode; otherwise does nothing and returns nil.
def verbose(msg)
  return unless Proxy.config.verbose?
  logger.debug(msg)
end
13
+
6
14
  def logger
7
- @logger ||= Logger.new($stdout)
15
+ @logger ||= build_logger(loggers)
16
+ end
17
+
18
# Picks the logger object to use for a list of configured loggers.
#
# nil entries are ignored: build_cloud_logging_logger returns nil when its
# definition has no log_name, and such an entry must not become the logger.
#
# @param loggers [Array] configured logger objects (may contain nil)
# @return [Object] a logger writing to the null device when the list is
#   empty, the logger itself when there is exactly one, otherwise a
#   CompositeLogger wrapping all of them
def build_logger(loggers)
  usable = loggers.compact
  case usable.length
  when 0 then Logger.new(File::NULL) # portable spelling of '/dev/null'
  when 1 then usable.first
  else CompositeLogger.new(usable)
  end
end
25
+
26
# Logger objects built from the configuration by build_loggers; built once
# and reused on later calls.
def loggers
  return @loggers if @loggers
  @loggers = build_loggers
end
29
+
30
# Builds one logger per entry of the 'loggers' configuration list.
#
# Each entry is a Hash whose 'type' selects the logger ('stdout', 'stderr' or
# 'cloud_logging'); the remaining keys are passed on as that logger's
# settings. Entries are duplicated first so the configuration is not mutated.
#
# @return [Array] the built loggers; entries for which no logger could be
#   built (build_cloud_logging_logger returns nil without a log_name) are
#   dropped instead of leaving nil in the list
# @raise [RuntimeError] when an entry has an unsupported type
def build_loggers
  definitions = Proxy.config[:loggers] || []
  built = definitions.map do |logger_def|
    config = logger_def.dup
    type = config.delete('type')
    case type
    when 'stdout' then Logger.new($stdout)
    when 'stderr' then Logger.new($stderr)
    when 'cloud_logging' then build_cloud_logging_logger(config)
    else raise "Unsupported logger type: #{type} with #{config.inspect}"
    end
  end
  built.compact
end
42
+
43
# Resource labels that may be supplied for the cloud logging resource, each
# read from the BLOCKS_BATCH_CLOUD_LOGGING_<KEY> environment variable.
CLOUD_LOGGING_RESOURCE_KEYS = %i[
  project_id
  cluster_name
  namespace_id
  instance_id
  pod_id
  container_name
  zone
].freeze
52
+
53
# Builds a Google Cloud Logging logger from a 'cloud_logging' logger
# definition.
#
# The logger is attached to a 'container' monitored resource (a GKE container
# instance). Its labels are taken from the environment, one variable per
# label, named BLOCKS_BATCH_CLOUD_LOGGING_<LABEL>:
#   project_id:     identifier of the GCP project associated with the resource
#   cluster_name:   immutable name of the cluster the container runs in
#   namespace_id:   immutable ID of the cluster namespace
#   instance_id:    immutable ID of the GCE instance
#   pod_id:         immutable ID of the pod
#   container_name: immutable name of the container
#   zone:           GCE zone in which the instance is running
# See https://cloud.google.com/logging/docs/api/v2/resource-list
#
# @param config [Hash] logger definition; 'log_name' is required
# @return [Google::Cloud::Logging::Logger, nil] nil when no log_name is given
def build_cloud_logging_logger(config)
  log_name = config['log_name']
  return nil unless log_name
  labels = {}
  CLOUD_LOGGING_RESOURCE_KEYS.each do |label|
    env_value = ENV["BLOCKS_BATCH_CLOUD_LOGGING_#{label.to_s.upcase}"]
    labels[label] = env_value if env_value
  end
  resource = GCP.logging.resource('container', labels)
  Google::Cloud::Logging::Logger.new(
    GCP.logging, log_name, resource,
    magellan_gcs_proxy: Magellan::Gcs::Proxy::VERSION
  )
end
9
75
  end
10
76
  end
@@ -1,12 +1,13 @@
1
- require "magellan/gcs/proxy"
1
+ require 'magellan/gcs/proxy'
2
2
 
3
3
  module Magellan
4
4
  module Gcs
5
5
  module Proxy
6
6
  class MessageWrapper
7
7
  attr_reader :msg, :context
8
- def initialize(msg, context)
9
- @msg, @context = msg, context
8
+ def initialize(context)
9
+ @msg = context.message
10
+ @context = ContextAccessor.new(context)
10
11
  end
11
12
 
12
13
  def [](key)
@@ -26,6 +27,31 @@ module Magellan
26
27
  Attrs.new(msg.attributes)
27
28
  end
28
29
 
30
# Read-only view of a context that exposes only a fixed list of its
# attributes, addressed by name.
class ContextAccessor
  # Names of the context methods that may be read through #[].
  KEYS = [
    :workspace,
    :downloads_dir, :uploads_dir,
    :download_files,
    :local_download_files,
    :remote_download_files
  ].freeze

  attr_accessor :context

  # @param context [Object] object responding to the methods named in KEYS
  def initialize(context)
    @context = context
  end

  # @param key [String, Symbol] attribute name
  # @return [Object, nil] the context's value for +key+, or nil when +key+ is
  #   not one of KEYS
  def [](key)
    context.send(key) if include?(key)
  end

  # Accepts the same key forms as #[]: the key is normalized with to_sym
  # before the lookup, so a String name is recognized as well as a Symbol.
  #
  # @param key [String, Symbol] attribute name
  # @return [Boolean] whether +key+ names an exposed attribute
  def include?(key)
    key.respond_to?(:to_sym) && KEYS.include?(key.to_sym)
  end
end
54
+
29
55
  class Attrs
30
56
  attr_reader :data
31
57
  def initialize(data)
@@ -35,7 +61,11 @@ module Magellan
35
61
  def [](key)
36
62
  value = data[key]
37
63
  if value.is_a?(String) && value =~ /\A\[.*\]\z|\A\{.*\}\z/
38
- JSON.parse(value) rescue value
64
+ begin
65
+ JSON.parse(value)
66
+ rescue
67
+ value
68
+ end
39
69
  else
40
70
  value
41
71
  end
@@ -44,7 +74,6 @@ module Magellan
44
74
  def include?(key)
45
75
  data.include?(key) || data.include?(key.to_sym)
46
76
  end
47
-
48
77
  end
49
78
  end
50
79
  end
@@ -0,0 +1,73 @@
1
+ require 'magellan/gcs/proxy'
2
+
3
+ module Magellan
4
+ module Gcs
5
+ module Proxy
6
# Mixin that reports the progress of a job step to the configured notifiers.
# The including object must respond to +message+, which is handed to the
# notifier with every notification.
module ProgressNotification
  include Log

  # Runs one step and reports its start, completion or failure.
  #
  # With +main+, +main+ is the guarded step: a failure is reported and then
  # swallowed, and the given block runs only after +main+ succeeded. Without
  # +main+, the block itself is the guarded step and a failure is reported
  # and re-raised.
  #
  # @param numbers [Array] progress numbers for start, completion and error
  # @param total [Object] total passed along with every notification
  # @param base_message [String] prefix of the notification text
  # @param main [#call, nil] optional guarded step, called with self
  def process_with_notification(numbers, total, base_message, main = nil)
    start_no, complete_no, error_no = *numbers
    notify(start_no, total, "#{base_message} starting")
    begin
      if main
        main.call(self)
      else
        yield(self)
      end
    rescue => err
      notify(error_no, total, "#{base_message} error: [#{err.class}] #{err.message}", severity: :error)
      raise err unless main
    else
      notify(complete_no, total, "#{base_message} completed")
      yield(self) if main
    end
  end

  # Sends one notification about the current message to the notifier.
  def notify(progress, total, data, severity: :info)
    notifier.notify(severity, message, data, progress: progress, total: total)
  end

  # The notifier in use, built on first access.
  def notifier
    return @notifier if @notifier
    @notifier = build_notifier
  end

  # Builds the notifier. Notifications always go to the logger through a
  # ProgressNotifierAdapter; when the configuration has a
  # progress_notification entry they are first published to its 'topic'
  # through a PubsubProgressNotifier, both wrapped in a CompositeNotifier.
  def build_notifier
    settings = Proxy.config[:progress_notification]
    pubsub_notifier = PubsubProgressNotifier.new(settings['topic']) if settings
    log_notifier = ProgressNotifierAdapter.new(logger)
    return log_notifier unless pubsub_notifier
    CompositeNotifier.new([pubsub_notifier, log_notifier])
  end

  # Fans a notification out to several notifiers. A failure in one notifier
  # is written to stderr and does not keep the others from being called.
  class CompositeNotifier
    attr_reader :notifiers

    def initialize(notifiers)
      @notifiers = notifiers
    end

    def notify(*args, &block)
      notifiers.each { |target| deliver(target, args, block) }
    end

    private

    # Calls one notifier, reporting (not propagating) any StandardError.
    def deliver(target, args, block)
      target.notify(*args, &block)
    rescue => err
      $stderr.puts("[#{err.class}] #{err.message}")
    end
  end
end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,24 @@
1
+ require 'magellan/gcs/proxy'
2
+
3
+ module Magellan
4
+ module Gcs
5
+ module Proxy
6
# Notifier that writes progress notifications to a logger as LTSV lines.
class ProgressNotifierAdapter
  # Characters that would split an LTSV field or record, and their escapes.
  LTSV_ESCAPES = { "\t" => '\t', "\n" => '\n', "\r" => '\r' }.freeze

  attr_reader :logger

  # @param logger [Object] object responding to the severity methods used
  #   with #notify (e.g. info, error)
  def initialize(logger)
    @logger = logger
  end

  # Formats +hash+ as one LTSV line: "label:value" fields joined by tabs.
  # Tabs and line breaks inside a value (multi-line error messages, for
  # instance) are written as \t, \n and \r so the record stays on one line.
  #
  # @param hash [Hash] fields to format
  # @return [String]
  def ltsv(hash)
    hash.map { |k, v| "#{k}:#{escape(v)}" }.join("\t")
  end

  # Logs one notification at the given severity.
  #
  # @param severity [Symbol] name of the logger method to call
  # @param job_message [#message_id] message the notification is about
  # @param data [Object] notification text
  # @param attrs [Hash] additional fields (e.g. progress, total)
  def notify(severity, job_message, data, attrs)
    d = { job_message_id: job_message.message_id }.merge(attrs)
    d[:data] = data # Show data at the end of string
    logger.public_send(severity, ltsv(d))
  end

  private

  # String form of +value+ with field/record separators escaped.
  def escape(value)
    value.to_s.gsub(/[\t\n\r]/, LTSV_ESCAPES)
  end
end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,25 @@
1
+ require 'magellan/gcs/proxy'
2
+
3
+ require 'logger'
4
+ require 'json'
5
+
6
+ module Magellan
7
+ module Gcs
8
+ module Proxy
9
# Notifier that publishes progress notifications to a Pub/Sub topic.
class PubsubProgressNotifier
  attr_reader :topic_name

  # @param topic_name [String] name of the topic to publish to
  def initialize(topic_name)
    @topic_name = topic_name
  end

  # The topic object looked up through GCP.pubsub, memoized once found.
  #
  # @raise [RuntimeError] when the lookup returns nil, so a wrong topic name
  #   is reported as such instead of failing later with NoMethodError on nil
  def topic
    @topic ||= GCP.pubsub.topic(topic_name) ||
               raise("Pub/Sub topic not found: #{topic_name.inspect}")
  end

  # Publishes +data+ with the severity, the job message id and +attrs+ as
  # message attributes.
  #
  # @param severity [Symbol] published as the :level attribute
  # @param job_message [#message_id] message the notification is about
  # @param data [Object] payload to publish
  # @param attrs [Hash] additional attributes (e.g. progress, total)
  def notify(severity, job_message, data, attrs)
    topic.publish data, { level: severity, job_message_id: job_message.message_id }.merge(attrs)
  end
end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,54 @@
1
+ require 'magellan/gcs/proxy'
2
+
3
+ require 'logger'
4
+ require 'json'
5
+
6
+ module Magellan
7
+ module Gcs
8
+ module Proxy
9
# Keeps calling message.delay! from a helper thread while a block of work is
# running (presumably to extend the message's acknowledgement deadline —
# confirm against the Pub/Sub client documentation).
class PubsubSustainer
  include Log

  # Value passed to message.delay! when no delay is configured.
  DEFAULT_DELAY = 10

  class << self
    # Runs the given block; when the configuration has a 'sustainer' entry, a
    # helper thread sustains +message+ for as long as the block runs.
    #
    # The sustainer object is created before the thread starts and is stopped
    # through it, so stopping cannot be lost even when the block finishes
    # before the thread has begun to run.
    #
    # @param message [#delay!] message being processed
    # @return [Object] the block's result
    # @raise [RuntimeError] when no block is given
    def run(message)
      raise "#{name}.run requires block" unless block_given?
      c = Proxy.config[:sustainer]
      return yield unless c

      sustainer = new(message, delay: c['delay'], interval: c['interval'])
      worker = Thread.new { sustainer.run }
      begin
        yield
      ensure
        sustainer.stop
        worker.join
      end
    end
  end

  attr_reader :message, :delay, :interval

  # @param message [#delay!] message to sustain
  # @param delay [#to_i, nil] value passed to message.delay!; nil (e.g. a
  #   missing configuration key) falls back to DEFAULT_DELAY
  # @param interval [#to_f, nil] seconds between two delay! calls; defaults
  #   to 90% of +delay+
  def initialize(message, delay: DEFAULT_DELAY, interval: nil)
    @message = message
    @delay = (delay || DEFAULT_DELAY).to_i
    @interval = (interval || @delay * 0.9).to_f
    @mutex = Mutex.new
    @stop_signal = ConditionVariable.new
    @stopped = false
  end

  # Asks #run to finish and wakes it up if it is currently waiting.
  def stop
    @mutex.synchronize do
      @stopped = true
      @stop_signal.broadcast
    end
  end

  # Calls message.delay!(delay) every +interval+ seconds until #stop is
  # called. An error from delay! is logged and ends the loop.
  def run
    loop do
      break if wait_for_stop
      begin
        message.delay! delay
      rescue => e
        logger.error(e)
        break
      end
    end
  end

  private

  # Waits up to +interval+ seconds, returning early when #stop is called.
  #
  # @return [Boolean] whether a stop has been requested
  def wait_for_stop
    deadline = monotonic_now + interval
    @mutex.synchronize do
      until @stopped
        remaining = deadline - monotonic_now
        break if remaining <= 0
        # Re-checked in a loop because a wait may return before the timeout.
        @stop_signal.wait(@mutex, remaining)
      end
      @stopped
    end
  end

  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
52
+ end
53
+ end
54
+ end
@@ -1,7 +1,7 @@
1
1
  module Magellan
2
2
  module Gcs
3
3
  module Proxy
4
- VERSION = "0.1.1"
4
+ VERSION = '0.1.2'.freeze
5
5
  end
6
6
  end
7
7
  end
@@ -1,14 +1,30 @@
1
- require "magellan/gcs/proxy/version"
2
- require 'magellan/gcs/proxy/cli'
3
- require 'magellan/gcs/proxy/context'
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require 'magellan/gcs/proxy/version'
4
5
  require 'magellan/gcs/proxy/expand_variable'
5
- require 'magellan/gcs/proxy/gcp'
6
+ require 'magellan/gcs/proxy/config'
6
7
  require 'magellan/gcs/proxy/log'
8
+ require 'magellan/gcs/proxy/gcp'
9
+
10
+ require 'magellan/gcs/proxy/composite_logger'
11
+ require 'magellan/gcs/proxy/progress_notifier_adapter'
12
+ require 'magellan/gcs/proxy/pubsub_progress_notifier'
13
+ require 'magellan/gcs/proxy/pubsub_sustainer'
14
+ require 'magellan/gcs/proxy/progress_notification'
15
+
7
16
  require 'magellan/gcs/proxy/message_wrapper'
17
+ require 'magellan/gcs/proxy/context'
18
+ require 'magellan/gcs/proxy/cli'
8
19
 
9
20
  module Magellan
10
21
  module Gcs
11
22
  module Proxy
23
# Shared configuration object, created with Config's default path on first
# access and reused afterwards.
def self.config
  return @config if @config
  @config = Config.new
end
12
28
  end
13
29
  end
14
30
  end