chronicle-etl 0.4.4 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,55 +1,73 @@
1
- require 'runcom'
1
+ require 'fileutils'
2
+ require 'yaml'
2
3
 
3
4
  module Chronicle
4
5
  module ETL
5
6
  # Utility methods to read, write, and access config files
6
7
  module Config
7
- module_function
8
+ extend self
8
9
 
9
- # Loads a yml config file
10
- def load(path)
11
- config = Runcom::Config.new(path)
12
- # FIXME: hack to deeply symbolize keys
13
- JSON.parse(config.to_h.to_json, symbolize_names: true)
10
+ attr_accessor :xdg_environment
11
+
12
+ def load(type, identifier)
13
+ base = config_pathname_for_type(type)
14
+ path = base.join("#{identifier}.yml")
15
+ return {} unless path.exist?
16
+
17
+ YAML.safe_load(File.read(path), symbolize_names: true, permitted_classes: [Symbol, Date, Time])
14
18
  end
15
19
 
16
20
  # Writes a hash as a yml config file
17
- def write(path, data)
18
- config = Runcom::Config.new(path)
19
- filename = config.all[0].to_s + '.yml'
20
- File.open(filename, 'w') do |f|
21
- f << data.to_yaml
21
+ def write(type, identifier, data)
22
+ base = config_pathname_for_type(type)
23
+ path = base.join("#{identifier}.yml")
24
+ FileUtils.mkdir_p(File.dirname(path))
25
+ File.open(path, 'w', 0o600) do |f|
26
+ # Ruby likes to add --- separators when writing yaml files
27
+ f << data.to_yaml.gsub(/^-+\n/, '')
22
28
  end
23
29
  end
24
30
 
31
+ def exists?(type, identifier)
32
+ base = config_pathname_for_type(type)
33
+ path = base.join("#{identifier}.yml")
34
+ return path.exist?
35
+ end
36
+
25
37
  # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
26
38
  def available_jobs
27
- Dir.glob(File.join(config_directory("jobs"), "*.yml")).map do |filename|
39
+ Dir.glob(File.join(config_pathname_for_type("jobs"), "*.yml")).map do |filename|
28
40
  File.basename(filename, ".*")
29
41
  end
30
42
  end
31
43
 
32
- # Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
33
- def available_credentials
34
- Dir.glob(File.join(config_directory("credentials"), "*.yml")).map do |filename|
44
+ def available_configs(type)
45
+ Dir.glob(File.join(config_pathname_for_type(type), "*.yml")).map do |filename|
35
46
  File.basename(filename, ".*")
36
47
  end
37
48
  end
38
49
 
39
50
  # Load a job definition from job config directory
40
- def load_job_from_config(job_name)
41
- definition = self.load("chronicle/etl/jobs/#{job_name}.yml")
42
- definition[:name] = job_name
43
- definition
51
+ def read_job(job_name)
52
+ load('jobs', job_name)
44
53
  end
45
54
 
46
- def load_credentials(name)
47
- config = self.load("chronicle/etl/credentials/#{name}.yml")
55
+ def config_pathname
56
+ base = Pathname.new(xdg_config.config_home)
57
+ base.join('chronicle', 'etl')
48
58
  end
49
59
 
50
- def config_directory(type)
51
- path = "chronicle/etl/#{type}"
52
- Runcom::Config.new(path).current || raise(Chronicle::ETL::ConfigError, "Could not access config directory (#{path})")
60
+ def config_pathname_for_type(type)
61
+ config_pathname.join(type)
62
+ end
63
+
64
+ def xdg_config
65
+ # Only used for overriding ENV['HOME'] for XDG-related specs
66
+ if @xdg_environment
67
+ XDG::Environment.new(environment: @xdg_environment)
68
+ else
69
+ XDG::Environment.new
70
+ end
53
71
  end
54
72
  end
55
73
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "ostruct"
4
+ require "chronic_duration"
4
5
 
5
6
  module Chronicle
6
7
  module ETL
@@ -57,7 +58,9 @@ module Chronicle
57
58
 
58
59
  options.each do |name, value|
59
60
  setting = self.class.all_settings[name]
60
- raise(Chronicle::ETL::ConnectorConfigurationError, "Unrecognized setting: #{name}") unless setting
61
+
62
+ # Do nothing with a given option if it's not a connector setting
63
+ next unless setting
61
64
 
62
65
  @config[name] = coerced_value(setting, value)
63
66
  end
@@ -83,6 +86,8 @@ module Chronicle
83
86
 
84
87
  def coerced_value(setting, value)
85
88
  setting.type ? __send__("coerce_#{setting.type}", value) : value
89
+ rescue StandardError
90
+ raise(Chronicle::ETL::ConnectorConfigurationError, "Could not coerce #{value} into a #{setting.type}")
86
91
  end
87
92
 
88
93
  def coerce_string(value)
@@ -103,11 +108,15 @@ module Chronicle
103
108
  end
104
109
 
105
110
  def coerce_time(value)
106
- # TODO: handle durations like '3h'
107
- if value.is_a?(String)
108
- Time.parse(value)
111
+ return value unless value.is_a?(String)
112
+
113
+ # Hacky check for duration strings like "60m"
114
+ if value.match(/[a-z]+/)
115
+ ChronicDuration.raise_exceptions = true
116
+ duration_ago = ChronicDuration.parse(value)
117
+ Time.now - duration_ago
109
118
  else
110
- value
119
+ Time.parse(value)
111
120
  end
112
121
  end
113
122
  end
@@ -2,6 +2,8 @@ module Chronicle
2
2
  module ETL
3
3
  class Error < StandardError; end
4
4
 
5
+ class SecretsError < Error; end
6
+
5
7
  class ConfigError < Error; end
6
8
 
7
9
  class RunnerTypeError < Error; end
@@ -23,6 +25,7 @@ module Chronicle
23
25
  end
24
26
  end
25
27
 
28
+ class PluginNotInstalledError < PluginError; end
26
29
  class PluginConflictError < PluginError; end
27
30
  class PluginNotAvailableError < PluginError; end
28
31
  class PluginLoadError < PluginError; end
@@ -45,8 +45,10 @@ module Chronicle
45
45
  def plugins_missing?
46
46
  validate
47
47
 
48
- @errors[:plugins] || []
49
- .filter { |e| e.instance_of?(Chronicle::ETL::PluginLoadError) }
48
+ return false unless @errors[:plugins]&.any?
49
+
50
+ @errors[:plugins]
51
+ .filter { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
50
52
  .any?
51
53
  end
52
54
 
@@ -62,6 +64,30 @@ module Chronicle
62
64
  load_credentials
63
65
  end
64
66
 
67
+ # For each connector in this job, mix in secrets into the options
68
+ def apply_default_secrets
69
+ Chronicle::ETL::Registry::PHASES.each do |phase|
70
+ # If the options have a `secrets` key, we look up those secrets and
71
+ # mix them in. If not, use the connector's plugin name and look up
72
+ # secrets with the same namespace
73
+ if @definition[phase][:options][:secrets]
74
+ namespace = @definition[phase][:options][:secrets]
75
+ else
76
+ # We don't want to do this lookup for built-in connectors
77
+ next if __send__("#{phase}_klass".to_sym).connector_registration.built_in?
78
+
79
+ # infer plugin name from connector name and use it for secrets
80
+ # namespace
81
+ namespace = @definition[phase][:name].split(":").first
82
+ end
83
+
84
+ # Reverse merge secrets into connector's options (we want to preserve
85
+ # options that came from job file or CLI options)
86
+ secrets = Chronicle::ETL::Secrets.read(namespace)
87
+ @definition[phase][:options] = secrets.merge(@definition[phase][:options])
88
+ end
89
+ end
90
+
65
91
  # Is this job continuing from a previous run?
66
92
  def incremental?
67
93
  @definition[:incremental]
@@ -1,5 +1,6 @@
1
- require 'sequel'
2
1
  require 'forwardable'
2
+ require 'sequel'
3
+ require 'xdg'
3
4
 
4
5
  module Chronicle
5
6
  module ETL
@@ -35,8 +36,8 @@ module Chronicle
35
36
  end
36
37
 
37
38
  def self.db_filename
38
- data = Runcom::Data.new "chronicle/etl/job_log.db"
39
- filename = data.all[0].to_s
39
+ base = Pathname.new(XDG::Data.new.home)
40
+ base.join('job_log.db')
40
41
  end
41
42
 
42
43
  def self.initialize_db
@@ -3,11 +3,13 @@ require 'csv'
3
3
  module Chronicle
4
4
  module ETL
5
5
  class CSVLoader < Chronicle::ETL::Loader
6
+ include Chronicle::ETL::Loaders::Helpers::StdoutHelper
7
+
6
8
  register_connector do |r|
7
9
  r.description = 'CSV'
8
10
  end
9
11
 
10
- setting :output, default: $stdout
12
+ setting :output
11
13
  setting :headers, default: true
12
14
  setting :header_row, default: true
13
15
 
@@ -30,16 +32,7 @@ module Chronicle
30
32
  csv_options[:headers] = headers
31
33
  end
32
34
 
33
- if @config.output.is_a?(IO)
34
- # This might seem like a duplication of the default value ($stdout)
35
- # but it's because rspec overwrites $stdout (in helper #capture) to
36
- # capture output.
37
- io = $stdout.dup
38
- else
39
- io = File.open(@config.output, "w+")
40
- end
41
-
42
- output = CSV.generate(**csv_options) do |csv|
35
+ csv_output = CSV.generate(**csv_options) do |csv|
43
36
  records.each do |record|
44
37
  csv << record
45
38
  .transform_keys(&:to_sym)
@@ -48,8 +41,12 @@ module Chronicle
48
41
  end
49
42
  end
50
43
 
51
- io.write(output)
52
- io.close
44
+ # TODO: just write to io directly
45
+ if output_to_stdout?
46
+ write_to_stdout(csv_output)
47
+ else
48
+ File.write(@config.output, csv_output)
49
+ end
53
50
  end
54
51
  end
55
52
  end
@@ -0,0 +1,36 @@
1
+ require 'tempfile'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Loaders
6
+ module Helpers
7
+ module StdoutHelper
8
+ # TODO: let users use "stdout" as an option for the `output` setting
9
+ # Assume we're using stdout if no output is specified
10
+ def output_to_stdout?
11
+ !@config.output
12
+ end
13
+
14
+ def create_stdout_temp_file
15
+ file = Tempfile.new('chronicle-stdout')
16
+ file.unlink
17
+ file
18
+ end
19
+
20
+ def write_to_stdout_from_temp_file(file)
21
+ file.rewind
22
+ write_to_stdout(file.read)
23
+ end
24
+
25
+ def write_to_stdout(output)
26
+ # We .dup because rspec overwrites $stdout (in helper #capture) to
27
+ # capture output.
28
+ stdout = $stdout.dup
29
+ stdout.write(output)
30
+ stdout.flush
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -1,19 +1,35 @@
1
+ require 'tempfile'
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class JSONLoader < Chronicle::ETL::Loader
6
+ include Chronicle::ETL::Loaders::Helpers::StdoutHelper
7
+
4
8
  register_connector do |r|
5
9
  r.description = 'json'
6
10
  end
7
11
 
8
12
  setting :serializer
9
- setting :output, default: $stdout
13
+ setting :output
14
+
15
+ # If true, one JSON record per line. If false, output a single json
16
+ # object with an array of records
17
+ setting :line_separated, default: true, type: :boolean
18
+
19
+ def initialize(*args)
20
+ super
21
+ @first_line = true
22
+ end
10
23
 
11
24
  def start
12
- if @config.output == $stdout
13
- @output = @config.output
14
- else
15
- @output = File.open(@config.output, "w")
16
- end
25
+ @output_file =
26
+ if output_to_stdout?
27
+ create_stdout_temp_file
28
+ else
29
+ File.open(@config.output, "w+")
30
+ end
31
+
32
+ @output_file.puts("[\n") unless @config.line_separated
17
33
  end
18
34
 
19
35
  def load(record)
@@ -27,15 +43,34 @@ module Chronicle
27
43
 
28
44
  force_utf8(value)
29
45
  end
30
- @output.puts encoded.to_json
46
+
47
+ line = encoded.to_json
48
+ # For line-separated output, we just put json + newline
49
+ if @config.line_separated
50
+ line = "#{line}\n"
51
+ # Otherwise, we add a comma and newline and then add record to the
52
+ # array we created in #start (unless it's the first line).
53
+ else
54
+ line = ",\n#{line}" unless @first_line
55
+ end
56
+
57
+ @output_file.write(line)
58
+
59
+ @first_line = false
31
60
  end
32
61
 
33
62
  def finish
34
- @output.close
63
+ # Close the array unless we're doing line-separated JSON
64
+ @output_file.puts("\n]") unless @config.line_separated
65
+
66
+ write_to_stdout_from_temp_file(@output_file) if output_to_stdout?
67
+
68
+ @output_file.close
35
69
  end
36
70
 
37
71
  private
38
72
 
73
+ # TODO: implement this
39
74
  def serializer
40
75
  @config.serializer || Chronicle::ETL::RawSerializer
41
76
  end
@@ -1,4 +1,5 @@
1
1
  require_relative 'helpers/encoding_helper'
2
+ require_relative 'helpers/stdout_helper'
2
3
 
3
4
  module Chronicle
4
5
  module ETL
@@ -13,8 +13,8 @@ module Chronicle
13
13
  module PluginRegistry
14
14
  # Does this plugin exist?
15
15
  def self.exists?(name)
16
- # TODO: implement this. Could query rubygems.org or have a
17
- # hardcoded approved list
16
+ # TODO: implement this. Could query rubygems.org or use a hardcoded
17
+ # list somewhere
18
18
  true
19
19
  end
20
20
 
@@ -31,6 +31,12 @@ module Chronicle
31
31
  .values
32
32
  end
33
33
 
34
+ # Check whether a given plugin is installed
35
+ def self.installed?(name)
36
+ gem_name = "chronicle-#{name}"
37
+ all_installed.map(&:name).include?(gem_name)
38
+ end
39
+
34
40
  # Activate a plugin with given name by `require`ing it
35
41
  def self.activate(name)
36
42
  # By default, activates the latest available version of a gem
@@ -39,14 +45,17 @@ module Chronicle
39
45
  rescue Gem::ConflictError => e
40
46
  # TODO: figure out if there's more we can do here
41
47
  raise Chronicle::ETL::PluginConflictError.new(name), "Plugin '#{name}' couldn't be loaded. #{e.message}"
42
- rescue LoadError => e
43
- raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded" if exists?(name)
44
-
45
- raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist"
48
+ rescue StandardError, LoadError => e
49
+ # StandardError to catch random non-loading problems that might occur
50
+ # when requiring the plugin (eg class macro invoked the wrong way)
51
+ # TODO: decide if this should be separated
52
+ raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded"
46
53
  end
47
54
 
48
55
  # Install a plugin to local gems
49
56
  def self.install(name)
57
+ return if installed?(name)
58
+
50
59
  gem_name = "chronicle-#{name}"
51
60
  raise(Chronicle::ETL::PluginNotAvailableError.new(gem_name), "Plugin #{name} doesn't exist") unless exists?(gem_name)
52
61
 
@@ -9,18 +9,7 @@ module Chronicle
9
9
  class << self
10
10
  attr_accessor :connectors
11
11
 
12
- def load_all!
13
- load_connectors_from_gems
14
- end
15
-
16
- def load_connectors_from_gems
17
- Gem::Specification.filter{|s| s.name.match(/^chronicle/) }.each do |gem|
18
- require_str = gem.name.gsub('chronicle-', 'chronicle/')
19
- require require_str rescue LoadError
20
- end
21
- end
22
-
23
- def register connector
12
+ def register(connector)
24
13
  connectors << connector
25
14
  end
26
15
 
@@ -28,9 +17,14 @@ module Chronicle
28
17
  @connectors ||= []
29
18
  end
30
19
 
31
- def find_by_phase_and_identifier(phase, identifier)
32
- # Simple case: built in connector
20
+ # Find connector from amongst those currently loaded
21
+ def find_by_phase_and_identifier_local(phase, identifier)
33
22
  connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
23
+ end
24
+
25
+ # Find connector and load relevant plugin to find it if necessary
26
+ def find_by_phase_and_identifier(phase, identifier)
27
+ connector = find_by_phase_and_identifier_local(phase, identifier)
34
28
  return connector if connector
35
29
 
36
30
  # if not available in built-in connectors, try to activate a
@@ -44,6 +38,8 @@ module Chronicle
44
38
  plugin = identifier
45
39
  end
46
40
 
41
+ raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless PluginRegistry.installed?(plugin)
42
+
47
43
  PluginRegistry.activate(plugin)
48
44
 
49
45
  candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
@@ -50,7 +50,7 @@ class Chronicle::ETL::Runner
50
50
  transformer = @job.instantiate_transformer(extraction)
51
51
  record = transformer.transform
52
52
 
53
- Chronicle::ETL::Logger.info(tty_log_transformation(transformer))
53
+ Chronicle::ETL::Logger.debug(tty_log_transformation(transformer))
54
54
  @job_logger.log_transformation(transformer)
55
55
 
56
56
  @loader.load(record) unless @job.dry_run?
@@ -0,0 +1,55 @@
1
+ module Chronicle
2
+ module ETL
3
+ # Secret management module
4
+ module Secrets
5
+ module_function
6
+
7
+ # Save a setting to a namespaced config file
8
+ def set(namespace, key, value)
9
+ config = read(namespace)
10
+ config[key.to_sym] = value
11
+ write(namespace, config)
12
+ end
13
+
14
+ # Remove a setting from a namespaced config file
15
+ def unset(namespace, key)
16
+ config = read(namespace)
17
+ config.delete(key.to_sym)
18
+ write(namespace, config)
19
+ end
20
+
21
+ # Retrieve all secrets from all namespaces
22
+ def all(namespace = nil)
23
+ namespaces = namespace.nil? ? available_secrets : [namespace]
24
+ namespaces
25
+ .to_h { |namespace| [namespace.to_sym, read(namespace)] }
26
+ .delete_if { |_, v| v.empty? }
27
+ end
28
+
29
+ # Return whether a namespace name is valid (lowercase alphanumeric and -)
30
+ def valid_namespace_name?(namespace)
31
+ namespace.match(/^[a-z0-9\-]+$/)
32
+ end
33
+
34
+ # Read secrets from a config file
35
+ def read(namespace)
36
+ definition = Chronicle::ETL::Config.load("secrets", namespace)
37
+ definition[:secrets] || {}
38
+ end
39
+
40
+ # Write secrets to a config file
41
+ def write(namespace, secrets)
42
+ data = {
43
+ secrets: (secrets || {}).transform_keys(&:to_s),
44
+ chronicle_etl_version: Chronicle::ETL::VERSION
45
+ }.transform_keys(&:to_s) # Should I implement deeply_transform_keys...?
46
+ Chronicle::ETL::Config.write("secrets", namespace, data)
47
+ end
48
+
49
+ # Which config files are available in ~/.config/chronicle/etl/secrets
50
+ def available_secrets
51
+ Chronicle::ETL::Config.available_configs('secrets')
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
2
  module ETL
3
- VERSION = "0.4.4"
3
+ VERSION = "0.5.2"
4
4
  end
5
5
  end
data/lib/chronicle/etl.rb CHANGED
@@ -14,6 +14,7 @@ require_relative 'etl/models/base'
14
14
  require_relative 'etl/models/raw'
15
15
  require_relative 'etl/models/entity'
16
16
  require_relative 'etl/runner'
17
+ require_relative 'etl/secrets'
17
18
  require_relative 'etl/serializers/serializer'
18
19
  require_relative 'etl/utils/binary_attachments'
19
20
  require_relative 'etl/utils/hash_utilities'