chronicle-etl 0.4.4 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,55 +1,73 @@
1
- require 'runcom'
1
+ require 'fileutils'
2
+ require 'yaml'
2
3
 
3
4
  module Chronicle
4
5
  module ETL
5
6
  # Utility methods to read, write, and access config files
6
7
  module Config
7
- module_function
8
+ extend self
8
9
 
9
- # Loads a yml config file
10
- def load(path)
11
- config = Runcom::Config.new(path)
12
- # FIXME: hack to deeply symbolize keys
13
- JSON.parse(config.to_h.to_json, symbolize_names: true)
10
+ attr_accessor :xdg_environment
11
+
12
+ def load(type, identifier)
13
+ base = config_pathname_for_type(type)
14
+ path = base.join("#{identifier}.yml")
15
+ return {} unless path.exist?
16
+
17
+ YAML.safe_load(File.read(path), symbolize_names: true, permitted_classes: [Symbol, Date, Time])
14
18
  end
15
19
 
16
20
  # Writes a hash as a yml config file
17
- def write(path, data)
18
- config = Runcom::Config.new(path)
19
- filename = config.all[0].to_s + '.yml'
20
- File.open(filename, 'w') do |f|
21
- f << data.to_yaml
21
+ def write(type, identifier, data)
22
+ base = config_pathname_for_type(type)
23
+ path = base.join("#{identifier}.yml")
24
+ FileUtils.mkdir_p(File.dirname(path))
25
+ File.open(path, 'w', 0o600) do |f|
26
+ # Ruby likes to add --- separators when writing yaml files
27
+ f << data.to_yaml.gsub(/^-+\n/, '')
22
28
  end
23
29
  end
24
30
 
31
+ def exists?(type, identifier)
32
+ base = config_pathname_for_type(type)
33
+ path = base.join("#{identifier}.yml")
34
+ return path.exist?
35
+ end
36
+
25
37
  # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
26
38
  def available_jobs
27
- Dir.glob(File.join(config_directory("jobs"), "*.yml")).map do |filename|
39
+ Dir.glob(File.join(config_pathname_for_type("jobs"), "*.yml")).map do |filename|
28
40
  File.basename(filename, ".*")
29
41
  end
30
42
  end
31
43
 
32
- # Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
33
- def available_credentials
34
- Dir.glob(File.join(config_directory("credentials"), "*.yml")).map do |filename|
44
+ def available_configs(type)
45
+ Dir.glob(File.join(config_pathname_for_type(type), "*.yml")).map do |filename|
35
46
  File.basename(filename, ".*")
36
47
  end
37
48
  end
38
49
 
39
50
  # Load a job definition from job config directory
40
- def load_job_from_config(job_name)
41
- definition = self.load("chronicle/etl/jobs/#{job_name}.yml")
42
- definition[:name] = job_name
43
- definition
51
+ def read_job(job_name)
52
+ load('jobs', job_name)
44
53
  end
45
54
 
46
- def load_credentials(name)
47
- config = self.load("chronicle/etl/credentials/#{name}.yml")
55
+ def config_pathname
56
+ base = Pathname.new(xdg_config.config_home)
57
+ base.join('chronicle', 'etl')
48
58
  end
49
59
 
50
- def config_directory(type)
51
- path = "chronicle/etl/#{type}"
52
- Runcom::Config.new(path).current || raise(Chronicle::ETL::ConfigError, "Could not access config directory (#{path})")
60
+ def config_pathname_for_type(type)
61
+ config_pathname.join(type)
62
+ end
63
+
64
+ def xdg_config
65
+ # Only used for overriding ENV['HOME'] for XDG-related specs
66
+ if @xdg_environment
67
+ XDG::Environment.new(environment: @xdg_environment)
68
+ else
69
+ XDG::Environment.new
70
+ end
53
71
  end
54
72
  end
55
73
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "ostruct"
4
+ require "chronic_duration"
4
5
 
5
6
  module Chronicle
6
7
  module ETL
@@ -57,7 +58,9 @@ module Chronicle
57
58
 
58
59
  options.each do |name, value|
59
60
  setting = self.class.all_settings[name]
60
- raise(Chronicle::ETL::ConnectorConfigurationError, "Unrecognized setting: #{name}") unless setting
61
+
62
+ # Do nothing with a given option if it's not a connector setting
63
+ next unless setting
61
64
 
62
65
  @config[name] = coerced_value(setting, value)
63
66
  end
@@ -83,6 +86,8 @@ module Chronicle
83
86
 
84
87
  def coerced_value(setting, value)
85
88
  setting.type ? __send__("coerce_#{setting.type}", value) : value
89
+ rescue StandardError
90
+ raise(Chronicle::ETL::ConnectorConfigurationError, "Could not coerce #{value} into a #{setting.type}")
86
91
  end
87
92
 
88
93
  def coerce_string(value)
@@ -103,11 +108,15 @@ module Chronicle
103
108
  end
104
109
 
105
110
  def coerce_time(value)
106
- # TODO: handle durations like '3h'
107
- if value.is_a?(String)
108
- Time.parse(value)
111
+ return value unless value.is_a?(String)
112
+
113
+ # Hacky check for duration strings like "60m"
114
+ if value.match(/[a-z]+/)
115
+ ChronicDuration.raise_exceptions = true
116
+ duration_ago = ChronicDuration.parse(value)
117
+ Time.now - duration_ago
109
118
  else
110
- value
119
+ Time.parse(value)
111
120
  end
112
121
  end
113
122
  end
@@ -2,6 +2,8 @@ module Chronicle
2
2
  module ETL
3
3
  class Error < StandardError; end
4
4
 
5
+ class SecretsError < Error; end
6
+
5
7
  class ConfigError < Error; end
6
8
 
7
9
  class RunnerTypeError < Error; end
@@ -23,6 +25,7 @@ module Chronicle
23
25
  end
24
26
  end
25
27
 
28
+ class PluginNotInstalledError < PluginError; end
26
29
  class PluginConflictError < PluginError; end
27
30
  class PluginNotAvailableError < PluginError; end
28
31
  class PluginLoadError < PluginError; end
@@ -45,8 +45,10 @@ module Chronicle
45
45
  def plugins_missing?
46
46
  validate
47
47
 
48
- @errors[:plugins] || []
49
- .filter { |e| e.instance_of?(Chronicle::ETL::PluginLoadError) }
48
+ return false unless @errors[:plugins]&.any?
49
+
50
+ @errors[:plugins]
51
+ .filter { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
50
52
  .any?
51
53
  end
52
54
 
@@ -62,6 +64,30 @@ module Chronicle
62
64
  load_credentials
63
65
  end
64
66
 
67
+ # For each connector in this job, mix secrets into the options
68
+ def apply_default_secrets
69
+ Chronicle::ETL::Registry::PHASES.each do |phase|
70
+ # If the options have a `secrets` key, we look up those secrets and
71
+ # mix them in. If not, use the connector's plugin name and look up
72
+ # secrets with the same namespace
73
+ if @definition[phase][:options][:secrets]
74
+ namespace = @definition[phase][:options][:secrets]
75
+ else
76
+ # We don't want to do this lookup for built-in connectors
77
+ next if __send__("#{phase}_klass".to_sym).connector_registration.built_in?
78
+
79
+ # infer plugin name from connector name and use it for secrets
80
+ # namespace
81
+ namespace = @definition[phase][:name].split(":").first
82
+ end
83
+
84
+ # Reverse merge secrets into connector's options (we want to preserve
85
+ # options that came from job file or CLI options)
86
+ secrets = Chronicle::ETL::Secrets.read(namespace)
87
+ @definition[phase][:options] = secrets.merge(@definition[phase][:options])
88
+ end
89
+ end
90
+
65
91
  # Is this job continuing from a previous run?
66
92
  def incremental?
67
93
  @definition[:incremental]
@@ -1,5 +1,6 @@
1
- require 'sequel'
2
1
  require 'forwardable'
2
+ require 'sequel'
3
+ require 'xdg'
3
4
 
4
5
  module Chronicle
5
6
  module ETL
@@ -35,8 +36,8 @@ module Chronicle
35
36
  end
36
37
 
37
38
  def self.db_filename
38
- data = Runcom::Data.new "chronicle/etl/job_log.db"
39
- filename = data.all[0].to_s
39
+ base = Pathname.new(XDG::Data.new.home)
40
+ base.join('job_log.db')
40
41
  end
41
42
 
42
43
  def self.initialize_db
@@ -3,11 +3,13 @@ require 'csv'
3
3
  module Chronicle
4
4
  module ETL
5
5
  class CSVLoader < Chronicle::ETL::Loader
6
+ include Chronicle::ETL::Loaders::Helpers::StdoutHelper
7
+
6
8
  register_connector do |r|
7
9
  r.description = 'CSV'
8
10
  end
9
11
 
10
- setting :output, default: $stdout
12
+ setting :output
11
13
  setting :headers, default: true
12
14
  setting :header_row, default: true
13
15
 
@@ -30,16 +32,7 @@ module Chronicle
30
32
  csv_options[:headers] = headers
31
33
  end
32
34
 
33
- if @config.output.is_a?(IO)
34
- # This might seem like a duplication of the default value ($stdout)
35
- # but it's because rspec overwrites $stdout (in helper #capture) to
36
- # capture output.
37
- io = $stdout.dup
38
- else
39
- io = File.open(@config.output, "w+")
40
- end
41
-
42
- output = CSV.generate(**csv_options) do |csv|
35
+ csv_output = CSV.generate(**csv_options) do |csv|
43
36
  records.each do |record|
44
37
  csv << record
45
38
  .transform_keys(&:to_sym)
@@ -48,8 +41,12 @@ module Chronicle
48
41
  end
49
42
  end
50
43
 
51
- io.write(output)
52
- io.close
44
+ # TODO: just write to io directly
45
+ if output_to_stdout?
46
+ write_to_stdout(csv_output)
47
+ else
48
+ File.write(@config.output, csv_output)
49
+ end
53
50
  end
54
51
  end
55
52
  end
@@ -0,0 +1,36 @@
1
+ require 'tempfile'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Loaders
6
+ module Helpers
7
+ module StdoutHelper
8
+ # TODO: let users use "stdout" as an option for the `output` setting
9
+ # Assume we're using stdout if no output is specified
10
+ def output_to_stdout?
11
+ !@config.output
12
+ end
13
+
14
+ def create_stdout_temp_file
15
+ file = Tempfile.new('chronicle-stdout')
16
+ file.unlink
17
+ file
18
+ end
19
+
20
+ def write_to_stdout_from_temp_file(file)
21
+ file.rewind
22
+ write_to_stdout(file.read)
23
+ end
24
+
25
+ def write_to_stdout(output)
26
+ # We .dup because rspec overwrites $stdout (in helper #capture) to
27
+ # capture output.
28
+ stdout = $stdout.dup
29
+ stdout.write(output)
30
+ stdout.flush
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -1,19 +1,35 @@
1
+ require 'tempfile'
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class JSONLoader < Chronicle::ETL::Loader
6
+ include Chronicle::ETL::Loaders::Helpers::StdoutHelper
7
+
4
8
  register_connector do |r|
5
9
  r.description = 'json'
6
10
  end
7
11
 
8
12
  setting :serializer
9
- setting :output, default: $stdout
13
+ setting :output
14
+
15
+ # If true, one JSON record per line. If false, output a single json
16
+ # object with an array of records
17
+ setting :line_separated, default: true, type: :boolean
18
+
19
+ def initialize(*args)
20
+ super
21
+ @first_line = true
22
+ end
10
23
 
11
24
  def start
12
- if @config.output == $stdout
13
- @output = @config.output
14
- else
15
- @output = File.open(@config.output, "w")
16
- end
25
+ @output_file =
26
+ if output_to_stdout?
27
+ create_stdout_temp_file
28
+ else
29
+ File.open(@config.output, "w+")
30
+ end
31
+
32
+ @output_file.puts("[\n") unless @config.line_separated
17
33
  end
18
34
 
19
35
  def load(record)
@@ -27,15 +43,34 @@ module Chronicle
27
43
 
28
44
  force_utf8(value)
29
45
  end
30
- @output.puts encoded.to_json
46
+
47
+ line = encoded.to_json
48
+ # For line-separated output, we just put json + newline
49
+ if @config.line_separated
50
+ line = "#{line}\n"
51
+ # Otherwise, we add a comma and newline and then add record to the
52
+ # array we created in #start (unless it's the first line).
53
+ else
54
+ line = ",\n#{line}" unless @first_line
55
+ end
56
+
57
+ @output_file.write(line)
58
+
59
+ @first_line = false
31
60
  end
32
61
 
33
62
  def finish
34
- @output.close
63
+ # Close the array unless we're doing line-separated JSON
64
+ @output_file.puts("\n]") unless @config.line_separated
65
+
66
+ write_to_stdout_from_temp_file(@output_file) if output_to_stdout?
67
+
68
+ @output_file.close
35
69
  end
36
70
 
37
71
  private
38
72
 
73
+ # TODO: implement this
39
74
  def serializer
40
75
  @config.serializer || Chronicle::ETL::RawSerializer
41
76
  end
@@ -1,4 +1,5 @@
1
1
  require_relative 'helpers/encoding_helper'
2
+ require_relative 'helpers/stdout_helper'
2
3
 
3
4
  module Chronicle
4
5
  module ETL
@@ -13,8 +13,8 @@ module Chronicle
13
13
  module PluginRegistry
14
14
  # Does this plugin exist?
15
15
  def self.exists?(name)
16
- # TODO: implement this. Could query rubygems.org or have a
17
- # hardcoded approved list
16
+ # TODO: implement this. Could query rubygems.org or use a hardcoded
17
+ # list somewhere
18
18
  true
19
19
  end
20
20
 
@@ -31,6 +31,12 @@ module Chronicle
31
31
  .values
32
32
  end
33
33
 
34
+ # Check whether a given plugin is installed
35
+ def self.installed?(name)
36
+ gem_name = "chronicle-#{name}"
37
+ all_installed.map(&:name).include?(gem_name)
38
+ end
39
+
34
40
  # Activate a plugin with given name by `require`ing it
35
41
  def self.activate(name)
36
42
  # By default, activates the latest available version of a gem
@@ -39,14 +45,17 @@ module Chronicle
39
45
  rescue Gem::ConflictError => e
40
46
  # TODO: figure out if there's more we can do here
41
47
  raise Chronicle::ETL::PluginConflictError.new(name), "Plugin '#{name}' couldn't be loaded. #{e.message}"
42
- rescue LoadError => e
43
- raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded" if exists?(name)
44
-
45
- raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist"
48
+ rescue StandardError, LoadError => e
49
+ # StandardError to catch random non-loading problems that might occur
50
+ # when requiring the plugin (eg class macro invoked the wrong way)
51
+ # TODO: decide if this should be separated
52
+ raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded"
46
53
  end
47
54
 
48
55
  # Install a plugin to local gems
49
56
  def self.install(name)
57
+ return if installed?(name)
58
+
50
59
  gem_name = "chronicle-#{name}"
51
60
  raise(Chronicle::ETL::PluginNotAvailableError.new(gem_name), "Plugin #{name} doesn't exist") unless exists?(gem_name)
52
61
 
@@ -9,18 +9,7 @@ module Chronicle
9
9
  class << self
10
10
  attr_accessor :connectors
11
11
 
12
- def load_all!
13
- load_connectors_from_gems
14
- end
15
-
16
- def load_connectors_from_gems
17
- Gem::Specification.filter{|s| s.name.match(/^chronicle/) }.each do |gem|
18
- require_str = gem.name.gsub('chronicle-', 'chronicle/')
19
- require require_str rescue LoadError
20
- end
21
- end
22
-
23
- def register connector
12
+ def register(connector)
24
13
  connectors << connector
25
14
  end
26
15
 
@@ -28,9 +17,14 @@ module Chronicle
28
17
  @connectors ||= []
29
18
  end
30
19
 
31
- def find_by_phase_and_identifier(phase, identifier)
32
- # Simple case: built in connector
20
+ # Find connector from amongst those currently loaded
21
+ def find_by_phase_and_identifier_local(phase, identifier)
33
22
  connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
23
+ end
24
+
25
+ # Find connector and load relevant plugin to find it if necessary
26
+ def find_by_phase_and_identifier(phase, identifier)
27
+ connector = find_by_phase_and_identifier_local(phase, identifier)
34
28
  return connector if connector
35
29
 
36
30
  # if not available in built-in connectors, try to activate a
@@ -44,6 +38,8 @@ module Chronicle
44
38
  plugin = identifier
45
39
  end
46
40
 
41
+ raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless PluginRegistry.installed?(plugin)
42
+
47
43
  PluginRegistry.activate(plugin)
48
44
 
49
45
  candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
@@ -50,7 +50,7 @@ class Chronicle::ETL::Runner
50
50
  transformer = @job.instantiate_transformer(extraction)
51
51
  record = transformer.transform
52
52
 
53
- Chronicle::ETL::Logger.info(tty_log_transformation(transformer))
53
+ Chronicle::ETL::Logger.debug(tty_log_transformation(transformer))
54
54
  @job_logger.log_transformation(transformer)
55
55
 
56
56
  @loader.load(record) unless @job.dry_run?
@@ -0,0 +1,55 @@
1
+ module Chronicle
2
+ module ETL
3
+ # Secret management module
4
+ module Secrets
5
+ module_function
6
+
7
+ # Save a setting to a namespaced config file
8
+ def set(namespace, key, value)
9
+ config = read(namespace)
10
+ config[key.to_sym] = value
11
+ write(namespace, config)
12
+ end
13
+
14
+ # Remove a setting from a namespaced config file
15
+ def unset(namespace, key)
16
+ config = read(namespace)
17
+ config.delete(key.to_sym)
18
+ write(namespace, config)
19
+ end
20
+
21
+ # Retrieve all secrets from all namespaces (or only from the given namespace)
22
+ def all(namespace = nil)
23
+ namespaces = namespace.nil? ? available_secrets : [namespace]
24
+ namespaces
25
+ .to_h { |namespace| [namespace.to_sym, read(namespace)] }
26
+ .delete_if { |_, v| v.empty? }
27
+ end
28
+
29
+ # Return whether a namespace name is valid (lowercase alphanumeric and -)
30
+ def valid_namespace_name?(namespace)
31
+ namespace.match(/^[a-z0-9\-]+$/)
32
+ end
33
+
34
+ # Read secrets from a config file
35
+ def read(namespace)
36
+ definition = Chronicle::ETL::Config.load("secrets", namespace)
37
+ definition[:secrets] || {}
38
+ end
39
+
40
+ # Write secrets to a config file
41
+ def write(namespace, secrets)
42
+ data = {
43
+ secrets: (secrets || {}).transform_keys(&:to_s),
44
+ chronicle_etl_version: Chronicle::ETL::VERSION
45
+ }.transform_keys(&:to_s) # Should I implement deeply_transform_keys...?
46
+ Chronicle::ETL::Config.write("secrets", namespace, data)
47
+ end
48
+
49
+ # Which config files are available in ~/.config/chronicle/etl/secrets
50
+ def available_secrets
51
+ Chronicle::ETL::Config.available_configs('secrets')
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
2
  module ETL
3
- VERSION = "0.4.4"
3
+ VERSION = "0.5.2"
4
4
  end
5
5
  end
data/lib/chronicle/etl.rb CHANGED
@@ -14,6 +14,7 @@ require_relative 'etl/models/base'
14
14
  require_relative 'etl/models/raw'
15
15
  require_relative 'etl/models/entity'
16
16
  require_relative 'etl/runner'
17
+ require_relative 'etl/secrets'
17
18
  require_relative 'etl/serializers/serializer'
18
19
  require_relative 'etl/utils/binary_attachments'
19
20
  require_relative 'etl/utils/hash_utilities'