chronicle-etl 0.4.0 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/.rubocop.yml +3 -0
- data/README.md +156 -81
- data/chronicle-etl.gemspec +3 -0
- data/lib/chronicle/etl/cli/cli_base.rb +31 -0
- data/lib/chronicle/etl/cli/connectors.rb +4 -11
- data/lib/chronicle/etl/cli/jobs.rb +49 -22
- data/lib/chronicle/etl/cli/main.rb +32 -1
- data/lib/chronicle/etl/cli/plugins.rb +62 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +1 -1
- data/lib/chronicle/etl/cli.rb +3 -0
- data/lib/chronicle/etl/config.rb +7 -4
- data/lib/chronicle/etl/configurable.rb +15 -2
- data/lib/chronicle/etl/exceptions.rb +29 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -17
- data/lib/chronicle/etl/extractors/extractor.rb +5 -5
- data/lib/chronicle/etl/extractors/file_extractor.rb +33 -13
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +21 -12
- data/lib/chronicle/etl/job.rb +7 -1
- data/lib/chronicle/etl/job_definition.rb +32 -6
- data/lib/chronicle/etl/loaders/csv_loader.rb +35 -8
- data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +24 -1
- data/lib/chronicle/etl/loaders/table_loader.rb +13 -26
- data/lib/chronicle/etl/logger.rb +6 -2
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +5 -0
- data/lib/chronicle/etl/registry/plugin_registry.rb +75 -0
- data/lib/chronicle/etl/registry/registry.rb +27 -14
- data/lib/chronicle/etl/runner.rb +35 -17
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +11 -4
- metadata +53 -6
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "tty-prompt"
|
4
|
+
require "tty-spinner"
|
5
|
+
|
6
|
+
module Chronicle
  module ETL
    module CLI
      # CLI commands for working with ETL plugins
      class Plugins < SubcommandBase
        default_task 'list'
        namespace :plugins

        desc "install", "Install a plugin"
        # Install each named plugin through the plugin registry, showing a spinner
        # while work is in progress.
        #
        # @param plugins [Array<String>] names of the plugins to install
        def install(*plugins)
          cli_fail(message: "Please specify a plugin to install") unless plugins.any?

          spinner = TTY::Spinner.new("[:spinner] Installing #{plugins.join(", ")}...", format: :dots_2)
          spinner.auto_spin
          plugins.each do |plugin|
            # NOTE(review): the spinner message has no `:title` token, so this update
            # may not change what is displayed -- confirm against tty-spinner docs
            spinner.update(title: "Installing #{plugin}")
            Chronicle::ETL::Registry::PluginRegistry.install(plugin)
          rescue Chronicle::ETL::PluginError => e
            spinner.error("Error".red)
            cli_fail(message: "Plugin '#{plugin}' could not be installed", exception: e)
          end
          spinner.success("(#{'successful'.green})")
        end

        desc "uninstall", "Uninstall a plugin"
        # Uninstall a single plugin through the plugin registry.
        #
        # @param name [String] name of the plugin to remove
        def uninstall(name)
          spinner = TTY::Spinner.new("[:spinner] Uninstalling plugin #{name}...", format: :dots_2)
          spinner.auto_spin
          Chronicle::ETL::Registry::PluginRegistry.uninstall(name)
          spinner.success("(#{'successful'.green})")
        rescue Chronicle::ETL::PluginError => e
          spinner.error("Error".red)
          cli_fail(message: "Plugin '#{name}' could not be uninstalled (was it installed?)", exception: e)
        end

        desc "list", "Lists available plugins"
        # Display all available plugins that chronicle-etl has access to
        def list
          plugins = Chronicle::ETL::Registry::PluginRegistry.all_installed_latest

          # One row per plugin; the "chronicle-" gem prefix is stripped for display
          info = plugins.map do |plugin|
            {
              name: plugin.name.sub("chronicle-", ""),
              description: plugin.description,
              version: plugin.version
            }
          end

          headers = ['name', 'description', 'latest version'].map { |h| h.to_s.upcase.bold }
          table = TTY::Table.new(headers, info.map(&:values))
          puts "Installed plugins:"
          puts table.render(indent: 2, padding: [0, 0])
        end
      end
    end
  end
end
|
@@ -2,7 +2,7 @@ module Chronicle
|
|
2
2
|
module ETL
|
3
3
|
module CLI
|
4
4
|
# Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
|
5
|
-
class SubcommandBase < ::
|
5
|
+
class SubcommandBase < Chronicle::ETL::CLI::CLIBase
|
6
6
|
# Print usage instructions for a subcommand
|
7
7
|
def self.help(shell, subcommand = false)
|
8
8
|
list = printable_commands(true, subcommand)
|
data/lib/chronicle/etl/cli.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
require 'thor'
|
2
|
+
require 'thor/hollaback'
|
2
3
|
require 'chronicle/etl'
|
3
4
|
|
5
|
+
require 'chronicle/etl/cli/cli_base'
|
4
6
|
require 'chronicle/etl/cli/subcommand_base'
|
5
7
|
require 'chronicle/etl/cli/connectors'
|
6
8
|
require 'chronicle/etl/cli/jobs'
|
9
|
+
require 'chronicle/etl/cli/plugins'
|
7
10
|
require 'chronicle/etl/cli/main'
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -24,16 +24,14 @@ module Chronicle
|
|
24
24
|
|
25
25
|
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
26
26
|
def available_jobs
  # Glob the "jobs" config directory and strip directory + extension from each match
  pattern = File.join(config_directory("jobs"), "*.yml")
  Dir.glob(pattern).map { |path| File.basename(path, ".*") }
end
|
32
31
|
|
33
32
|
# Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
|
34
33
|
def available_credentials
  # Glob the "credentials" config directory and strip directory + extension from each match
  pattern = File.join(config_directory("credentials"), "*.yml")
  Dir.glob(pattern).map { |path| File.basename(path, ".*") }
end
|
@@ -48,6 +46,11 @@ module Chronicle
|
|
48
46
|
def load_credentials(name)
  # The loaded config is returned directly; no intermediate local is needed
  self.load("chronicle/etl/credentials/#{name}.yml")
end
|
49
|
+
|
50
|
+
# Resolve the config directory for a given type (e.g. "jobs", "credentials").
#
# @param type [String] sub-directory name under chronicle/etl
# @raise [Chronicle::ETL::ConfigError] when Runcom reports no current path
def config_directory(type)
  path = "chronicle/etl/#{type}"
  current = Runcom::Config.new(path).current
  return current if current

  raise(Chronicle::ETL::ConfigError, "Could not access config directory (#{path})")
end
|
51
54
|
end
|
52
55
|
end
|
53
56
|
end
|
@@ -57,7 +57,7 @@ module Chronicle
|
|
57
57
|
|
58
58
|
options.each do |name, value|
|
59
59
|
setting = self.class.all_settings[name]
|
60
|
-
raise(Chronicle::ETL::
|
60
|
+
raise(Chronicle::ETL::ConnectorConfigurationError, "Unrecognized setting: #{name}") unless setting
|
61
61
|
|
62
62
|
@config[name] = coerced_value(setting, value)
|
63
63
|
end
|
@@ -78,7 +78,7 @@ module Chronicle
|
|
78
78
|
|
79
79
|
# Ensure every required setting has a value in the compacted config.
#
# @raise [Chronicle::ETL::ConnectorConfigurationError] listing the missing setting names
def validate_config
  required = self.class.all_required_settings.keys
  missing = required - @config.compacted_h.keys
  return if missing.empty?

  raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}"
end
|
83
83
|
|
84
84
|
def coerced_value(setting, value)
|
@@ -89,6 +89,19 @@ module Chronicle
|
|
89
89
|
value.to_s
|
90
90
|
end
|
91
91
|
|
92
|
+
# TODO: think about whether to split up float, integer
|
93
|
+
def coerce_numeric(value)
  # Every numeric setting becomes a Float; `to_f` yields 0.0 for non-numeric strings
  value.to_f
end
|
96
|
+
|
97
|
+
# Coerce a setting value to a boolean.
# Strings are true only when they read "true" (any letter case); non-strings pass through unchanged.
def coerce_boolean(value)
  return value unless value.is_a?(String)

  value.downcase == "true"
end
|
104
|
+
|
92
105
|
def coerce_time(value)
|
93
106
|
# TODO: handle durations like '3h'
|
94
107
|
if value.is_a?(String)
|
@@ -1,11 +1,34 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
|
-
class Error < StandardError; end
|
3
|
+
class Error < StandardError; end
|
4
4
|
|
5
|
-
class
|
5
|
+
class ConfigError < Error; end
|
6
6
|
|
7
7
|
class RunnerTypeError < Error; end
|
8
8
|
|
9
|
+
# Error raised for a problematic job definition; keeps a reference to the
# offending definition so handlers can inspect it.
class JobDefinitionError < Error
  attr_reader :job_definition

  # @param job_definition the job definition this error is about; it is also
  #   forwarded to the parent initializer as the message argument
  def initialize(job_definition)
    @job_definition = job_definition
    super(job_definition)
  end
end
|
17
|
+
|
18
|
+
# Base error for plugin problems; records the name of the plugin involved.
class PluginError < Error
  attr_reader :name

  # @param name [String] name of the plugin the error relates to
  def initialize(name)
    super()
    @name = name
  end
end
|
25
|
+
|
26
|
+
class PluginConflictError < PluginError; end
|
27
|
+
class PluginNotAvailableError < PluginError; end
|
28
|
+
class PluginLoadError < PluginError; end
|
29
|
+
|
30
|
+
class ConnectorConfigurationError < Error; end
|
31
|
+
|
9
32
|
class ConnectorNotAvailableError < Error
|
10
33
|
def initialize(message, provider: nil, name: nil)
|
11
34
|
super(message)
|
@@ -18,6 +41,10 @@ module Chronicle
|
|
18
41
|
class ProviderNotAvailableError < ConnectorNotAvailableError; end
|
19
42
|
class ProviderConnectorNotAvailableError < ConnectorNotAvailableError; end
|
20
43
|
|
44
|
+
class ExtractionError < Error; end
|
45
|
+
|
46
|
+
class SerializationError < Error; end
|
47
|
+
|
21
48
|
class TransformationError < Error
|
22
49
|
attr_reader :transformation
|
23
50
|
|
@@ -3,39 +3,46 @@ require 'csv'
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
5
|
# Extractor that turns each row of the CSV input(s) into an Extraction
class CSVExtractor < Chronicle::ETL::Extractor
  include Extractors::Helpers::InputReader

  register_connector { |r| r.description = 'CSV' }

  setting :headers, default: true

  # Build one CSV reader per input source
  def prepare
    @csvs = prepare_sources
  end

  # Yield an Extraction for every row of every prepared CSV
  def extract
    @csvs.each do |csv|
      csv.read.each { |row| yield Chronicle::ETL::Extraction.new(data: row.to_h) }
    end
  end

  # Total row count across all sources; each reader is rewound after counting
  def results_count
    @csvs.sum do |csv|
      csv.readlines.size.tap { csv.rewind }
    end
  end

  private

  # Wrap every input read by `read_input` in a CSV instance.
  # A String `headers` setting is split on commas into explicit header names.
  def prepare_sources
    @csvs = []
    read_input do |csv_data|
      headers = @config.headers
      headers = headers.split(',') if headers.is_a?(String)
      @csvs << CSV.new(csv_data, headers: headers, converters: :all)
    end
    @csvs
  end
end
|
41
48
|
end
|
@@ -7,11 +7,11 @@ module Chronicle
|
|
7
7
|
extend Chronicle::ETL::Registry::SelfRegistering
|
8
8
|
include Chronicle::ETL::Configurable
|
9
9
|
|
10
|
-
setting :since, type: :
|
11
|
-
setting :until, type: :
|
12
|
-
setting :limit
|
10
|
+
setting :since, type: :time
|
11
|
+
setting :until, type: :time
|
12
|
+
setting :limit, type: :numeric
|
13
13
|
setting :load_after_id
|
14
|
-
setting :
|
14
|
+
setting :input
|
15
15
|
|
16
16
|
# Construct a new instance of this extractor. Options are passed in from a Runner
|
17
17
|
# == Parameters:
|
@@ -46,7 +46,7 @@ module Chronicle
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
require_relative 'helpers/
|
49
|
+
require_relative 'helpers/input_reader'
|
50
50
|
require_relative 'csv_extractor'
|
51
51
|
require_relative 'file_extractor'
|
52
52
|
require_relative 'json_extractor'
|
@@ -2,35 +2,55 @@ require 'pathname'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
+
# Return filenames that match a pattern in a directory
class FileExtractor < Chronicle::ETL::Extractor
  register_connector do |r|
    r.description = 'file or directory of files'
  end

  setting :input, default: ['.']
  setting :dir_glob_pattern, default: "**/*"
  # Size bounds are compared against Pathname#size, so they are declared numeric
  # (as :limit is on the base extractor) to coerce string-supplied values.
  setting :larger_than, type: :numeric
  setting :smaller_than, type: :numeric

  # Resolve the list of matching paths up front
  def prepare
    @pathnames = gather_files
  end

  # Yield one Extraction per matched path; the data is the path string
  def extract
    @pathnames.each do |pathname|
      yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
    end
  end

  def results_count
    @pathnames.count
  end

  private

  # Collect the configured inputs, expand directories with the glob pattern,
  # apply the since/until and size filters, and sort by modification time.
  #
  # @return [Array<Pathname>]
  # @raise [ExtractionError] if any configured input does not exist
  def gather_files
    roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
    raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)

    directories, files = roots.partition(&:directory?)

    directories.each do |directory|
      files += Dir.glob(File.join(directory, @config.dir_glob_pattern)).map { |filename| Pathname.new(filename) }
    end

    files = files.uniq

    files = files.select { |f| f.mtime > @config.since } if @config.since
    files = files.select { |f| f.mtime < @config.until } if @config.until

    # pass in file sizes in bytes
    files = files.select { |f| f.size < @config.smaller_than } if @config.smaller_than
    files = files.select { |f| f.size > @config.larger_than } if @config.larger_than

    # TODO: incorporate sort argument
    files.sort_by(&:mtime)
  end
end
|
36
56
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    module Extractors
      module Helpers
        # Mixin for extractors that read from the files named in `@config.input`,
        # falling back to piped stdin when no files are given.
        module InputReader
          # Return an array of input filenames; converts a single string
          # to an array if necessary and drops nil entries
          #
          # @return [Array<String>]
          def filenames
            [@config.input].flatten.compact
          end

          # Filenames as an array of pathnames
          def pathnames
            filenames.map { |filename| Pathname.new(filename) }
          end

          # Whether we're reading from files
          def read_from_files?
            filenames.any?
          end

          # Whether we're reading input from stdin
          def read_from_stdin?
            !read_from_files? && $stdin.stat.pipe?
          end

          # Read input sources and yield each content
          #
          # For files, yields the whole file content and the file's path; for
          # stdin, yields everything read from $stdin and the $stdin object.
          #
          # @raise [ExtractionError] when there are neither input files nor piped stdin
          def read_input
            if read_from_files?
              pathnames.each do |pathname|
                File.open(pathname) do |file|
                  yield file.read, pathname.to_path
                end
              end
            elsif read_from_stdin?
              yield $stdin.read, $stdin
            else
              raise ExtractionError, "No input files or stdin provided"
            end
          end

          # Read input sources line by line
          #
          # @raise [ExtractionError] when there are neither input files nor piped stdin
          def read_input_as_lines(&block)
            if read_from_files?
              lines_from_files(&block)
            elsif read_from_stdin?
              lines_from_stdin(&block)
            else
              raise ExtractionError, "No input files or stdin provided"
            end
          end

          private

          # Yield every line of every input file, in input order
          def lines_from_files(&block)
            pathnames.each do |pathname|
              File.open(pathname) do |file|
                lines_from_io(file, &block)
              end
            end
          end

          def lines_from_stdin(&block)
            lines_from_io($stdin, &block)
          end

          def lines_from_io(io, &block)
            io.each_line(&block)
          end
        end
      end
    end
  end
end
|
@@ -1,35 +1,44 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
|
-
class
|
4
|
-
include Extractors::Helpers::
|
3
|
+
# Extractor that parses JSON input, either one document per line (jsonl) or
# one document per input source
class JSONExtractor < Chronicle::ETL::Extractor
  include Extractors::Helpers::InputReader

  register_connector { |r| r.description = 'JSON' }

  setting :jsonl, default: true, type: :boolean

  # Parse all input up front so results can be counted before extraction
  def prepare
    @jsons = []
    load_input { |input| @jsons << parse_data(input) }
  end

  # Yield one Extraction per parsed document
  def extract
    @jsons.each { |json| yield Chronicle::ETL::Extraction.new(data: json) }
  end

  def results_count
    @jsons.count
  end

  private

  # Parse a JSON string, converting parser failures into an ExtractionError
  def parse_data(data)
    JSON.parse(data)
  rescue JSON::ParserError
    raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
  end

  # Read line by line for jsonl input, otherwise one whole source at a time
  def load_input(&block)
    @config.jsonl ? read_input_as_lines(&block) : read_input(&block)
  end
end
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'forwardable'
|
2
|
+
|
2
3
|
module Chronicle
|
3
4
|
module ETL
|
5
|
+
# A runner job
|
6
|
+
#
|
7
|
+
# TODO: this can probably be merged with JobDefinition. Not clear
|
8
|
+
# where the boundaries are
|
4
9
|
class Job
|
5
10
|
extend Forwardable
|
6
11
|
|
@@ -12,7 +17,8 @@ module Chronicle
|
|
12
17
|
:transformer_klass,
|
13
18
|
:transformer_options,
|
14
19
|
:loader_klass,
|
15
|
-
:loader_options
|
20
|
+
:loader_options,
|
21
|
+
:job_definition
|
16
22
|
|
17
23
|
# TODO: build a proper id system
|
18
24
|
alias id name
|
@@ -14,22 +14,52 @@ module Chronicle
|
|
14
14
|
options: {}
|
15
15
|
},
|
16
16
|
loader: {
|
17
|
-
name: '
|
17
|
+
name: 'table',
|
18
18
|
options: {}
|
19
19
|
}
|
20
20
|
}.freeze
|
21
21
|
|
22
|
+
attr_reader :errors
|
22
23
|
attr_accessor :definition
|
23
24
|
|
24
25
|
def initialize
  # Start from the skeleton; add_config replaces @definition with a merged copy
  @definition = SKELETON_DEFINITION
end
|
27
28
|
|
29
|
+
# Re-run validation and report whether it recorded no errors
def valid?
  validate

  @errors.empty?
end
|
33
|
+
|
34
|
+
# Resolve the connector class for every phase, collecting any PluginError
# raised along the way under @errors[:plugins].
def validate
  @errors = {}

  Chronicle::ETL::Registry::PHASES.each do |phase|
    __send__(:"#{phase}_klass")
  rescue Chronicle::ETL::PluginError => e
    (@errors[:plugins] ||= []) << e
  end
end
|
44
|
+
|
45
|
+
# Whether validation recorded at least one PluginLoadError (exactly that
# class, not other PluginError subclasses).
#
# @return [Boolean]
def plugins_missing?
  validate

  # The fallback must be parenthesised: in `a || []` followed by a leading-dot
  # chain, the chain binds to `[]` only, so any recorded plugin error would
  # make this return the (truthy) error array without filtering.
  (@errors[:plugins] || []).any? { |e| e.instance_of?(Chronicle::ETL::PluginLoadError) }
end
|
52
|
+
|
53
|
+
# Validate and raise if the definition is invalid.
#
# @return [true] when the definition is valid
# @raise [Chronicle::ETL::JobDefinitionError] carrying this definition otherwise
def validate!
  return true if valid?

  raise Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid"
end
|
58
|
+
|
28
59
|
# Add config hash to this definition
|
29
60
|
def add_config(config = {})
  # Merge into a new hash rather than mutating the current definition in place
  merged = @definition.deep_merge(config)
  @definition = merged
  load_credentials
end
|
34
64
|
|
35
65
|
# Is this job continuing from a previous run?
|
@@ -80,10 +110,6 @@ module Chronicle
|
|
80
110
|
end
|
81
111
|
end
|
82
112
|
end
|
83
|
-
|
84
|
-
def validate
|
85
|
-
return true # TODO
|
86
|
-
end
|
87
113
|
end
|
88
114
|
end
|
89
115
|
end
|
@@ -7,22 +7,49 @@ module Chronicle
|
|
7
7
|
r.description = 'CSV'
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
setting :output, default: $stdout
|
11
|
+
setting :headers, default: true
|
12
|
+
setting :header_row, default: true
|
13
|
+
|
14
|
+
# Records buffered by #load, created lazily on first access
def records
  @records ||= []
end
|
14
17
|
|
15
18
|
# Buffer the record's flattened hash form; output is written in #finish
def load(record)
  flattened = record.to_h_flattened
  records << flattened
end
|
18
21
|
|
19
22
|
# Write all buffered records as CSV to the configured output.
#
# Does nothing when no records were loaded. The CSV text is generated before
# the destination is opened, so a failure while building it leaves an existing
# output file untouched, and the destination is always closed after writing.
def finish
  return unless records.any?

  headers = build_headers(records)

  csv_options = {}
  if @config.headers
    csv_options[:write_headers] = @config.header_row
    csv_options[:headers] = headers
  end

  output = CSV.generate(**csv_options) do |csv|
    records.each do |record|
      csv << record
        .transform_keys(&:to_sym)
        .values_at(*headers)
        .map { |value| force_utf8(value) }
    end
  end

  io =
    if @config.output.is_a?(IO)
      # This might seem like a duplication of the default value ($stdout)
      # but it's because rspec overwrites $stdout (in helper #capture) to
      # capture output.
      $stdout.dup
    else
      File.open(@config.output, "w+")
    end

  begin
    io.write(output)
  ensure
    io.close
  end

  # keep the original nil return value (previously the result of io.close)
  nil
end
|
27
54
|
end
|
28
55
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    module Loaders
      module Helpers
        # Helpers for making extracted values safe to write as UTF-8
        module EncodingHelper
          # Mostly useful for handling loading with binary data from a raw extraction
          #
          # Strings are re-encoded to UTF-8 with invalid or unconvertible
          # sequences removed; any non-String value is returned untouched.
          def force_utf8(value)
            if value.is_a?(String)
              value.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
            else
              value
            end
          end
        end
      end
    end
  end
end
|