chronicle-etl 0.4.0 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/.rubocop.yml +3 -0
- data/README.md +156 -81
- data/chronicle-etl.gemspec +3 -0
- data/lib/chronicle/etl/cli/cli_base.rb +31 -0
- data/lib/chronicle/etl/cli/connectors.rb +4 -11
- data/lib/chronicle/etl/cli/jobs.rb +49 -22
- data/lib/chronicle/etl/cli/main.rb +32 -1
- data/lib/chronicle/etl/cli/plugins.rb +62 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +1 -1
- data/lib/chronicle/etl/cli.rb +3 -0
- data/lib/chronicle/etl/config.rb +7 -4
- data/lib/chronicle/etl/configurable.rb +15 -2
- data/lib/chronicle/etl/exceptions.rb +29 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -17
- data/lib/chronicle/etl/extractors/extractor.rb +5 -5
- data/lib/chronicle/etl/extractors/file_extractor.rb +33 -13
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +21 -12
- data/lib/chronicle/etl/job.rb +7 -1
- data/lib/chronicle/etl/job_definition.rb +32 -6
- data/lib/chronicle/etl/loaders/csv_loader.rb +35 -8
- data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +24 -1
- data/lib/chronicle/etl/loaders/table_loader.rb +13 -26
- data/lib/chronicle/etl/logger.rb +6 -2
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +5 -0
- data/lib/chronicle/etl/registry/plugin_registry.rb +75 -0
- data/lib/chronicle/etl/registry/registry.rb +27 -14
- data/lib/chronicle/etl/runner.rb +35 -17
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +11 -4
- metadata +53 -6
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "tty-prompt"
|
4
|
+
require "tty-spinner"
|
5
|
+
|
6
|
+
module Chronicle
  module ETL
    module CLI
      # CLI commands for working with ETL plugins.
      #
      # Exposes three subcommands under the `plugins` namespace: `install`,
      # `uninstall` and `list` (the default task). All plugin operations are
      # delegated to Chronicle::ETL::Registry::PluginRegistry.
      class Plugins < SubcommandBase
        default_task 'list'
        namespace :plugins

        desc "install", "Install a plugin"
        # Install one or more plugins, showing a spinner while working.
        #
        # @param plugins [Array<String>] names of the plugins to install
        def install(*plugins)
          # NOTE(review): cli_fail is defined outside this file (presumably on the
          # CLI base class); the flow below reads as if it aborts the command — confirm.
          cli_fail(message: "Please specify a plugin to install") unless plugins.any?

          spinner = TTY::Spinner.new("[:spinner] Installing #{plugins.join(", ")}...", format: :dots_2)
          spinner.auto_spin
          plugins.each do |plugin|
            spinner.update(title: "Installing #{plugin}")
            Chronicle::ETL::Registry::PluginRegistry.install(plugin)
          rescue Chronicle::ETL::PluginError => e
            spinner.error("Error".red)
            cli_fail(message: "Plugin '#{plugin}' could not be installed", exception: e)
          end
          spinner.success("(#{'successful'.green})")
        end

        desc "uninstall", "Uninstall a plugin"
        # Uninstall a single plugin, showing a spinner while working.
        #
        # @param name [String] name of the plugin to remove
        def uninstall(name)
          spinner = TTY::Spinner.new("[:spinner] Uninstalling plugin #{name}...", format: :dots_2)
          spinner.auto_spin
          Chronicle::ETL::Registry::PluginRegistry.uninstall(name)
          spinner.success("(#{'successful'.green})")
        rescue Chronicle::ETL::PluginError => e
          spinner.error("Error".red)
          cli_fail(message: "Plugin '#{name}' could not be uninstalled (was it installed?)", exception: e)
        end

        desc "list", "Lists available plugins"
        # Display all available plugins that chronicle-etl has access to
        def list
          plugins = Chronicle::ETL::Registry::PluginRegistry.all_installed_latest

          # One row per plugin; the "chronicle-" prefix is dropped from the
          # displayed name.
          rows = plugins.map do |plugin|
            [plugin.name.sub("chronicle-", ""), plugin.description, plugin.version]
          end

          headers = ['name', 'description', 'latest version'].map { |h| h.upcase.bold }
          table = TTY::Table.new(headers, rows)
          puts "Installed plugins:"
          puts table.render(indent: 2, padding: [0, 0])
        end
      end
    end
  end
end
|
@@ -2,7 +2,7 @@ module Chronicle
|
|
2
2
|
module ETL
|
3
3
|
module CLI
|
4
4
|
# Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
|
5
|
-
class SubcommandBase < ::
|
5
|
+
class SubcommandBase < Chronicle::ETL::CLI::CLIBase
|
6
6
|
# Print usage instructions for a subcommand
|
7
7
|
def self.help(shell, subcommand = false)
|
8
8
|
list = printable_commands(true, subcommand)
|
data/lib/chronicle/etl/cli.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
require 'thor'
|
2
|
+
require 'thor/hollaback'
|
2
3
|
require 'chronicle/etl'
|
3
4
|
|
5
|
+
require 'chronicle/etl/cli/cli_base'
|
4
6
|
require 'chronicle/etl/cli/subcommand_base'
|
5
7
|
require 'chronicle/etl/cli/connectors'
|
6
8
|
require 'chronicle/etl/cli/jobs'
|
9
|
+
require 'chronicle/etl/cli/plugins'
|
7
10
|
require 'chronicle/etl/cli/main'
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -24,16 +24,14 @@ module Chronicle
|
|
24
24
|
|
25
25
|
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
26
26
|
def available_jobs
|
27
|
-
|
28
|
-
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
27
|
+
Dir.glob(File.join(config_directory("jobs"), "*.yml")).map do |filename|
|
29
28
|
File.basename(filename, ".*")
|
30
29
|
end
|
31
30
|
end
|
32
31
|
|
33
32
|
# Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
|
34
33
|
def available_credentials
|
35
|
-
|
36
|
-
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
34
|
+
Dir.glob(File.join(config_directory("credentials"), "*.yml")).map do |filename|
|
37
35
|
File.basename(filename, ".*")
|
38
36
|
end
|
39
37
|
end
|
@@ -48,6 +46,11 @@ module Chronicle
|
|
48
46
|
def load_credentials(name)
|
49
47
|
config = self.load("chronicle/etl/credentials/#{name}.yml")
|
50
48
|
end
|
49
|
+
|
50
|
+
def config_directory(type)
|
51
|
+
path = "chronicle/etl/#{type}"
|
52
|
+
Runcom::Config.new(path).current || raise(Chronicle::ETL::ConfigError, "Could not access config directory (#{path})")
|
53
|
+
end
|
51
54
|
end
|
52
55
|
end
|
53
56
|
end
|
@@ -57,7 +57,7 @@ module Chronicle
|
|
57
57
|
|
58
58
|
options.each do |name, value|
|
59
59
|
setting = self.class.all_settings[name]
|
60
|
-
raise(Chronicle::ETL::
|
60
|
+
raise(Chronicle::ETL::ConnectorConfigurationError, "Unrecognized setting: #{name}") unless setting
|
61
61
|
|
62
62
|
@config[name] = coerced_value(setting, value)
|
63
63
|
end
|
@@ -78,7 +78,7 @@ module Chronicle
|
|
78
78
|
|
79
79
|
def validate_config
|
80
80
|
missing = (self.class.all_required_settings.keys - @config.compacted_h.keys)
|
81
|
-
raise Chronicle::ETL::
|
81
|
+
raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
|
82
82
|
end
|
83
83
|
|
84
84
|
def coerced_value(setting, value)
|
@@ -89,6 +89,19 @@ module Chronicle
|
|
89
89
|
value.to_s
|
90
90
|
end
|
91
91
|
|
92
|
+
# TODO: think about whether to split up float, integer
|
93
|
+
def coerce_numeric(value)
|
94
|
+
value.to_f
|
95
|
+
end
|
96
|
+
|
97
|
+
def coerce_boolean(value)
|
98
|
+
if value.is_a?(String)
|
99
|
+
value.downcase == "true"
|
100
|
+
else
|
101
|
+
value
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
92
105
|
def coerce_time(value)
|
93
106
|
# TODO: handle durations like '3h'
|
94
107
|
if value.is_a?(String)
|
@@ -1,11 +1,34 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
|
-
class Error < StandardError; end
|
3
|
+
class Error < StandardError; end
|
4
4
|
|
5
|
-
class
|
5
|
+
class ConfigError < Error; end
|
6
6
|
|
7
7
|
class RunnerTypeError < Error; end
|
8
8
|
|
9
|
+
class JobDefinitionError < Error
|
10
|
+
attr_reader :job_definition
|
11
|
+
|
12
|
+
def initialize(job_definition)
|
13
|
+
@job_definition = job_definition
|
14
|
+
super
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class PluginError < Error
|
19
|
+
attr_reader :name
|
20
|
+
|
21
|
+
def initialize(name)
|
22
|
+
@name = name
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class PluginConflictError < PluginError; end
|
27
|
+
class PluginNotAvailableError < PluginError; end
|
28
|
+
class PluginLoadError < PluginError; end
|
29
|
+
|
30
|
+
class ConnectorConfigurationError < Error; end
|
31
|
+
|
9
32
|
class ConnectorNotAvailableError < Error
|
10
33
|
def initialize(message, provider: nil, name: nil)
|
11
34
|
super(message)
|
@@ -18,6 +41,10 @@ module Chronicle
|
|
18
41
|
class ProviderNotAvailableError < ConnectorNotAvailableError; end
|
19
42
|
class ProviderConnectorNotAvailableError < ConnectorNotAvailableError; end
|
20
43
|
|
44
|
+
class ExtractionError < Error; end
|
45
|
+
|
46
|
+
class SerializationError < Error; end
|
47
|
+
|
21
48
|
class TransformationError < Error
|
22
49
|
attr_reader :transformation
|
23
50
|
|
@@ -3,39 +3,46 @@ require 'csv'
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
5
|
class CSVExtractor < Chronicle::ETL::Extractor
|
6
|
-
include Extractors::Helpers::
|
6
|
+
include Extractors::Helpers::InputReader
|
7
7
|
|
8
8
|
register_connector do |r|
|
9
|
-
r.description = '
|
9
|
+
r.description = 'CSV'
|
10
10
|
end
|
11
11
|
|
12
12
|
setting :headers, default: true
|
13
|
-
|
13
|
+
|
14
|
+
def prepare
|
15
|
+
@csvs = prepare_sources
|
16
|
+
end
|
14
17
|
|
15
18
|
def extract
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
+
@csvs.each do |csv|
|
20
|
+
csv.read.each do |row|
|
21
|
+
yield Chronicle::ETL::Extraction.new(data: row.to_h)
|
22
|
+
end
|
19
23
|
end
|
20
24
|
end
|
21
25
|
|
22
26
|
def results_count
|
23
|
-
|
27
|
+
@csvs.reduce(0) do |total_rows, csv|
|
28
|
+
row_count = csv.readlines.size
|
29
|
+
csv.rewind
|
30
|
+
total_rows + row_count
|
31
|
+
end
|
24
32
|
end
|
25
33
|
|
26
34
|
private
|
27
35
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
open_from_filesystem(filename: @config.filename) do |file|
|
37
|
-
return CSV.new(file, **csv_options)
|
36
|
+
def prepare_sources
|
37
|
+
@csvs = []
|
38
|
+
read_input do |csv_data|
|
39
|
+
csv_options = {
|
40
|
+
headers: @config.headers.is_a?(String) ? @config.headers.split(',') : @config.headers,
|
41
|
+
converters: :all
|
42
|
+
}
|
43
|
+
@csvs << CSV.new(csv_data, **csv_options)
|
38
44
|
end
|
45
|
+
@csvs
|
39
46
|
end
|
40
47
|
end
|
41
48
|
end
|
@@ -7,11 +7,11 @@ module Chronicle
|
|
7
7
|
extend Chronicle::ETL::Registry::SelfRegistering
|
8
8
|
include Chronicle::ETL::Configurable
|
9
9
|
|
10
|
-
setting :since, type: :
|
11
|
-
setting :until, type: :
|
12
|
-
setting :limit
|
10
|
+
setting :since, type: :time
|
11
|
+
setting :until, type: :time
|
12
|
+
setting :limit, type: :numeric
|
13
13
|
setting :load_after_id
|
14
|
-
setting :
|
14
|
+
setting :input
|
15
15
|
|
16
16
|
# Construct a new instance of this extractor. Options are passed in from a Runner
|
17
17
|
# == Parameters:
|
@@ -46,7 +46,7 @@ module Chronicle
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
require_relative 'helpers/
|
49
|
+
require_relative 'helpers/input_reader'
|
50
50
|
require_relative 'csv_extractor'
|
51
51
|
require_relative 'file_extractor'
|
52
52
|
require_relative 'json_extractor'
|
@@ -2,35 +2,55 @@ require 'pathname'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
+
# Return filenames that match a pattern in a directory
|
5
6
|
class FileExtractor < Chronicle::ETL::Extractor
|
6
|
-
include Extractors::Helpers::FilesystemReader
|
7
7
|
|
8
8
|
register_connector do |r|
|
9
9
|
r.description = 'file or directory of files'
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
setting :dir_glob_pattern
|
12
|
+
setting :input, default: ['.']
|
13
|
+
setting :dir_glob_pattern, default: "**/*"
|
14
|
+
setting :larger_than
|
15
|
+
setting :smaller_than
|
16
|
+
|
17
|
+
def prepare
|
18
|
+
@pathnames = gather_files
|
19
|
+
end
|
14
20
|
|
15
21
|
def extract
|
16
|
-
|
17
|
-
yield Chronicle::ETL::Extraction.new(data:
|
22
|
+
@pathnames.each do |pathname|
|
23
|
+
yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
|
18
24
|
end
|
19
25
|
end
|
20
26
|
|
21
27
|
def results_count
|
22
|
-
|
28
|
+
@pathnames.count
|
23
29
|
end
|
24
30
|
|
25
31
|
private
|
26
32
|
|
27
|
-
def
|
28
|
-
@
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
33
|
+
# Build the list of files to extract from `@config.input`.
#
# Each input may be a file or a directory. Directories are expanded with
# `@config.dir_glob_pattern`; explicitly named files are kept as given.
# The result is then narrowed by the optional `since` / `until` (mtime)
# and `larger_than` / `smaller_than` (size in bytes) settings.
#
# @return [Array<Pathname>] matching files, oldest modification time first
# @raise [ExtractionError] if any configured input path does not exist
def gather_files
  roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
  raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)

  directories, files = roots.partition(&:directory?)

  directories.each do |directory|
    # A pattern such as "**/*" also matches sub-directories; only regular
    # files are extractable, so drop everything else.
    files += Dir.glob(File.join(directory, @config.dir_glob_pattern))
                .map { |filename| Pathname.new(filename) }
                .select(&:file?)
  end

  files = files.uniq

  files = files.select { |f| f.mtime > @config.since } if @config.since
  files = files.select { |f| f.mtime < @config.until } if @config.until

  # File sizes are given in bytes. These settings declare no type, so the
  # value may arrive as a string; convert it before comparing.
  if @config.smaller_than
    max_size = Float(@config.smaller_than)
    files = files.select { |f| f.size < max_size }
  end
  if @config.larger_than
    min_size = Float(@config.larger_than)
    files = files.select { |f| f.size > min_size }
  end

  # TODO: incorporate sort argument
  files.sort_by(&:mtime)
end
|
35
55
|
end
|
36
56
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    module Extractors
      module Helpers
        # Mixin for extractors that take their input either from the files
        # named in `@config.input` or, when no files are configured, from
        # piped stdin. The including object must set `@config`.
        module InputReader
          # Return an array of input filenames; converts a single string
          # to an array if necessary
          #
          # @return [Array] flattened `@config.input` with nil entries removed
          def filenames
            [@config.input].flatten.compact
          end

          # Filenames as an array of pathnames
          #
          # @return [Array<Pathname>]
          def pathnames
            filenames.map { |filename| Pathname.new(filename) }
          end

          # Whether we're reading from files
          def read_from_files?
            filenames.any?
          end

          # Whether we're reading input from stdin
          def read_from_stdin?
            !read_from_files? && $stdin.stat.pipe?
          end

          # Read input sources and yield each content
          #
          # @yield [content, source] the full content of one source, plus the
          #   file path (for files) or the `$stdin` object (for stdin)
          # @raise [Chronicle::ETL::ExtractionError] if there is neither a
          #   configured file nor piped stdin
          def read_input
            if read_from_files?
              pathnames.each do |pathname|
                File.open(pathname) do |file|
                  yield file.read, pathname.to_path
                end
              end
            elsif read_from_stdin?
              yield $stdin.read, $stdin
            else
              raise Chronicle::ETL::ExtractionError, "No input files or stdin provided"
            end
          end

          # Read input sources line by line
          #
          # @yield [line] each line of every source, in order
          # @raise [Chronicle::ETL::ExtractionError] if there is neither a
          #   configured file nor piped stdin
          def read_input_as_lines(&block)
            if read_from_files?
              lines_from_files(&block)
            elsif read_from_stdin?
              lines_from_stdin(&block)
            else
              raise Chronicle::ETL::ExtractionError, "No input files or stdin provided"
            end
          end

          private

          # Yield every line of every configured file, one file at a time.
          def lines_from_files(&block)
            pathnames.each do |pathname|
              File.open(pathname) do |file|
                lines_from_io(file, &block)
              end
            end
          end

          # Yield every line read from stdin.
          def lines_from_stdin(&block)
            lines_from_io($stdin, &block)
          end

          # Yield every line of the given IO object.
          def lines_from_io(io, &block)
            io.each_line(&block)
          end
        end
      end
    end
  end
end
|
@@ -1,35 +1,44 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
|
-
class
|
4
|
-
include Extractors::Helpers::
|
3
|
+
class JSONExtractor < Chronicle::ETL::Extractor
|
4
|
+
include Extractors::Helpers::InputReader
|
5
5
|
|
6
6
|
register_connector do |r|
|
7
|
-
r.description = '
|
7
|
+
r.description = 'JSON'
|
8
8
|
end
|
9
9
|
|
10
|
-
setting :
|
11
|
-
setting :jsonl, default: true
|
10
|
+
setting :jsonl, default: true, type: :boolean
|
12
11
|
|
13
|
-
def
|
12
|
+
def prepare
|
13
|
+
@jsons = []
|
14
14
|
load_input do |input|
|
15
|
-
|
16
|
-
|
15
|
+
@jsons << parse_data(input)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def extract
|
20
|
+
@jsons.each do |json|
|
21
|
+
yield Chronicle::ETL::Extraction.new(data: json)
|
17
22
|
end
|
18
23
|
end
|
19
24
|
|
20
25
|
def results_count
|
26
|
+
@jsons.count
|
21
27
|
end
|
22
28
|
|
23
29
|
private
|
24
30
|
|
25
31
|
def parse_data data
|
26
32
|
JSON.parse(data)
|
27
|
-
rescue JSON::ParserError
|
33
|
+
rescue JSON::ParserError
|
34
|
+
raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
|
28
35
|
end
|
29
36
|
|
30
|
-
def load_input
|
31
|
-
|
32
|
-
|
37
|
+
def load_input(&block)
|
38
|
+
if @config.jsonl
|
39
|
+
read_input_as_lines(&block)
|
40
|
+
else
|
41
|
+
read_input(&block)
|
33
42
|
end
|
34
43
|
end
|
35
44
|
end
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'forwardable'
|
2
|
+
|
2
3
|
module Chronicle
|
3
4
|
module ETL
|
5
|
+
# A runner job
|
6
|
+
#
|
7
|
+
# TODO: this can probably be merged with JobDefinition. Not clear
|
8
|
+
# where the boundaries are
|
4
9
|
class Job
|
5
10
|
extend Forwardable
|
6
11
|
|
@@ -12,7 +17,8 @@ module Chronicle
|
|
12
17
|
:transformer_klass,
|
13
18
|
:transformer_options,
|
14
19
|
:loader_klass,
|
15
|
-
:loader_options
|
20
|
+
:loader_options,
|
21
|
+
:job_definition
|
16
22
|
|
17
23
|
# TODO: build a proper id system
|
18
24
|
alias id name
|
@@ -14,22 +14,52 @@ module Chronicle
|
|
14
14
|
options: {}
|
15
15
|
},
|
16
16
|
loader: {
|
17
|
-
name: '
|
17
|
+
name: 'table',
|
18
18
|
options: {}
|
19
19
|
}
|
20
20
|
}.freeze
|
21
21
|
|
22
|
+
attr_reader :errors
|
22
23
|
attr_accessor :definition
|
23
24
|
|
24
25
|
def initialize()
|
25
26
|
@definition = SKELETON_DEFINITION
|
26
27
|
end
|
27
28
|
|
29
|
+
def valid?
|
30
|
+
validate
|
31
|
+
@errors.empty?
|
32
|
+
end
|
33
|
+
|
34
|
+
def validate
|
35
|
+
@errors = {}
|
36
|
+
|
37
|
+
Chronicle::ETL::Registry::PHASES.each do |phase|
|
38
|
+
__send__("#{phase}_klass".to_sym)
|
39
|
+
rescue Chronicle::ETL::PluginError => e
|
40
|
+
@errors[:plugins] ||= []
|
41
|
+
@errors[:plugins] << e
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def plugins_missing?
|
46
|
+
validate
|
47
|
+
|
48
|
+
@errors[:plugins] || []
|
49
|
+
.filter { |e| e.instance_of?(Chronicle::ETL::PluginLoadError) }
|
50
|
+
.any?
|
51
|
+
end
|
52
|
+
|
53
|
+
def validate!
|
54
|
+
raise(Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid") unless valid?
|
55
|
+
|
56
|
+
true
|
57
|
+
end
|
58
|
+
|
28
59
|
# Add config hash to this definition
|
29
60
|
def add_config(config = {})
|
30
61
|
@definition = @definition.deep_merge(config)
|
31
62
|
load_credentials
|
32
|
-
validate
|
33
63
|
end
|
34
64
|
|
35
65
|
# Is this job continuing from a previous run?
|
@@ -80,10 +110,6 @@ module Chronicle
|
|
80
110
|
end
|
81
111
|
end
|
82
112
|
end
|
83
|
-
|
84
|
-
def validate
|
85
|
-
return true # TODO
|
86
|
-
end
|
87
113
|
end
|
88
114
|
end
|
89
115
|
end
|
@@ -7,22 +7,49 @@ module Chronicle
|
|
7
7
|
r.description = 'CSV'
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
setting :output, default: $stdout
|
11
|
+
setting :headers, default: true
|
12
|
+
setting :header_row, default: true
|
13
|
+
|
14
|
+
def records
|
15
|
+
@records ||= []
|
13
16
|
end
|
14
17
|
|
15
18
|
def load(record)
|
16
|
-
|
19
|
+
records << record.to_h_flattened
|
17
20
|
end
|
18
21
|
|
19
22
|
# Write all collected records as CSV to the configured output.
#
# Does nothing when no records were loaded. When `@config.headers` is set,
# the headers built from the records are passed to the CSV writer and
# `@config.header_row` decides whether a header row is emitted. Each record
# is written in header order, with string values passed through `force_utf8`.
def finish
  return unless records.any?

  headers = build_headers(records)

  csv_options = {}
  if @config.headers
    csv_options[:write_headers] = @config.header_row
    csv_options[:headers] = headers
  end

  # Generate the whole document before touching the destination, so a
  # failure while serializing does not leave a truncated output file.
  output = CSV.generate(**csv_options) do |csv|
    records.each do |record|
      csv << record
        .transform_keys(&:to_sym)
        .values_at(*headers)
        .map { |value| force_utf8(value) }
    end
  end

  io =
    if @config.output.is_a?(IO)
      # This might seem like a duplication of the default value ($stdout)
      # but it's because rspec overwrites $stdout (in helper #capture) to
      # capture output.
      $stdout.dup
    else
      File.open(@config.output, "w+")
    end

  begin
    io.write(output)
  ensure
    # Always release the handle, even if the write raises.
    io.close
  end
end
|
27
54
|
end
|
28
55
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    module Loaders
      module Helpers
        # Helpers for making values safe to write out as UTF-8 text.
        module EncodingHelper
          # Mostly useful for handling loading with binary data from a raw extraction
          #
          # Non-string values are returned untouched. A binary-tagged string
          # whose bytes already form valid UTF-8 is re-tagged rather than
          # transcoded, so its non-ASCII characters survive. Any other string
          # is transcoded to UTF-8 with invalid or unconvertible bytes removed.
          #
          # @param value [Object] value about to be written by a loader
          # @return [Object] a UTF-8 string, or `value` itself when it is not a String
          def force_utf8(value)
            return value unless value.is_a?(String)

            if value.encoding == Encoding::BINARY
              # Transcoding from binary would drop every byte above 0x7F, even
              # when the bytes are well-formed UTF-8 text.
              reinterpreted = value.dup.force_encoding(Encoding::UTF_8)
              return reinterpreted if reinterpreted.valid_encoding?
            end

            value.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
          end
        end
      end
    end
  end
end
|