chronicle-etl 0.3.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/.rubocop.yml +31 -1
- data/Guardfile +7 -0
- data/README.md +157 -82
- data/Rakefile +4 -2
- data/chronicle-etl.gemspec +11 -3
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/cli/connectors.rb +34 -5
- data/lib/chronicle/etl/cli/jobs.rb +90 -24
- data/lib/chronicle/etl/cli/main.rb +41 -19
- data/lib/chronicle/etl/cli/plugins.rb +62 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
- data/lib/chronicle/etl/cli.rb +9 -0
- data/lib/chronicle/etl/config.rb +7 -4
- data/lib/chronicle/etl/configurable.rb +163 -0
- data/lib/chronicle/etl/exceptions.rb +29 -1
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
- data/lib/chronicle/etl/extractors/extractor.rb +16 -15
- data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
- data/lib/chronicle/etl/job.rb +8 -2
- data/lib/chronicle/etl/job_definition.rb +20 -5
- data/lib/chronicle/etl/loaders/csv_loader.rb +36 -9
- data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +28 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
- data/lib/chronicle/etl/loaders/table_loader.rb +18 -37
- data/lib/chronicle/etl/logger.rb +6 -2
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +6 -0
- data/lib/chronicle/etl/registry/plugin_registry.rb +70 -0
- data/lib/chronicle/etl/registry/registry.rb +27 -14
- data/lib/chronicle/etl/runner.rb +35 -17
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/transformers/transformer.rb +3 -2
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +12 -4
- metadata +123 -18
- data/.ruby-version +0 -1
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -2,46 +2,47 @@ require 'csv'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
-
class
|
6
|
-
include Extractors::Helpers::FilesystemReader
|
5
|
+
class CSVExtractor < Chronicle::ETL::Extractor
|
6
|
+
include Extractors::Helpers::InputReader
|
7
7
|
|
8
8
|
register_connector do |r|
|
9
|
-
r.description = '
|
9
|
+
r.description = 'CSV'
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
headers: true,
|
14
|
-
filename: $stdin
|
15
|
-
}.freeze
|
12
|
+
setting :headers, default: true
|
16
13
|
|
17
|
-
def
|
18
|
-
|
14
|
+
def prepare
|
15
|
+
@csvs = prepare_sources
|
19
16
|
end
|
20
17
|
|
21
18
|
def extract
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
@csvs.each do |csv|
|
20
|
+
csv.read.each do |row|
|
21
|
+
yield Chronicle::ETL::Extraction.new(data: row.to_h)
|
22
|
+
end
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
28
26
|
def results_count
|
29
|
-
|
27
|
+
@csvs.reduce(0) do |total_rows, csv|
|
28
|
+
row_count = csv.readlines.size
|
29
|
+
csv.rewind
|
30
|
+
total_rows + row_count
|
31
|
+
end
|
30
32
|
end
|
31
33
|
|
32
34
|
private
|
33
35
|
|
34
|
-
def
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
open_from_filesystem(filename: @options[:filename]) do |file|
|
43
|
-
return CSV.new(file, **csv_options)
|
36
|
+
def prepare_sources
|
37
|
+
@csvs = []
|
38
|
+
read_input do |csv_data|
|
39
|
+
csv_options = {
|
40
|
+
headers: @config.headers.is_a?(String) ? @config.headers.split(',') : @config.headers,
|
41
|
+
converters: :all
|
42
|
+
}
|
43
|
+
@csvs << CSV.new(csv_data, **csv_options)
|
44
44
|
end
|
45
|
+
@csvs
|
45
46
|
end
|
46
47
|
end
|
47
48
|
end
|
@@ -5,15 +5,20 @@ module Chronicle
|
|
5
5
|
# Abstract class representing an Extractor for an ETL job
|
6
6
|
class Extractor
|
7
7
|
extend Chronicle::ETL::Registry::SelfRegistering
|
8
|
+
include Chronicle::ETL::Configurable
|
9
|
+
|
10
|
+
setting :since, type: :time
|
11
|
+
setting :until, type: :time
|
12
|
+
setting :limit, type: :numeric
|
13
|
+
setting :load_after_id
|
14
|
+
setting :input
|
8
15
|
|
9
16
|
# Construct a new instance of this extractor. Options are passed in from a Runner
|
10
|
-
# ==
|
17
|
+
# == Parameters:
|
11
18
|
# options::
|
12
19
|
# Options for configuring this Extractor
|
13
20
|
def initialize(options = {})
|
14
|
-
|
15
|
-
sanitize_options
|
16
|
-
handle_continuation
|
21
|
+
apply_options(options)
|
17
22
|
end
|
18
23
|
|
19
24
|
# Hook called before #extract. Useful for gathering data, initializing proxies, etc.
|
@@ -30,22 +35,18 @@ module Chronicle
|
|
30
35
|
|
31
36
|
private
|
32
37
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
def handle_continuation
|
39
|
-
return unless @options[:continuation]
|
38
|
+
# TODO: reimplement this
|
39
|
+
# def handle_continuation
|
40
|
+
# return unless @config.continuation
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
end
|
42
|
+
# @config.since = @config.continuation.highest_timestamp if @config.continuation.highest_timestamp
|
43
|
+
# @config.load_after_id = @config.continuation.last_id if @config.continuation.last_id
|
44
|
+
# end
|
44
45
|
end
|
45
46
|
end
|
46
47
|
end
|
47
48
|
|
48
|
-
require_relative 'helpers/filesystem_reader'
|
49
|
+
require_relative 'helpers/input_reader'
|
49
50
|
require_relative 'csv_extractor'
|
50
51
|
require_relative 'file_extractor'
|
51
52
|
require_relative 'json_extractor'
|
@@ -2,32 +2,55 @@ require 'pathname'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
+
# Return filenames that match a pattern in a directory
|
5
6
|
class FileExtractor < Chronicle::ETL::Extractor
|
6
|
-
include Extractors::Helpers::FilesystemReader
|
7
7
|
|
8
8
|
register_connector do |r|
|
9
9
|
r.description = 'file or directory of files'
|
10
10
|
end
|
11
11
|
|
12
|
+
setting :input, default: ['.']
|
13
|
+
setting :dir_glob_pattern, default: "**/*"
|
14
|
+
setting :larger_than
|
15
|
+
setting :smaller_than
|
16
|
+
|
17
|
+
def prepare
|
18
|
+
@pathnames = gather_files
|
19
|
+
end
|
20
|
+
|
12
21
|
def extract
|
13
|
-
|
14
|
-
yield Chronicle::ETL::Extraction.new(data:
|
22
|
+
@pathnames.each do |pathname|
|
23
|
+
yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
|
15
24
|
end
|
16
25
|
end
|
17
26
|
|
18
27
|
def results_count
|
19
|
-
|
28
|
+
@pathnames.count
|
20
29
|
end
|
21
30
|
|
22
31
|
private
|
23
32
|
|
24
|
-
def
|
25
|
-
@
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
33
|
+
def gather_files
|
34
|
+
roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
|
35
|
+
raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)
|
36
|
+
|
37
|
+
directories, files = roots.partition(&:directory?)
|
38
|
+
|
39
|
+
directories.each do |directory|
|
40
|
+
files += Dir.glob(File.join(directory, @config.dir_glob_pattern)).map { |filename| Pathname.new(filename) }
|
41
|
+
end
|
42
|
+
|
43
|
+
files = files.uniq
|
44
|
+
|
45
|
+
files = files.keep_if { |f| (f.mtime > @config.since) } if @config.since
|
46
|
+
files = files.keep_if { |f| (f.mtime < @config.until) } if @config.until
|
47
|
+
|
48
|
+
# pass in file sizes in bytes
|
49
|
+
files = files.keep_if { |f| (f.size < @config.smaller_than) } if @config.smaller_than
|
50
|
+
files = files.keep_if { |f| (f.size > @config.larger_than) } if @config.larger_than
|
51
|
+
|
52
|
+
# TODO: incorporate sort argument
|
53
|
+
files.sort_by(&:mtime)
|
31
54
|
end
|
32
55
|
end
|
33
56
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
module Extractors
|
6
|
+
module Helpers
|
7
|
+
module InputReader
|
8
|
+
# Return an array of input filenames; converts a single string
|
9
|
+
# to an array if necessary
|
10
|
+
def filenames
|
11
|
+
[@config.input].flatten.map
|
12
|
+
end
|
13
|
+
|
14
|
+
# Filenames as an array of pathnames
|
15
|
+
def pathnames
|
16
|
+
filenames.map { |filename| Pathname.new(filename) }
|
17
|
+
end
|
18
|
+
|
19
|
+
# Whether we're reading from files
|
20
|
+
def read_from_files?
|
21
|
+
filenames.any?
|
22
|
+
end
|
23
|
+
|
24
|
+
# Whether we're reading input from stdin
|
25
|
+
def read_from_stdin?
|
26
|
+
!read_from_files? && $stdin.stat.pipe?
|
27
|
+
end
|
28
|
+
|
29
|
+
# Read input sources and yield each content
|
30
|
+
def read_input
|
31
|
+
if read_from_files?
|
32
|
+
pathnames.each do |pathname|
|
33
|
+
File.open(pathname) do |file|
|
34
|
+
yield file.read, pathname.to_path
|
35
|
+
end
|
36
|
+
end
|
37
|
+
elsif read_from_stdin?
|
38
|
+
yield $stdin.read, $stdin
|
39
|
+
else
|
40
|
+
raise ExtractionError, "No input files or stdin provided"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# Read input sources line by line
|
45
|
+
def read_input_as_lines(&block)
|
46
|
+
if read_from_files?
|
47
|
+
lines_from_files(&block)
|
48
|
+
elsif read_from_stdin?
|
49
|
+
lines_from_stdin(&block)
|
50
|
+
else
|
51
|
+
raise ExtractionError, "No input files or stdin provided"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def lines_from_files(&block)
|
58
|
+
pathnames.each do |pathname|
|
59
|
+
File.open(pathname) do |file|
|
60
|
+
lines_from_io(file, &block)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def lines_from_stdin(&block)
|
66
|
+
lines_from_io($stdin, &block)
|
67
|
+
end
|
68
|
+
|
69
|
+
def lines_from_io(io, &block)
|
70
|
+
io.each_line(&block)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -1,43 +1,44 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
|
-
class
|
4
|
-
include Extractors::Helpers::FilesystemReader
|
3
|
+
class JSONExtractor < Chronicle::ETL::Extractor
|
4
|
+
include Extractors::Helpers::InputReader
|
5
5
|
|
6
6
|
register_connector do |r|
|
7
|
-
r.description = '
|
7
|
+
r.description = 'JSON'
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
filename: $stdin,
|
10
|
+
setting :jsonl, default: true, type: :boolean
|
12
11
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
super(DEFAULT_OPTIONS.merge(options))
|
12
|
+
def prepare
|
13
|
+
@jsons = []
|
14
|
+
load_input do |input|
|
15
|
+
@jsons << parse_data(input)
|
16
|
+
end
|
19
17
|
end
|
20
18
|
|
21
19
|
def extract
|
22
|
-
|
23
|
-
|
24
|
-
yield Chronicle::ETL::Extraction.new(data: parsed_data) if parsed_data
|
20
|
+
@jsons.each do |json|
|
21
|
+
yield Chronicle::ETL::Extraction.new(data: json)
|
25
22
|
end
|
26
23
|
end
|
27
24
|
|
28
25
|
def results_count
|
26
|
+
@jsons.count
|
29
27
|
end
|
30
28
|
|
31
29
|
private
|
32
30
|
|
33
31
|
def parse_data data
|
34
32
|
JSON.parse(data)
|
35
|
-
rescue JSON::ParserError
|
33
|
+
rescue JSON::ParserError
|
34
|
+
raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
|
36
35
|
end
|
37
36
|
|
38
|
-
def load_input
|
39
|
-
|
40
|
-
|
37
|
+
def load_input(&block)
|
38
|
+
if @config.jsonl
|
39
|
+
read_input_as_lines(&block)
|
40
|
+
else
|
41
|
+
read_input(&block)
|
41
42
|
end
|
42
43
|
end
|
43
44
|
end
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'forwardable'
|
2
|
+
|
2
3
|
module Chronicle
|
3
4
|
module ETL
|
5
|
+
# A runner job
|
6
|
+
#
|
7
|
+
# TODO: this can probably be merged with JobDefinition. Not clear
|
8
|
+
# where the boundaries are
|
4
9
|
class Job
|
5
10
|
extend Forwardable
|
6
11
|
|
@@ -12,7 +17,8 @@ module Chronicle
|
|
12
17
|
:transformer_klass,
|
13
18
|
:transformer_options,
|
14
19
|
:loader_klass,
|
15
|
-
:loader_options
|
20
|
+
:loader_options,
|
21
|
+
:job_definition
|
16
22
|
|
17
23
|
# TODO: build a proper id system
|
18
24
|
alias id name
|
@@ -35,7 +41,7 @@ module Chronicle
|
|
35
41
|
|
36
42
|
def instantiate_transformer(extraction)
|
37
43
|
@transformer_klass = @job_definition.transformer_klass
|
38
|
-
@transformer_klass.new(@transformer_options
|
44
|
+
@transformer_klass.new(extraction, @transformer_options)
|
39
45
|
end
|
40
46
|
|
41
47
|
def instantiate_loader
|
@@ -14,17 +14,36 @@ module Chronicle
|
|
14
14
|
options: {}
|
15
15
|
},
|
16
16
|
loader: {
|
17
|
-
name: '
|
17
|
+
name: 'table',
|
18
18
|
options: {}
|
19
19
|
}
|
20
20
|
}.freeze
|
21
21
|
|
22
|
+
attr_reader :errors
|
22
23
|
attr_accessor :definition
|
23
24
|
|
24
25
|
def initialize()
|
25
26
|
@definition = SKELETON_DEFINITION
|
26
27
|
end
|
27
28
|
|
29
|
+
def validate
|
30
|
+
@errors = []
|
31
|
+
|
32
|
+
Chronicle::ETL::Registry::PHASES.each do |phase|
|
33
|
+
__send__("#{phase}_klass".to_sym)
|
34
|
+
rescue Chronicle::ETL::PluginError => e
|
35
|
+
@errors << e
|
36
|
+
end
|
37
|
+
|
38
|
+
@errors.empty?
|
39
|
+
end
|
40
|
+
|
41
|
+
def validate!
|
42
|
+
raise(Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid") unless validate
|
43
|
+
|
44
|
+
true
|
45
|
+
end
|
46
|
+
|
28
47
|
# Add config hash to this definition
|
29
48
|
def add_config(config = {})
|
30
49
|
@definition = @definition.deep_merge(config)
|
@@ -80,10 +99,6 @@ module Chronicle
|
|
80
99
|
end
|
81
100
|
end
|
82
101
|
end
|
83
|
-
|
84
|
-
def validate
|
85
|
-
return true # TODO
|
86
|
-
end
|
87
102
|
end
|
88
103
|
end
|
89
104
|
end
|
@@ -2,27 +2,54 @@ require 'csv'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
-
class
|
5
|
+
class CSVLoader < Chronicle::ETL::Loader
|
6
6
|
register_connector do |r|
|
7
7
|
r.description = 'CSV'
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
setting :output, default: $stdout
|
11
|
+
setting :headers, default: true
|
12
|
+
setting :header_row, default: true
|
13
|
+
|
14
|
+
def records
|
15
|
+
@records ||= []
|
13
16
|
end
|
14
17
|
|
15
18
|
def load(record)
|
16
|
-
|
19
|
+
records << record.to_h_flattened
|
17
20
|
end
|
18
21
|
|
19
22
|
def finish
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
23
|
+
return unless records.any?
|
24
|
+
|
25
|
+
headers = build_headers(records)
|
26
|
+
|
27
|
+
csv_options = {}
|
28
|
+
if @config.headers
|
29
|
+
csv_options[:write_headers] = @config.header_row
|
30
|
+
csv_options[:headers] = headers
|
31
|
+
end
|
32
|
+
|
33
|
+
if @config.output.is_a?(IO)
|
34
|
+
# This might seem like a duplication of the default value ($stdout)
|
35
|
+
# but it's because rspec overwrites $stdout (in helper #capture) to
|
36
|
+
# capture output.
|
37
|
+
io = $stdout.dup
|
38
|
+
else
|
39
|
+
io = File.open(@config.output, "w+")
|
40
|
+
end
|
41
|
+
|
42
|
+
output = CSV.generate(**csv_options) do |csv|
|
43
|
+
records.each do |record|
|
44
|
+
csv << record
|
45
|
+
.transform_keys(&:to_sym)
|
46
|
+
.values_at(*headers)
|
47
|
+
.map { |value| force_utf8(value) }
|
24
48
|
end
|
25
49
|
end
|
50
|
+
|
51
|
+
io.write(output)
|
52
|
+
io.close
|
26
53
|
end
|
27
54
|
end
|
28
55
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
module Loaders
|
6
|
+
module Helpers
|
7
|
+
module EncodingHelper
|
8
|
+
# Mostly useful for handling loading with binary data from a raw extraction
|
9
|
+
def force_utf8(value)
|
10
|
+
return value unless value.is_a?(String)
|
11
|
+
|
12
|
+
value.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
class JSONLoader < Chronicle::ETL::Loader
|
4
|
+
register_connector do |r|
|
5
|
+
r.description = 'json'
|
6
|
+
end
|
7
|
+
|
8
|
+
setting :serializer
|
9
|
+
setting :output, default: $stdout
|
10
|
+
|
11
|
+
def start
|
12
|
+
if @config.output == $stdout
|
13
|
+
@output = @config.output
|
14
|
+
else
|
15
|
+
@output = File.open(@config.output, "w")
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def load(record)
|
20
|
+
serialized = serializer.serialize(record)
|
21
|
+
|
22
|
+
# When dealing with raw data, we can get improperly encoded strings
|
23
|
+
# (eg from sqlite database columns). We force conversion to UTF-8
|
24
|
+
# before converting into JSON
|
25
|
+
encoded = serialized.transform_values do |value|
|
26
|
+
next value unless value.is_a?(String)
|
27
|
+
|
28
|
+
force_utf8(value)
|
29
|
+
end
|
30
|
+
@output.puts encoded.to_json
|
31
|
+
end
|
32
|
+
|
33
|
+
def finish
|
34
|
+
@output.close
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def serializer
|
40
|
+
@config.serializer || Chronicle::ETL::RawSerializer
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -1,15 +1,24 @@
|
|
1
|
+
require_relative 'helpers/encoding_helper'
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
# Abstract class representing a Loader for an ETL job
|
4
6
|
class Loader
|
5
7
|
extend Chronicle::ETL::Registry::SelfRegistering
|
8
|
+
include Chronicle::ETL::Configurable
|
9
|
+
include Chronicle::ETL::Loaders::Helpers::EncodingHelper
|
10
|
+
|
11
|
+
setting :output
|
12
|
+
setting :fields
|
13
|
+
setting :fields_limit, default: nil
|
14
|
+
setting :fields_exclude
|
6
15
|
|
7
16
|
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
17
|
# == Parameters:
|
9
18
|
# options::
|
10
19
|
# Options for configuring this Loader
|
11
20
|
def initialize(options = {})
|
12
|
-
|
21
|
+
apply_options(options)
|
13
22
|
end
|
14
23
|
|
15
24
|
# Called once before processing records
|
@@ -22,11 +31,28 @@ module Chronicle
|
|
22
31
|
|
23
32
|
# Called once there are no more records to process
|
24
33
|
def finish; end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def build_headers(records)
|
38
|
+
headers =
|
39
|
+
if @config.fields && @config.fields.any?
|
40
|
+
Set[*@config.fields]
|
41
|
+
else
|
42
|
+
# use all the keys of the flattened record hash
|
43
|
+
Set[*records.map(&:keys).flatten.map(&:to_s).uniq]
|
44
|
+
end
|
45
|
+
|
46
|
+
headers = headers.delete_if { |header| header.end_with?(*@config.fields_exclude) }
|
47
|
+
headers = headers.first(@config.fields_limit) if @config.fields_limit
|
48
|
+
|
49
|
+
headers.to_a.map(&:to_sym)
|
50
|
+
end
|
25
51
|
end
|
26
52
|
end
|
27
53
|
end
|
28
54
|
|
29
55
|
require_relative 'csv_loader'
|
56
|
+
require_relative 'json_loader'
|
30
57
|
require_relative 'rest_loader'
|
31
|
-
require_relative 'stdout_loader'
|
32
58
|
require_relative 'table_loader'
|
@@ -9,19 +9,19 @@ module Chronicle
|
|
9
9
|
r.description = 'a REST endpoint'
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
setting :hostname, required: true
|
13
|
+
setting :endpoint, required: true
|
14
|
+
setting :access_token
|
15
15
|
|
16
16
|
def load(record)
|
17
17
|
payload = Chronicle::ETL::JSONAPISerializer.serialize(record)
|
18
18
|
# have the outer data key that json-api expects
|
19
19
|
payload = { data: payload } unless payload[:data]
|
20
20
|
|
21
|
-
uri = URI.parse("#{@
|
21
|
+
uri = URI.parse("#{@config.hostname}#{@config.endpoint}")
|
22
22
|
|
23
23
|
header = {
|
24
|
-
"Authorization" => "Bearer #{@
|
24
|
+
"Authorization" => "Bearer #{@config.access_token}",
|
25
25
|
"Content-Type": 'application/json'
|
26
26
|
}
|
27
27
|
use_ssl = uri.scheme == 'https'
|