chronicle-etl 0.3.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/.rubocop.yml +31 -1
- data/Guardfile +7 -0
- data/README.md +157 -82
- data/Rakefile +4 -2
- data/chronicle-etl.gemspec +11 -3
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/cli/connectors.rb +34 -5
- data/lib/chronicle/etl/cli/jobs.rb +90 -24
- data/lib/chronicle/etl/cli/main.rb +41 -19
- data/lib/chronicle/etl/cli/plugins.rb +62 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
- data/lib/chronicle/etl/cli.rb +9 -0
- data/lib/chronicle/etl/config.rb +7 -4
- data/lib/chronicle/etl/configurable.rb +163 -0
- data/lib/chronicle/etl/exceptions.rb +29 -1
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
- data/lib/chronicle/etl/extractors/extractor.rb +16 -15
- data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
- data/lib/chronicle/etl/job.rb +8 -2
- data/lib/chronicle/etl/job_definition.rb +20 -5
- data/lib/chronicle/etl/loaders/csv_loader.rb +36 -9
- data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +28 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
- data/lib/chronicle/etl/loaders/table_loader.rb +18 -37
- data/lib/chronicle/etl/logger.rb +6 -2
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +6 -0
- data/lib/chronicle/etl/registry/plugin_registry.rb +70 -0
- data/lib/chronicle/etl/registry/registry.rb +27 -14
- data/lib/chronicle/etl/runner.rb +35 -17
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/transformers/transformer.rb +3 -2
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +12 -4
- metadata +123 -18
- data/.ruby-version +0 -1
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -2,46 +2,47 @@ require 'csv'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
-
class
|
6
|
-
include Extractors::Helpers::
|
5
|
+
class CSVExtractor < Chronicle::ETL::Extractor
|
6
|
+
include Extractors::Helpers::InputReader
|
7
7
|
|
8
8
|
register_connector do |r|
|
9
|
-
r.description = '
|
9
|
+
r.description = 'CSV'
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
headers: true,
|
14
|
-
filename: $stdin
|
15
|
-
}.freeze
|
12
|
+
setting :headers, default: true
|
16
13
|
|
17
|
-
def
|
18
|
-
|
14
|
+
def prepare
|
15
|
+
@csvs = prepare_sources
|
19
16
|
end
|
20
17
|
|
21
18
|
def extract
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
@csvs.each do |csv|
|
20
|
+
csv.read.each do |row|
|
21
|
+
yield Chronicle::ETL::Extraction.new(data: row.to_h)
|
22
|
+
end
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
28
26
|
def results_count
|
29
|
-
|
27
|
+
@csvs.reduce(0) do |total_rows, csv|
|
28
|
+
row_count = csv.readlines.size
|
29
|
+
csv.rewind
|
30
|
+
total_rows + row_count
|
31
|
+
end
|
30
32
|
end
|
31
33
|
|
32
34
|
private
|
33
35
|
|
34
|
-
def
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
open_from_filesystem(filename: @options[:filename]) do |file|
|
43
|
-
return CSV.new(file, **csv_options)
|
36
|
+
def prepare_sources
|
37
|
+
@csvs = []
|
38
|
+
read_input do |csv_data|
|
39
|
+
csv_options = {
|
40
|
+
headers: @config.headers.is_a?(String) ? @config.headers.split(',') : @config.headers,
|
41
|
+
converters: :all
|
42
|
+
}
|
43
|
+
@csvs << CSV.new(csv_data, **csv_options)
|
44
44
|
end
|
45
|
+
@csvs
|
45
46
|
end
|
46
47
|
end
|
47
48
|
end
|
@@ -5,15 +5,20 @@ module Chronicle
|
|
5
5
|
# Abstract class representing an Extractor for an ETL job
|
6
6
|
class Extractor
|
7
7
|
extend Chronicle::ETL::Registry::SelfRegistering
|
8
|
+
include Chronicle::ETL::Configurable
|
9
|
+
|
10
|
+
setting :since, type: :time
|
11
|
+
setting :until, type: :time
|
12
|
+
setting :limit, type: :numeric
|
13
|
+
setting :load_after_id
|
14
|
+
setting :input
|
8
15
|
|
9
16
|
# Construct a new instance of this extractor. Options are passed in from a Runner
|
10
|
-
# ==
|
17
|
+
# == Parameters:
|
11
18
|
# options::
|
12
19
|
# Options for configuring this Extractor
|
13
20
|
def initialize(options = {})
|
14
|
-
|
15
|
-
sanitize_options
|
16
|
-
handle_continuation
|
21
|
+
apply_options(options)
|
17
22
|
end
|
18
23
|
|
19
24
|
# Hook called before #extract. Useful for gathering data, initializing proxies, etc
|
@@ -30,22 +35,18 @@ module Chronicle
|
|
30
35
|
|
31
36
|
private
|
32
37
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
def handle_continuation
|
39
|
-
return unless @options[:continuation]
|
38
|
+
# TODO: reimplement this
|
39
|
+
# def handle_continuation
|
40
|
+
# return unless @config.continuation
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
end
|
42
|
+
# @config.since = @config.continuation.highest_timestamp if @config.continuation.highest_timestamp
|
43
|
+
# @config.load_after_id = @config.continuation.last_id if @config.continuation.last_id
|
44
|
+
# end
|
44
45
|
end
|
45
46
|
end
|
46
47
|
end
|
47
48
|
|
48
|
-
require_relative 'helpers/
|
49
|
+
require_relative 'helpers/input_reader'
|
49
50
|
require_relative 'csv_extractor'
|
50
51
|
require_relative 'file_extractor'
|
51
52
|
require_relative 'json_extractor'
|
@@ -2,32 +2,55 @@ require 'pathname'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
+
# Return filenames that match a pattern in a directory
|
5
6
|
class FileExtractor < Chronicle::ETL::Extractor
|
6
|
-
include Extractors::Helpers::FilesystemReader
|
7
7
|
|
8
8
|
register_connector do |r|
|
9
9
|
r.description = 'file or directory of files'
|
10
10
|
end
|
11
11
|
|
12
|
+
setting :input, default: ['.']
|
13
|
+
setting :dir_glob_pattern, default: "**/*"
|
14
|
+
setting :larger_than
|
15
|
+
setting :smaller_than
|
16
|
+
|
17
|
+
def prepare
|
18
|
+
@pathnames = gather_files
|
19
|
+
end
|
20
|
+
|
12
21
|
def extract
|
13
|
-
|
14
|
-
yield Chronicle::ETL::Extraction.new(data:
|
22
|
+
@pathnames.each do |pathname|
|
23
|
+
yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
|
15
24
|
end
|
16
25
|
end
|
17
26
|
|
18
27
|
def results_count
|
19
|
-
|
28
|
+
@pathnames.count
|
20
29
|
end
|
21
30
|
|
22
31
|
private
|
23
32
|
|
24
|
-
def
|
25
|
-
@
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
33
|
+
def gather_files
|
34
|
+
roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
|
35
|
+
raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)
|
36
|
+
|
37
|
+
directories, files = roots.partition(&:directory?)
|
38
|
+
|
39
|
+
directories.each do |directory|
|
40
|
+
files += Dir.glob(File.join(directory, @config.dir_glob_pattern)).map { |filename| Pathname.new(filename) }
|
41
|
+
end
|
42
|
+
|
43
|
+
files = files.uniq
|
44
|
+
|
45
|
+
files = files.keep_if { |f| (f.mtime > @config.since) } if @config.since
|
46
|
+
files = files.keep_if { |f| (f.mtime < @config.until) } if @config.until
|
47
|
+
|
48
|
+
# pass in file sizes in bytes
|
49
|
+
files = files.keep_if { |f| (f.size < @config.smaller_than) } if @config.smaller_than
|
50
|
+
files = files.keep_if { |f| (f.size > @config.larger_than) } if @config.larger_than
|
51
|
+
|
52
|
+
# # TODO: incorporate sort argument
|
53
|
+
files.sort_by(&:mtime)
|
31
54
|
end
|
32
55
|
end
|
33
56
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
module Extractors
|
6
|
+
module Helpers
|
7
|
+
module InputReader
|
8
|
+
# Return an array of input filenames; converts a single string
|
9
|
+
# to an array if necessary
|
10
|
+
def filenames
|
11
|
+
[@config.input].flatten.map
|
12
|
+
end
|
13
|
+
|
14
|
+
# Filenames as an array of pathnames
|
15
|
+
def pathnames
|
16
|
+
filenames.map { |filename| Pathname.new(filename) }
|
17
|
+
end
|
18
|
+
|
19
|
+
# Whether we're reading from files
|
20
|
+
def read_from_files?
|
21
|
+
filenames.any?
|
22
|
+
end
|
23
|
+
|
24
|
+
# Whether we're reading input from stdin
|
25
|
+
def read_from_stdin?
|
26
|
+
!read_from_files? && $stdin.stat.pipe?
|
27
|
+
end
|
28
|
+
|
29
|
+
# Read input sources and yield each content
|
30
|
+
def read_input
|
31
|
+
if read_from_files?
|
32
|
+
pathnames.each do |pathname|
|
33
|
+
File.open(pathname) do |file|
|
34
|
+
yield file.read, pathname.to_path
|
35
|
+
end
|
36
|
+
end
|
37
|
+
elsif read_from_stdin?
|
38
|
+
yield $stdin.read, $stdin
|
39
|
+
else
|
40
|
+
raise ExtractionError, "No input files or stdin provided"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# Read input sources line by line
|
45
|
+
def read_input_as_lines(&block)
|
46
|
+
if read_from_files?
|
47
|
+
lines_from_files(&block)
|
48
|
+
elsif read_from_stdin?
|
49
|
+
lines_from_stdin(&block)
|
50
|
+
else
|
51
|
+
raise ExtractionError, "No input files or stdin provided"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def lines_from_files(&block)
|
58
|
+
pathnames.each do |pathname|
|
59
|
+
File.open(pathname) do |file|
|
60
|
+
lines_from_io(file, &block)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def lines_from_stdin(&block)
|
66
|
+
lines_from_io($stdin, &block)
|
67
|
+
end
|
68
|
+
|
69
|
+
def lines_from_io(io, &block)
|
70
|
+
io.each_line(&block)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -1,43 +1,44 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
|
-
class
|
4
|
-
include Extractors::Helpers::
|
3
|
+
class JSONExtractor < Chronicle::ETL::Extractor
|
4
|
+
include Extractors::Helpers::InputReader
|
5
5
|
|
6
6
|
register_connector do |r|
|
7
|
-
r.description = '
|
7
|
+
r.description = 'JSON'
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
filename: $stdin,
|
10
|
+
setting :jsonl, default: true, type: :boolean
|
12
11
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
super(DEFAULT_OPTIONS.merge(options))
|
12
|
+
def prepare
|
13
|
+
@jsons = []
|
14
|
+
load_input do |input|
|
15
|
+
@jsons << parse_data(input)
|
16
|
+
end
|
19
17
|
end
|
20
18
|
|
21
19
|
def extract
|
22
|
-
|
23
|
-
|
24
|
-
yield Chronicle::ETL::Extraction.new(data: parsed_data) if parsed_data
|
20
|
+
@jsons.each do |json|
|
21
|
+
yield Chronicle::ETL::Extraction.new(data: json)
|
25
22
|
end
|
26
23
|
end
|
27
24
|
|
28
25
|
def results_count
|
26
|
+
@jsons.count
|
29
27
|
end
|
30
28
|
|
31
29
|
private
|
32
30
|
|
33
31
|
def parse_data data
|
34
32
|
JSON.parse(data)
|
35
|
-
rescue JSON::ParserError
|
33
|
+
rescue JSON::ParserError
|
34
|
+
raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
|
36
35
|
end
|
37
36
|
|
38
|
-
def load_input
|
39
|
-
|
40
|
-
|
37
|
+
def load_input(&block)
|
38
|
+
if @config.jsonl
|
39
|
+
read_input_as_lines(&block)
|
40
|
+
else
|
41
|
+
read_input(&block)
|
41
42
|
end
|
42
43
|
end
|
43
44
|
end
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'forwardable'
|
2
|
+
|
2
3
|
module Chronicle
|
3
4
|
module ETL
|
5
|
+
# A runner job
|
6
|
+
#
|
7
|
+
# TODO: this can probably be merged with JobDefinition. Not clear
|
8
|
+
# where the boundaries are
|
4
9
|
class Job
|
5
10
|
extend Forwardable
|
6
11
|
|
@@ -12,7 +17,8 @@ module Chronicle
|
|
12
17
|
:transformer_klass,
|
13
18
|
:transformer_options,
|
14
19
|
:loader_klass,
|
15
|
-
:loader_options
|
20
|
+
:loader_options,
|
21
|
+
:job_definition
|
16
22
|
|
17
23
|
# TODO: build a proper id system
|
18
24
|
alias id name
|
@@ -35,7 +41,7 @@ module Chronicle
|
|
35
41
|
|
36
42
|
def instantiate_transformer(extraction)
|
37
43
|
@transformer_klass = @job_definition.transformer_klass
|
38
|
-
@transformer_klass.new(@transformer_options
|
44
|
+
@transformer_klass.new(extraction, @transformer_options)
|
39
45
|
end
|
40
46
|
|
41
47
|
def instantiate_loader
|
@@ -14,17 +14,36 @@ module Chronicle
|
|
14
14
|
options: {}
|
15
15
|
},
|
16
16
|
loader: {
|
17
|
-
name: '
|
17
|
+
name: 'table',
|
18
18
|
options: {}
|
19
19
|
}
|
20
20
|
}.freeze
|
21
21
|
|
22
|
+
attr_reader :errors
|
22
23
|
attr_accessor :definition
|
23
24
|
|
24
25
|
def initialize()
|
25
26
|
@definition = SKELETON_DEFINITION
|
26
27
|
end
|
27
28
|
|
29
|
+
def validate
|
30
|
+
@errors = []
|
31
|
+
|
32
|
+
Chronicle::ETL::Registry::PHASES.each do |phase|
|
33
|
+
__send__("#{phase}_klass".to_sym)
|
34
|
+
rescue Chronicle::ETL::PluginError => e
|
35
|
+
@errors << e
|
36
|
+
end
|
37
|
+
|
38
|
+
@errors.empty?
|
39
|
+
end
|
40
|
+
|
41
|
+
def validate!
|
42
|
+
raise(Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid") unless validate
|
43
|
+
|
44
|
+
true
|
45
|
+
end
|
46
|
+
|
28
47
|
# Add config hash to this definition
|
29
48
|
def add_config(config = {})
|
30
49
|
@definition = @definition.deep_merge(config)
|
@@ -80,10 +99,6 @@ module Chronicle
|
|
80
99
|
end
|
81
100
|
end
|
82
101
|
end
|
83
|
-
|
84
|
-
def validate
|
85
|
-
return true # TODO
|
86
|
-
end
|
87
102
|
end
|
88
103
|
end
|
89
104
|
end
|
@@ -2,27 +2,54 @@ require 'csv'
|
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
|
-
class
|
5
|
+
class CSVLoader < Chronicle::ETL::Loader
|
6
6
|
register_connector do |r|
|
7
7
|
r.description = 'CSV'
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
setting :output, default: $stdout
|
11
|
+
setting :headers, default: true
|
12
|
+
setting :header_row, default: true
|
13
|
+
|
14
|
+
def records
|
15
|
+
@records ||= []
|
13
16
|
end
|
14
17
|
|
15
18
|
def load(record)
|
16
|
-
|
19
|
+
records << record.to_h_flattened
|
17
20
|
end
|
18
21
|
|
19
22
|
def finish
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
23
|
+
return unless records.any?
|
24
|
+
|
25
|
+
headers = build_headers(records)
|
26
|
+
|
27
|
+
csv_options = {}
|
28
|
+
if @config.headers
|
29
|
+
csv_options[:write_headers] = @config.header_row
|
30
|
+
csv_options[:headers] = headers
|
31
|
+
end
|
32
|
+
|
33
|
+
if @config.output.is_a?(IO)
|
34
|
+
# This might seem like a duplication of the default value ($stdout)
|
35
|
+
# but it's because rspec overwrites $stdout (in helper #capture) to
|
36
|
+
# capture output.
|
37
|
+
io = $stdout.dup
|
38
|
+
else
|
39
|
+
io = File.open(@config.output, "w+")
|
40
|
+
end
|
41
|
+
|
42
|
+
output = CSV.generate(**csv_options) do |csv|
|
43
|
+
records.each do |record|
|
44
|
+
csv << record
|
45
|
+
.transform_keys(&:to_sym)
|
46
|
+
.values_at(*headers)
|
47
|
+
.map { |value| force_utf8(value) }
|
24
48
|
end
|
25
49
|
end
|
50
|
+
|
51
|
+
io.write(output)
|
52
|
+
io.close
|
26
53
|
end
|
27
54
|
end
|
28
55
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
module Loaders
|
6
|
+
module Helpers
|
7
|
+
module EncodingHelper
|
8
|
+
# Mostly useful for handling loading with binary data from a raw extraction
|
9
|
+
def force_utf8(value)
|
10
|
+
return value unless value.is_a?(String)
|
11
|
+
|
12
|
+
value.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
class JSONLoader < Chronicle::ETL::Loader
|
4
|
+
register_connector do |r|
|
5
|
+
r.description = 'json'
|
6
|
+
end
|
7
|
+
|
8
|
+
setting :serializer
|
9
|
+
setting :output, default: $stdout
|
10
|
+
|
11
|
+
def start
|
12
|
+
if @config.output == $stdout
|
13
|
+
@output = @config.output
|
14
|
+
else
|
15
|
+
@output = File.open(@config.output, "w")
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def load(record)
|
20
|
+
serialized = serializer.serialize(record)
|
21
|
+
|
22
|
+
# When dealing with raw data, we can get improperly encoded strings
|
23
|
+
# (eg from sqlite database columns). We force conversion to UTF-8
|
24
|
+
# before converting into JSON
|
25
|
+
encoded = serialized.transform_values do |value|
|
26
|
+
next value unless value.is_a?(String)
|
27
|
+
|
28
|
+
force_utf8(value)
|
29
|
+
end
|
30
|
+
@output.puts encoded.to_json
|
31
|
+
end
|
32
|
+
|
33
|
+
def finish
|
34
|
+
@output.close
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def serializer
|
40
|
+
@config.serializer || Chronicle::ETL::RawSerializer
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -1,15 +1,24 @@
|
|
1
|
+
require_relative 'helpers/encoding_helper'
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
# Abstract class representing a Loader for an ETL job
|
4
6
|
class Loader
|
5
7
|
extend Chronicle::ETL::Registry::SelfRegistering
|
8
|
+
include Chronicle::ETL::Configurable
|
9
|
+
include Chronicle::ETL::Loaders::Helpers::EncodingHelper
|
10
|
+
|
11
|
+
setting :output
|
12
|
+
setting :fields
|
13
|
+
setting :fields_limit, default: nil
|
14
|
+
setting :fields_exclude
|
6
15
|
|
7
16
|
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
17
|
# == Parameters:
|
9
18
|
# options::
|
10
19
|
# Options for configuring this Loader
|
11
20
|
def initialize(options = {})
|
12
|
-
|
21
|
+
apply_options(options)
|
13
22
|
end
|
14
23
|
|
15
24
|
# Called once before processing records
|
@@ -22,11 +31,28 @@ module Chronicle
|
|
22
31
|
|
23
32
|
# Called once there are no more records to process
|
24
33
|
def finish; end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def build_headers(records)
|
38
|
+
headers =
|
39
|
+
if @config.fields && @config.fields.any?
|
40
|
+
Set[*@config.fields]
|
41
|
+
else
|
42
|
+
# use all the keys of the flattened record hash
|
43
|
+
Set[*records.map(&:keys).flatten.map(&:to_s).uniq]
|
44
|
+
end
|
45
|
+
|
46
|
+
headers = headers.delete_if { |header| header.end_with?(*@config.fields_exclude) }
|
47
|
+
headers = headers.first(@config.fields_limit) if @config.fields_limit
|
48
|
+
|
49
|
+
headers.to_a.map(&:to_sym)
|
50
|
+
end
|
25
51
|
end
|
26
52
|
end
|
27
53
|
end
|
28
54
|
|
29
55
|
require_relative 'csv_loader'
|
56
|
+
require_relative 'json_loader'
|
30
57
|
require_relative 'rest_loader'
|
31
|
-
require_relative 'stdout_loader'
|
32
58
|
require_relative 'table_loader'
|
@@ -9,19 +9,19 @@ module Chronicle
|
|
9
9
|
r.description = 'a REST endpoint'
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
setting :hostname, required: true
|
13
|
+
setting :endpoint, required: true
|
14
|
+
setting :access_token
|
15
15
|
|
16
16
|
def load(record)
|
17
17
|
payload = Chronicle::ETL::JSONAPISerializer.serialize(record)
|
18
18
|
# have the outer data key that json-api expects
|
19
19
|
payload = { data: payload } unless payload[:data]
|
20
20
|
|
21
|
-
uri = URI.parse("#{@
|
21
|
+
uri = URI.parse("#{@config.hostname}#{@config.endpoint}")
|
22
22
|
|
23
23
|
header = {
|
24
|
-
"Authorization" => "Bearer #{@
|
24
|
+
"Authorization" => "Bearer #{@config.access_token}",
|
25
25
|
"Content-Type": 'application/json'
|
26
26
|
}
|
27
27
|
use_ssl = uri.scheme == 'https'
|