chronicle-etl 0.5.4 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +98 -73
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +50 -45
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +10 -8
- data/lib/chronicle/etl/cli/connectors.rb +9 -9
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +29 -26
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +20 -7
- data/lib/chronicle/etl/configurable.rb +24 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +39 -27
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +3 -3
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -10
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +117 -0
- data/lib/chronicle/etl/registry/plugin_registration.rb +19 -0
- data/lib/chronicle/etl/registry/plugins.rb +171 -0
- data/lib/chronicle/etl/registry/registry.rb +3 -52
- data/lib/chronicle/etl/registry/self_registering.rb +1 -1
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +5 -5
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +91 -45
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/registry/plugin_registry.rb +0 -95
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'active_support/core_ext/hash/deep_merge'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -9,12 +11,14 @@ module Chronicle
|
|
9
11
|
name: 'stdin',
|
10
12
|
options: {}
|
11
13
|
},
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
transformers: [
|
15
|
+
{
|
16
|
+
name: 'null',
|
17
|
+
options: {}
|
18
|
+
}
|
19
|
+
],
|
16
20
|
loader: {
|
17
|
-
name: '
|
21
|
+
name: 'json',
|
18
22
|
options: {}
|
19
23
|
}
|
20
24
|
}.freeze
|
@@ -22,7 +26,7 @@ module Chronicle
|
|
22
26
|
attr_reader :errors
|
23
27
|
attr_accessor :definition
|
24
28
|
|
25
|
-
def initialize
|
29
|
+
def initialize
|
26
30
|
@definition = SKELETON_DEFINITION
|
27
31
|
end
|
28
32
|
|
@@ -34,12 +38,12 @@ module Chronicle
|
|
34
38
|
def validate
|
35
39
|
@errors = {}
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
extractor_klass
|
42
|
+
transformer_klasses
|
43
|
+
loader_klass
|
44
|
+
rescue Chronicle::ETL::PluginError => e
|
45
|
+
@errors[:plugins] ||= []
|
46
|
+
@errors[:plugins] << e
|
43
47
|
end
|
44
48
|
|
45
49
|
def plugins_missing?
|
@@ -48,12 +52,11 @@ module Chronicle
|
|
48
52
|
return false unless @errors[:plugins]&.any?
|
49
53
|
|
50
54
|
@errors[:plugins]
|
51
|
-
.
|
52
|
-
.any?
|
55
|
+
.any? { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
|
53
56
|
end
|
54
57
|
|
55
58
|
def validate!
|
56
|
-
raise(Chronicle::ETL::JobDefinitionError.new(self),
|
59
|
+
raise(Chronicle::ETL::JobDefinitionError.new(self), 'Job definition is invalid') unless valid?
|
57
60
|
|
58
61
|
true
|
59
62
|
end
|
@@ -66,19 +69,20 @@ module Chronicle
|
|
66
69
|
|
67
70
|
# For each connector in this job, mix in secrets into the options
|
68
71
|
def apply_default_secrets
|
69
|
-
|
72
|
+
# FIXME: handle transformer secrets
|
73
|
+
%i[extractor loader].each do |phase|
|
70
74
|
# If the options have a `secrets` key, we look up those secrets and
|
71
|
-
# mix them in. If not, use the connector's plugin name and look up
|
75
|
+
# mix them in. If not, use the connector's plugin name and look up
|
72
76
|
# secrets with the same namespace
|
73
77
|
if @definition[phase][:options][:secrets]
|
74
78
|
namespace = @definition[phase][:options][:secrets]
|
75
79
|
else
|
76
80
|
# We don't want to do this lookup for built-in connectors
|
77
|
-
next if __send__("#{phase}_klass"
|
81
|
+
next if __send__(:"#{phase}_klass").connector_registration.built_in?
|
78
82
|
|
79
83
|
# infer plugin name from connector name and use it for secrets
|
80
84
|
# namespace
|
81
|
-
namespace = @definition[phase][:name].split(
|
85
|
+
namespace = @definition[phase][:name].split(':').first
|
82
86
|
end
|
83
87
|
|
84
88
|
# Reverse merge secrets into connector's options (we want to preserve
|
@@ -98,15 +102,17 @@ module Chronicle
|
|
98
102
|
end
|
99
103
|
|
100
104
|
def extractor_klass
|
101
|
-
|
105
|
+
find_connector_klass(:extractor, @definition[:extractor][:name])
|
102
106
|
end
|
103
107
|
|
104
|
-
def
|
105
|
-
|
108
|
+
def transformer_klasses
|
109
|
+
@definition[:transformers].map do |transformer|
|
110
|
+
find_connector_klass(:transformer, transformer[:name])
|
111
|
+
end
|
106
112
|
end
|
107
113
|
|
108
114
|
def loader_klass
|
109
|
-
|
115
|
+
find_connector_klass(:loader, @definition[:loader][:name])
|
110
116
|
end
|
111
117
|
|
112
118
|
def extractor_options
|
@@ -114,7 +120,9 @@ module Chronicle
|
|
114
120
|
end
|
115
121
|
|
116
122
|
def transformer_options
|
117
|
-
@definition[:transformer
|
123
|
+
@definition[:transformers].map do |transformer|
|
124
|
+
transformer[:options]
|
125
|
+
end
|
118
126
|
end
|
119
127
|
|
120
128
|
def loader_options
|
@@ -123,12 +131,16 @@ module Chronicle
|
|
123
131
|
|
124
132
|
private
|
125
133
|
|
126
|
-
def
|
127
|
-
Chronicle::ETL::Registry.
|
134
|
+
def find_schema_transformer_klass(source_klass, target)
|
135
|
+
Chronicle::ETL::Registry::Connectors.find_converter_for_source(source_klass, target).klass
|
136
|
+
end
|
137
|
+
|
138
|
+
def find_connector_klass(phase, identifier)
|
139
|
+
Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
|
128
140
|
end
|
129
141
|
|
130
142
|
def load_credentials
|
131
|
-
|
143
|
+
%i[extractor loader].each do |phase|
|
132
144
|
credentials_name = @definition[phase].dig(:options, :credentials)
|
133
145
|
if credentials_name
|
134
146
|
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|
@@ -9,13 +9,13 @@ module Chronicle
|
|
9
9
|
extend Forwardable
|
10
10
|
|
11
11
|
attr_accessor :job,
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
12
|
+
:job_id,
|
13
|
+
:last_id,
|
14
|
+
:highest_timestamp,
|
15
|
+
:num_records_processed,
|
16
|
+
:started_at,
|
17
|
+
:finished_at,
|
18
|
+
:success
|
19
19
|
|
20
20
|
def_delegators :@job, :save_log?
|
21
21
|
|
@@ -28,11 +28,11 @@ module Chronicle
|
|
28
28
|
|
29
29
|
# Log the result of a single transformation in a job
|
30
30
|
# @param transformer [Chronicle::ETL::Transformer] The transformer that ran
|
31
|
-
def log_transformation(
|
32
|
-
@last_id = transformer.id if transformer.id
|
31
|
+
def log_transformation(_transformer)
|
32
|
+
# @last_id = transformer.id if transformer.id
|
33
33
|
|
34
34
|
# Save the highest timestamp that we've encountered so far
|
35
|
-
@highest_timestamp = [transformer.timestamp, @highest_timestamp].compact.max if transformer.timestamp
|
35
|
+
# @highest_timestamp = [transformer.timestamp, @highest_timestamp].compact.max if transformer.timestamp
|
36
36
|
|
37
37
|
# TODO: a transformer might yield nil. We might also want certain transformers to explode
|
38
38
|
# records into multiple new ones. Therefore, this variable will need more subtle behaviour
|
@@ -54,13 +54,13 @@ module Chronicle
|
|
54
54
|
@finished_at = Time.now
|
55
55
|
end
|
56
56
|
|
57
|
-
def job=
|
57
|
+
def job=(job)
|
58
58
|
@job = job
|
59
59
|
@job_id = job.id
|
60
60
|
end
|
61
61
|
|
62
62
|
def duration
|
63
|
-
return unless @finished_at
|
63
|
+
return unless @finished_at && @started_at
|
64
64
|
|
65
65
|
@finished_at - @started_at
|
66
66
|
end
|
@@ -78,14 +78,12 @@ module Chronicle
|
|
78
78
|
}
|
79
79
|
end
|
80
80
|
|
81
|
-
private
|
82
|
-
|
83
81
|
# Create a new JobLog and set its instance variables from a serialized hash
|
84
|
-
def self.build_from_serialized
|
82
|
+
def self.build_from_serialized(attrs)
|
85
83
|
attrs.delete(:id)
|
86
84
|
new do |job_log|
|
87
85
|
attrs.each do |key, value|
|
88
|
-
setter = "#{key
|
86
|
+
setter = :"#{key}="
|
89
87
|
job_log.send(setter, value)
|
90
88
|
end
|
91
89
|
end
|
@@ -12,7 +12,7 @@ module Chronicle
|
|
12
12
|
attr_accessor :job_log
|
13
13
|
|
14
14
|
# For a given `job_id`, return the last successful log
|
15
|
-
def self.load_latest(
|
15
|
+
def self.load_latest(_job_id)
|
16
16
|
with_db_connection do |db|
|
17
17
|
attrs = db[:job_logs].reverse_order(:finished_at).where(success: true).first
|
18
18
|
JobLog.build_from_serialized(attrs) if attrs
|
@@ -28,11 +28,11 @@ module Chronicle
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def self.db_exists?
|
31
|
-
File.
|
31
|
+
File.exist?(db_filename)
|
32
32
|
end
|
33
33
|
|
34
34
|
def self.schema_exists?(db)
|
35
|
-
|
35
|
+
db.tables.include? :job_logs
|
36
36
|
end
|
37
37
|
|
38
38
|
def self.db_filename
|
@@ -44,7 +44,7 @@ module Chronicle
|
|
44
44
|
FileUtils.mkdir_p(File.dirname(db_filename))
|
45
45
|
end
|
46
46
|
|
47
|
-
def self.initialize_schema
|
47
|
+
def self.initialize_schema(db)
|
48
48
|
db.create_table :job_logs do
|
49
49
|
primary_key :id
|
50
50
|
String :job_id, null: false
|
@@ -1,4 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'csv'
|
4
|
+
require 'chronicle/utils/hash_utils'
|
2
5
|
|
3
6
|
module Chronicle
|
4
7
|
module ETL
|
@@ -6,6 +9,7 @@ module Chronicle
|
|
6
9
|
include Chronicle::ETL::Loaders::Helpers::StdoutHelper
|
7
10
|
|
8
11
|
register_connector do |r|
|
12
|
+
r.identifier = :csv
|
9
13
|
r.description = 'CSV'
|
10
14
|
end
|
11
15
|
|
@@ -18,13 +22,14 @@ module Chronicle
|
|
18
22
|
end
|
19
23
|
|
20
24
|
def load(record)
|
21
|
-
records << record
|
25
|
+
records << record
|
22
26
|
end
|
23
27
|
|
24
28
|
def finish
|
25
29
|
return unless records.any?
|
26
30
|
|
27
|
-
headers =
|
31
|
+
# headers = filtered_headers(records)
|
32
|
+
headers = gather_headers(records)
|
28
33
|
|
29
34
|
csv_options = {}
|
30
35
|
if @config.headers
|
@@ -34,8 +39,7 @@ module Chronicle
|
|
34
39
|
|
35
40
|
csv_output = CSV.generate(**csv_options) do |csv|
|
36
41
|
records.each do |record|
|
37
|
-
csv << record
|
38
|
-
.transform_keys(&:to_sym)
|
42
|
+
csv << Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
39
43
|
.values_at(*headers)
|
40
44
|
.map { |value| force_utf8(value) }
|
41
45
|
end
|
@@ -48,6 +52,15 @@ module Chronicle
|
|
48
52
|
File.write(@config.output, csv_output)
|
49
53
|
end
|
50
54
|
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def gather_headers(records)
|
59
|
+
records_flattened = records.map do |record|
|
60
|
+
Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
61
|
+
end
|
62
|
+
records_flattened.flat_map(&:keys).uniq
|
63
|
+
end
|
51
64
|
end
|
52
65
|
end
|
53
66
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tempfile'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -5,6 +7,8 @@ module Chronicle
|
|
5
7
|
module Loaders
|
6
8
|
module Helpers
|
7
9
|
module StdoutHelper
|
10
|
+
# TODO: have option to immediately output to stdout
|
11
|
+
|
8
12
|
# TODO: let users use "stdout" as an option for the `output` setting
|
9
13
|
# Assume we're using stdout if no output is specified
|
10
14
|
def output_to_stdout?
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tempfile'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -6,10 +8,10 @@ module Chronicle
|
|
6
8
|
include Chronicle::ETL::Loaders::Helpers::StdoutHelper
|
7
9
|
|
8
10
|
register_connector do |r|
|
11
|
+
r.identifier = :json
|
9
12
|
r.description = 'json'
|
10
13
|
end
|
11
14
|
|
12
|
-
setting :serializer
|
13
15
|
setting :output
|
14
16
|
|
15
17
|
# If true, one JSON record per line. If false, output a single json
|
@@ -26,23 +28,24 @@ module Chronicle
|
|
26
28
|
if output_to_stdout?
|
27
29
|
create_stdout_temp_file
|
28
30
|
else
|
29
|
-
File.open(@config.output,
|
31
|
+
File.open(@config.output, 'w+')
|
30
32
|
end
|
31
33
|
|
32
34
|
@output_file.puts("[\n") unless @config.line_separated
|
33
35
|
end
|
34
36
|
|
35
37
|
def load(record)
|
36
|
-
serialized =
|
38
|
+
serialized = record.to_h
|
37
39
|
|
38
40
|
# When dealing with raw data, we can get improperly encoded strings
|
39
41
|
# (eg from sqlite database columns). We force conversion to UTF-8
|
40
42
|
# before converting into JSON
|
41
|
-
encoded = serialized.transform_values do |value|
|
42
|
-
|
43
|
+
# encoded = serialized.transform_values do |value|
|
44
|
+
# next value unless value.is_a?(String)
|
43
45
|
|
44
|
-
|
45
|
-
end
|
46
|
+
# force_utf8(value)
|
47
|
+
# end
|
48
|
+
encoded = deeply_force_utf8(serialized)
|
46
49
|
|
47
50
|
line = encoded.to_json
|
48
51
|
# For line-separated output, we just put json + newline
|
@@ -57,6 +60,8 @@ module Chronicle
|
|
57
60
|
@output_file.write(line)
|
58
61
|
|
59
62
|
@first_line = false
|
63
|
+
# rescue StandardError => e
|
64
|
+
# binding.pry
|
60
65
|
end
|
61
66
|
|
62
67
|
def finish
|
@@ -70,9 +75,24 @@ module Chronicle
|
|
70
75
|
|
71
76
|
private
|
72
77
|
|
73
|
-
# TODO:
|
74
|
-
def
|
75
|
-
|
78
|
+
# TODO: Move this to a helper module
|
79
|
+
def deeply_force_utf8(hash)
|
80
|
+
# FIXME: probably shouldn't happen but it does
|
81
|
+
return hash.map { |x| force_utf8(x) } if hash.is_a?(Array)
|
82
|
+
return force_utf8(hash) unless hash.is_a?(Hash)
|
83
|
+
|
84
|
+
hash.transform_values do |value|
|
85
|
+
case value
|
86
|
+
when String
|
87
|
+
force_utf8(value)
|
88
|
+
when Hash
|
89
|
+
deeply_force_utf8(value)
|
90
|
+
when Array
|
91
|
+
value.map { |v| deeply_force_utf8(v) }
|
92
|
+
else
|
93
|
+
value
|
94
|
+
end
|
95
|
+
end
|
76
96
|
end
|
77
97
|
end
|
78
98
|
end
|
@@ -32,23 +32,6 @@ module Chronicle
|
|
32
32
|
|
33
33
|
# Called once there are no more records to process
|
34
34
|
def finish; end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
def build_headers(records)
|
39
|
-
headers =
|
40
|
-
if @config.fields && @config.fields.any?
|
41
|
-
Set[*@config.fields]
|
42
|
-
else
|
43
|
-
# use all the keys of the flattened record hash
|
44
|
-
Set[*records.map(&:keys).flatten.map(&:to_s).uniq]
|
45
|
-
end
|
46
|
-
|
47
|
-
headers = headers.delete_if { |header| header.end_with?(*@config.fields_exclude) }
|
48
|
-
headers = headers.first(@config.fields_limit) if @config.fields_limit
|
49
|
-
|
50
|
-
headers.to_a.map(&:to_sym)
|
51
|
-
end
|
52
35
|
end
|
53
36
|
end
|
54
37
|
end
|
@@ -1,11 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'net/http'
|
2
4
|
require 'uri'
|
3
5
|
require 'json'
|
6
|
+
require 'chronicle/serialization'
|
4
7
|
|
5
8
|
module Chronicle
|
6
9
|
module ETL
|
7
10
|
class RestLoader < Chronicle::ETL::Loader
|
8
11
|
register_connector do |r|
|
12
|
+
r.identifier = :rest
|
9
13
|
r.description = 'a REST endpoint'
|
10
14
|
end
|
11
15
|
|
@@ -13,16 +17,12 @@ module Chronicle
|
|
13
17
|
setting :endpoint, required: true
|
14
18
|
setting :access_token
|
15
19
|
|
16
|
-
def load(
|
17
|
-
payload = Chronicle::ETL::JSONAPISerializer.serialize(record)
|
18
|
-
# have the outer data key that json-api expects
|
19
|
-
payload = { data: payload } unless payload[:data]
|
20
|
-
|
20
|
+
def load(payload)
|
21
21
|
uri = URI.parse("#{@config.hostname}#{@config.endpoint}")
|
22
22
|
|
23
23
|
header = {
|
24
|
-
|
25
|
-
|
24
|
+
'Authorization' => "Bearer #{@config.access_token}",
|
25
|
+
'Content-Type': 'application/json'
|
26
26
|
}
|
27
27
|
use_ssl = uri.scheme == 'https'
|
28
28
|
|
@@ -1,49 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tty/table'
|
4
|
+
require 'chronicle/utils/hash_utils'
|
2
5
|
require 'active_support/core_ext/string/filters'
|
3
6
|
require 'active_support/core_ext/hash/reverse_merge'
|
4
7
|
|
5
8
|
module Chronicle
|
6
9
|
module ETL
|
7
10
|
class TableLoader < Chronicle::ETL::Loader
|
11
|
+
|
8
12
|
register_connector do |r|
|
13
|
+
r.identifier = :table
|
9
14
|
r.description = 'an ASCII table'
|
10
15
|
end
|
11
16
|
|
12
17
|
setting :truncate_values_at, default: 40
|
13
18
|
setting :table_renderer, default: :basic
|
14
|
-
setting :fields_exclude, default: ['
|
19
|
+
setting :fields_exclude, default: ['type']
|
15
20
|
setting :header_row, default: true
|
16
21
|
|
17
22
|
def load(record)
|
18
|
-
records << record
|
23
|
+
records << record
|
19
24
|
end
|
20
25
|
|
21
26
|
def finish
|
22
27
|
return if records.empty?
|
23
28
|
|
24
|
-
headers =
|
29
|
+
headers = gather_headers(records)
|
25
30
|
rows = build_rows(records, headers)
|
26
31
|
|
32
|
+
render_table(headers, rows)
|
33
|
+
end
|
34
|
+
|
35
|
+
def records
|
36
|
+
@records ||= []
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def render_table(headers, rows)
|
27
42
|
@table = TTY::Table.new(header: (headers if @config.header_row), rows: rows)
|
28
43
|
puts @table.render(
|
29
44
|
@config.table_renderer.to_sym,
|
30
45
|
padding: [0, 2, 0, 0]
|
31
46
|
)
|
47
|
+
rescue TTY::Table::ResizeError
|
48
|
+
# The library throws this error before trying to render the table
|
49
|
+
# vertically. These options seem to work.
|
50
|
+
puts @table.render(
|
51
|
+
@config.table_renderer.to_sym,
|
52
|
+
padding: [0, 2, 0, 0],
|
53
|
+
width: 10_000,
|
54
|
+
resize: false
|
55
|
+
)
|
32
56
|
end
|
33
57
|
|
34
|
-
def records
|
35
|
-
|
58
|
+
def gather_headers(records)
|
59
|
+
records_flattened = records.map do |record|
|
60
|
+
Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
61
|
+
end
|
62
|
+
records_flattened.flat_map(&:keys).uniq
|
36
63
|
end
|
37
64
|
|
38
|
-
private
|
39
|
-
|
40
65
|
def build_rows(records, headers)
|
41
66
|
records.map do |record|
|
42
|
-
values =
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
67
|
+
values = Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
68
|
+
.values_at(*headers)
|
69
|
+
.map { |value| force_utf8(value.to_s) }
|
70
|
+
|
71
|
+
values = values.map { |value| value.truncate(@config.truncate_values_at) } if @config.truncate_values_at
|
47
72
|
|
48
73
|
values
|
49
74
|
end
|
data/lib/chronicle/etl/logger.rb
CHANGED
@@ -14,13 +14,13 @@ module Chronicle
|
|
14
14
|
|
15
15
|
@log_level = INFO
|
16
16
|
|
17
|
-
def output
|
17
|
+
def output(message, level)
|
18
18
|
return unless level >= @log_level
|
19
19
|
|
20
20
|
if @ui_element
|
21
21
|
@ui_element.log(message)
|
22
22
|
else
|
23
|
-
|
23
|
+
warn(message)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -41,7 +41,7 @@ module Chronicle
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def attach_to_ui(ui_element)
|
44
|
-
@
|
44
|
+
@ui_element = ui_element
|
45
45
|
end
|
46
46
|
|
47
47
|
def detach_from_ui
|
@@ -49,21 +49,19 @@ module Chronicle
|
|
49
49
|
def authorize!
|
50
50
|
associate_oauth_credentials
|
51
51
|
@server = load_server
|
52
|
-
spinner = TTY::Spinner.new(
|
53
|
-
Chronicle::ETL::Logger.attach_to_ui(spinner)
|
52
|
+
spinner = TTY::Spinner.new(':spinner :title', format: :dots_2)
|
54
53
|
spinner.auto_spin
|
55
|
-
spinner.update(title: "Starting temporary authorization server on port #{@port}"
|
54
|
+
spinner.update(title: "Starting temporary authorization server on port #{@port}"'')
|
56
55
|
|
57
56
|
server_thread = start_authorization_server(port: @port)
|
58
57
|
start_oauth_flow
|
59
58
|
|
60
|
-
spinner.update(title:
|
59
|
+
spinner.update(title: 'Waiting for authorization to complete in your browser')
|
61
60
|
sleep 0.1 while authorization_pending?(server_thread)
|
62
61
|
|
63
62
|
@server.quit!
|
64
63
|
server_thread.join
|
65
64
|
spinner.success("(#{'successful'.green})")
|
66
|
-
Chronicle::ETL::Logger.detach_from_ui
|
67
65
|
|
68
66
|
# TODO: properly handle failed authorizations
|
69
67
|
raise Chronicle::ETL::AuthorizationError unless @server.latest_authorization
|
@@ -87,7 +85,7 @@ module Chronicle
|
|
87
85
|
def load_server
|
88
86
|
# Load at runtime so that we can set omniauth strategies based on
|
89
87
|
# which chronicle plugin has been loaded.
|
90
|
-
require_relative '
|
88
|
+
require_relative 'authorization_server'
|
91
89
|
Chronicle::ETL::AuthorizationServer
|
92
90
|
end
|
93
91
|
|
@@ -99,7 +97,7 @@ module Chronicle
|
|
99
97
|
|
100
98
|
Thread.new do
|
101
99
|
@server.run!({ port: @port }) do |s|
|
102
|
-
s.silent = true if s.
|
100
|
+
s.silent = true if defined?(::Thin::Server) && s.instance_of?(::Thin::Server)
|
103
101
|
end
|
104
102
|
end
|
105
103
|
end
|
@@ -119,7 +117,7 @@ module Chronicle
|
|
119
117
|
AccessLog: [],
|
120
118
|
# TODO: make this windows friendly
|
121
119
|
# https://github.com/winton/stasis/commit/77da36f43285fda129300e382f18dfaff48571b0
|
122
|
-
Logger: WEBrick::Log
|
120
|
+
Logger: WEBrick::Log.new('/dev/null')
|
123
121
|
}
|
124
122
|
)
|
125
123
|
rescue LoadError
|
@@ -129,8 +127,8 @@ module Chronicle
|
|
129
127
|
def extract_secrets(authorization:, pluck_values:)
|
130
128
|
return authorization unless pluck_values&.any?
|
131
129
|
|
132
|
-
pluck_values.
|
133
|
-
|
130
|
+
pluck_values.transform_values do |identifiers|
|
131
|
+
authorization.dig(*identifiers)
|
134
132
|
end
|
135
133
|
end
|
136
134
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# TODO: move this into chronicle-core after figuring out what to do about data vs properties
|
4
|
+
module Chronicle
|
5
|
+
module ETL
|
6
|
+
class Record
|
7
|
+
attr_accessor :data, :extraction
|
8
|
+
|
9
|
+
def initialize(data: {}, extraction: nil)
|
10
|
+
@data = data
|
11
|
+
@extraction = extraction
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|