chronicle-etl 0.5.4 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +98 -73
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +50 -45
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +10 -8
- data/lib/chronicle/etl/cli/connectors.rb +9 -9
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +29 -26
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +20 -7
- data/lib/chronicle/etl/configurable.rb +24 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +39 -27
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +3 -3
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -10
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +117 -0
- data/lib/chronicle/etl/registry/plugin_registration.rb +19 -0
- data/lib/chronicle/etl/registry/plugins.rb +171 -0
- data/lib/chronicle/etl/registry/registry.rb +3 -52
- data/lib/chronicle/etl/registry/self_registering.rb +1 -1
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +5 -5
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +91 -45
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/registry/plugin_registry.rb +0 -95
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'active_support/core_ext/hash/deep_merge'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -9,12 +11,14 @@ module Chronicle
|
|
9
11
|
name: 'stdin',
|
10
12
|
options: {}
|
11
13
|
},
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
transformers: [
|
15
|
+
{
|
16
|
+
name: 'null',
|
17
|
+
options: {}
|
18
|
+
}
|
19
|
+
],
|
16
20
|
loader: {
|
17
|
-
name: '
|
21
|
+
name: 'json',
|
18
22
|
options: {}
|
19
23
|
}
|
20
24
|
}.freeze
|
@@ -22,7 +26,7 @@ module Chronicle
|
|
22
26
|
attr_reader :errors
|
23
27
|
attr_accessor :definition
|
24
28
|
|
25
|
-
def initialize
|
29
|
+
def initialize
|
26
30
|
@definition = SKELETON_DEFINITION
|
27
31
|
end
|
28
32
|
|
@@ -34,12 +38,12 @@ module Chronicle
|
|
34
38
|
def validate
|
35
39
|
@errors = {}
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
extractor_klass
|
42
|
+
transformer_klasses
|
43
|
+
loader_klass
|
44
|
+
rescue Chronicle::ETL::PluginError => e
|
45
|
+
@errors[:plugins] ||= []
|
46
|
+
@errors[:plugins] << e
|
43
47
|
end
|
44
48
|
|
45
49
|
def plugins_missing?
|
@@ -48,12 +52,11 @@ module Chronicle
|
|
48
52
|
return false unless @errors[:plugins]&.any?
|
49
53
|
|
50
54
|
@errors[:plugins]
|
51
|
-
.
|
52
|
-
.any?
|
55
|
+
.any? { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
|
53
56
|
end
|
54
57
|
|
55
58
|
def validate!
|
56
|
-
raise(Chronicle::ETL::JobDefinitionError.new(self),
|
59
|
+
raise(Chronicle::ETL::JobDefinitionError.new(self), 'Job definition is invalid') unless valid?
|
57
60
|
|
58
61
|
true
|
59
62
|
end
|
@@ -66,19 +69,20 @@ module Chronicle
|
|
66
69
|
|
67
70
|
# For each connector in this job, mix in secrets into the options
|
68
71
|
def apply_default_secrets
|
69
|
-
|
72
|
+
# FIXME: handle transformer secrets
|
73
|
+
%i[extractor loader].each do |phase|
|
70
74
|
# If the options have a `secrets` key, we look up those secrets and
|
71
|
-
# mix them in. If not, use the connector's plugin name and look up
|
75
|
+
# mix them in. If not, use the connector's plugin name and look up
|
72
76
|
# secrets with the same namespace
|
73
77
|
if @definition[phase][:options][:secrets]
|
74
78
|
namespace = @definition[phase][:options][:secrets]
|
75
79
|
else
|
76
80
|
# We don't want to do this lookup for built-in connectors
|
77
|
-
next if __send__("#{phase}_klass"
|
81
|
+
next if __send__(:"#{phase}_klass").connector_registration.built_in?
|
78
82
|
|
79
83
|
# infer plugin name from connector name and use it for secrets
|
80
84
|
# namespace
|
81
|
-
namespace = @definition[phase][:name].split(
|
85
|
+
namespace = @definition[phase][:name].split(':').first
|
82
86
|
end
|
83
87
|
|
84
88
|
# Reverse merge secrets into connector's options (we want to preserve
|
@@ -98,15 +102,17 @@ module Chronicle
|
|
98
102
|
end
|
99
103
|
|
100
104
|
def extractor_klass
|
101
|
-
|
105
|
+
find_connector_klass(:extractor, @definition[:extractor][:name])
|
102
106
|
end
|
103
107
|
|
104
|
-
def
|
105
|
-
|
108
|
+
def transformer_klasses
|
109
|
+
@definition[:transformers].map do |transformer|
|
110
|
+
find_connector_klass(:transformer, transformer[:name])
|
111
|
+
end
|
106
112
|
end
|
107
113
|
|
108
114
|
def loader_klass
|
109
|
-
|
115
|
+
find_connector_klass(:loader, @definition[:loader][:name])
|
110
116
|
end
|
111
117
|
|
112
118
|
def extractor_options
|
@@ -114,7 +120,9 @@ module Chronicle
|
|
114
120
|
end
|
115
121
|
|
116
122
|
def transformer_options
|
117
|
-
@definition[:transformer
|
123
|
+
@definition[:transformers].map do |transformer|
|
124
|
+
transformer[:options]
|
125
|
+
end
|
118
126
|
end
|
119
127
|
|
120
128
|
def loader_options
|
@@ -123,12 +131,16 @@ module Chronicle
|
|
123
131
|
|
124
132
|
private
|
125
133
|
|
126
|
-
def
|
127
|
-
Chronicle::ETL::Registry.
|
134
|
+
def find_schema_transformer_klass(source_klass, target)
|
135
|
+
Chronicle::ETL::Registry::Connectors.find_converter_for_source(source_klass, target).klass
|
136
|
+
end
|
137
|
+
|
138
|
+
def find_connector_klass(phase, identifier)
|
139
|
+
Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
|
128
140
|
end
|
129
141
|
|
130
142
|
def load_credentials
|
131
|
-
|
143
|
+
%i[extractor loader].each do |phase|
|
132
144
|
credentials_name = @definition[phase].dig(:options, :credentials)
|
133
145
|
if credentials_name
|
134
146
|
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|
@@ -9,13 +9,13 @@ module Chronicle
|
|
9
9
|
extend Forwardable
|
10
10
|
|
11
11
|
attr_accessor :job,
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
12
|
+
:job_id,
|
13
|
+
:last_id,
|
14
|
+
:highest_timestamp,
|
15
|
+
:num_records_processed,
|
16
|
+
:started_at,
|
17
|
+
:finished_at,
|
18
|
+
:success
|
19
19
|
|
20
20
|
def_delegators :@job, :save_log?
|
21
21
|
|
@@ -28,11 +28,11 @@ module Chronicle
|
|
28
28
|
|
29
29
|
# Log the result of a single transformation in a job
|
30
30
|
# @param transformer [Chronicle::ETL::Transformer] The transformer that ran
|
31
|
-
def log_transformation(
|
32
|
-
@last_id = transformer.id if transformer.id
|
31
|
+
def log_transformation(_transformer)
|
32
|
+
# @last_id = transformer.id if transformer.id
|
33
33
|
|
34
34
|
# Save the highest timestamp that we've encountered so far
|
35
|
-
@highest_timestamp = [transformer.timestamp, @highest_timestamp].compact.max if transformer.timestamp
|
35
|
+
# @highest_timestamp = [transformer.timestamp, @highest_timestamp].compact.max if transformer.timestamp
|
36
36
|
|
37
37
|
# TODO: a transformer might yield nil. We might also want certain transformers to explode
|
38
38
|
# records into multiple new ones. Therefore, this variable will need more subtle behaviour
|
@@ -54,13 +54,13 @@ module Chronicle
|
|
54
54
|
@finished_at = Time.now
|
55
55
|
end
|
56
56
|
|
57
|
-
def job=
|
57
|
+
def job=(job)
|
58
58
|
@job = job
|
59
59
|
@job_id = job.id
|
60
60
|
end
|
61
61
|
|
62
62
|
def duration
|
63
|
-
return unless @finished_at
|
63
|
+
return unless @finished_at && @started_at
|
64
64
|
|
65
65
|
@finished_at - @started_at
|
66
66
|
end
|
@@ -78,14 +78,12 @@ module Chronicle
|
|
78
78
|
}
|
79
79
|
end
|
80
80
|
|
81
|
-
private
|
82
|
-
|
83
81
|
# Create a new JobLog and set its instance variables from a serialized hash
|
84
|
-
def self.build_from_serialized
|
82
|
+
def self.build_from_serialized(attrs)
|
85
83
|
attrs.delete(:id)
|
86
84
|
new do |job_log|
|
87
85
|
attrs.each do |key, value|
|
88
|
-
setter = "#{key
|
86
|
+
setter = :"#{key}="
|
89
87
|
job_log.send(setter, value)
|
90
88
|
end
|
91
89
|
end
|
@@ -12,7 +12,7 @@ module Chronicle
|
|
12
12
|
attr_accessor :job_log
|
13
13
|
|
14
14
|
# For a given `job_id`, return the last successful log
|
15
|
-
def self.load_latest(
|
15
|
+
def self.load_latest(_job_id)
|
16
16
|
with_db_connection do |db|
|
17
17
|
attrs = db[:job_logs].reverse_order(:finished_at).where(success: true).first
|
18
18
|
JobLog.build_from_serialized(attrs) if attrs
|
@@ -28,11 +28,11 @@ module Chronicle
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def self.db_exists?
|
31
|
-
File.
|
31
|
+
File.exist?(db_filename)
|
32
32
|
end
|
33
33
|
|
34
34
|
def self.schema_exists?(db)
|
35
|
-
|
35
|
+
db.tables.include? :job_logs
|
36
36
|
end
|
37
37
|
|
38
38
|
def self.db_filename
|
@@ -44,7 +44,7 @@ module Chronicle
|
|
44
44
|
FileUtils.mkdir_p(File.dirname(db_filename))
|
45
45
|
end
|
46
46
|
|
47
|
-
def self.initialize_schema
|
47
|
+
def self.initialize_schema(db)
|
48
48
|
db.create_table :job_logs do
|
49
49
|
primary_key :id
|
50
50
|
String :job_id, null: false
|
@@ -1,4 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'csv'
|
4
|
+
require 'chronicle/utils/hash_utils'
|
2
5
|
|
3
6
|
module Chronicle
|
4
7
|
module ETL
|
@@ -6,6 +9,7 @@ module Chronicle
|
|
6
9
|
include Chronicle::ETL::Loaders::Helpers::StdoutHelper
|
7
10
|
|
8
11
|
register_connector do |r|
|
12
|
+
r.identifier = :csv
|
9
13
|
r.description = 'CSV'
|
10
14
|
end
|
11
15
|
|
@@ -18,13 +22,14 @@ module Chronicle
|
|
18
22
|
end
|
19
23
|
|
20
24
|
def load(record)
|
21
|
-
records << record
|
25
|
+
records << record
|
22
26
|
end
|
23
27
|
|
24
28
|
def finish
|
25
29
|
return unless records.any?
|
26
30
|
|
27
|
-
headers =
|
31
|
+
# headers = filtered_headers(records)
|
32
|
+
headers = gather_headers(records)
|
28
33
|
|
29
34
|
csv_options = {}
|
30
35
|
if @config.headers
|
@@ -34,8 +39,7 @@ module Chronicle
|
|
34
39
|
|
35
40
|
csv_output = CSV.generate(**csv_options) do |csv|
|
36
41
|
records.each do |record|
|
37
|
-
csv << record
|
38
|
-
.transform_keys(&:to_sym)
|
42
|
+
csv << Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
39
43
|
.values_at(*headers)
|
40
44
|
.map { |value| force_utf8(value) }
|
41
45
|
end
|
@@ -48,6 +52,15 @@ module Chronicle
|
|
48
52
|
File.write(@config.output, csv_output)
|
49
53
|
end
|
50
54
|
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def gather_headers(records)
|
59
|
+
records_flattened = records.map do |record|
|
60
|
+
Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
61
|
+
end
|
62
|
+
records_flattened.flat_map(&:keys).uniq
|
63
|
+
end
|
51
64
|
end
|
52
65
|
end
|
53
66
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tempfile'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -5,6 +7,8 @@ module Chronicle
|
|
5
7
|
module Loaders
|
6
8
|
module Helpers
|
7
9
|
module StdoutHelper
|
10
|
+
# TODO: have option to immediately output to stdout
|
11
|
+
|
8
12
|
# TODO: let users use "stdout" as an option for the `output` setting
|
9
13
|
# Assume we're using stdout if no output is specified
|
10
14
|
def output_to_stdout?
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tempfile'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -6,10 +8,10 @@ module Chronicle
|
|
6
8
|
include Chronicle::ETL::Loaders::Helpers::StdoutHelper
|
7
9
|
|
8
10
|
register_connector do |r|
|
11
|
+
r.identifier = :json
|
9
12
|
r.description = 'json'
|
10
13
|
end
|
11
14
|
|
12
|
-
setting :serializer
|
13
15
|
setting :output
|
14
16
|
|
15
17
|
# If true, one JSON record per line. If false, output a single json
|
@@ -26,23 +28,24 @@ module Chronicle
|
|
26
28
|
if output_to_stdout?
|
27
29
|
create_stdout_temp_file
|
28
30
|
else
|
29
|
-
File.open(@config.output,
|
31
|
+
File.open(@config.output, 'w+')
|
30
32
|
end
|
31
33
|
|
32
34
|
@output_file.puts("[\n") unless @config.line_separated
|
33
35
|
end
|
34
36
|
|
35
37
|
def load(record)
|
36
|
-
serialized =
|
38
|
+
serialized = record.to_h
|
37
39
|
|
38
40
|
# When dealing with raw data, we can get improperly encoded strings
|
39
41
|
# (eg from sqlite database columns). We force conversion to UTF-8
|
40
42
|
# before converting into JSON
|
41
|
-
encoded = serialized.transform_values do |value|
|
42
|
-
|
43
|
+
# encoded = serialized.transform_values do |value|
|
44
|
+
# next value unless value.is_a?(String)
|
43
45
|
|
44
|
-
|
45
|
-
end
|
46
|
+
# force_utf8(value)
|
47
|
+
# end
|
48
|
+
encoded = deeply_force_utf8(serialized)
|
46
49
|
|
47
50
|
line = encoded.to_json
|
48
51
|
# For line-separated output, we just put json + newline
|
@@ -57,6 +60,8 @@ module Chronicle
|
|
57
60
|
@output_file.write(line)
|
58
61
|
|
59
62
|
@first_line = false
|
63
|
+
# rescue StandardError => e
|
64
|
+
# binding.pry
|
60
65
|
end
|
61
66
|
|
62
67
|
def finish
|
@@ -70,9 +75,24 @@ module Chronicle
|
|
70
75
|
|
71
76
|
private
|
72
77
|
|
73
|
-
# TODO:
|
74
|
-
def
|
75
|
-
|
78
|
+
# TODO: Move this to a helper module
|
79
|
+
def deeply_force_utf8(hash)
|
80
|
+
# FIXME: probably shouldn't happen but it does
|
81
|
+
return hash.map { |x| force_utf8(x) } if hash.is_a?(Array)
|
82
|
+
return force_utf8(hash) unless hash.is_a?(Hash)
|
83
|
+
|
84
|
+
hash.transform_values do |value|
|
85
|
+
case value
|
86
|
+
when String
|
87
|
+
force_utf8(value)
|
88
|
+
when Hash
|
89
|
+
deeply_force_utf8(value)
|
90
|
+
when Array
|
91
|
+
value.map { |v| deeply_force_utf8(v) }
|
92
|
+
else
|
93
|
+
value
|
94
|
+
end
|
95
|
+
end
|
76
96
|
end
|
77
97
|
end
|
78
98
|
end
|
@@ -32,23 +32,6 @@ module Chronicle
|
|
32
32
|
|
33
33
|
# Called once there are no more records to process
|
34
34
|
def finish; end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
def build_headers(records)
|
39
|
-
headers =
|
40
|
-
if @config.fields && @config.fields.any?
|
41
|
-
Set[*@config.fields]
|
42
|
-
else
|
43
|
-
# use all the keys of the flattened record hash
|
44
|
-
Set[*records.map(&:keys).flatten.map(&:to_s).uniq]
|
45
|
-
end
|
46
|
-
|
47
|
-
headers = headers.delete_if { |header| header.end_with?(*@config.fields_exclude) }
|
48
|
-
headers = headers.first(@config.fields_limit) if @config.fields_limit
|
49
|
-
|
50
|
-
headers.to_a.map(&:to_sym)
|
51
|
-
end
|
52
35
|
end
|
53
36
|
end
|
54
37
|
end
|
@@ -1,11 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'net/http'
|
2
4
|
require 'uri'
|
3
5
|
require 'json'
|
6
|
+
require 'chronicle/serialization'
|
4
7
|
|
5
8
|
module Chronicle
|
6
9
|
module ETL
|
7
10
|
class RestLoader < Chronicle::ETL::Loader
|
8
11
|
register_connector do |r|
|
12
|
+
r.identifier = :rest
|
9
13
|
r.description = 'a REST endpoint'
|
10
14
|
end
|
11
15
|
|
@@ -13,16 +17,12 @@ module Chronicle
|
|
13
17
|
setting :endpoint, required: true
|
14
18
|
setting :access_token
|
15
19
|
|
16
|
-
def load(
|
17
|
-
payload = Chronicle::ETL::JSONAPISerializer.serialize(record)
|
18
|
-
# have the outer data key that json-api expects
|
19
|
-
payload = { data: payload } unless payload[:data]
|
20
|
-
|
20
|
+
def load(payload)
|
21
21
|
uri = URI.parse("#{@config.hostname}#{@config.endpoint}")
|
22
22
|
|
23
23
|
header = {
|
24
|
-
|
25
|
-
|
24
|
+
'Authorization' => "Bearer #{@config.access_token}",
|
25
|
+
'Content-Type': 'application/json'
|
26
26
|
}
|
27
27
|
use_ssl = uri.scheme == 'https'
|
28
28
|
|
@@ -1,49 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tty/table'
|
4
|
+
require 'chronicle/utils/hash_utils'
|
2
5
|
require 'active_support/core_ext/string/filters'
|
3
6
|
require 'active_support/core_ext/hash/reverse_merge'
|
4
7
|
|
5
8
|
module Chronicle
|
6
9
|
module ETL
|
7
10
|
class TableLoader < Chronicle::ETL::Loader
|
11
|
+
|
8
12
|
register_connector do |r|
|
13
|
+
r.identifier = :table
|
9
14
|
r.description = 'an ASCII table'
|
10
15
|
end
|
11
16
|
|
12
17
|
setting :truncate_values_at, default: 40
|
13
18
|
setting :table_renderer, default: :basic
|
14
|
-
setting :fields_exclude, default: ['
|
19
|
+
setting :fields_exclude, default: ['type']
|
15
20
|
setting :header_row, default: true
|
16
21
|
|
17
22
|
def load(record)
|
18
|
-
records << record
|
23
|
+
records << record
|
19
24
|
end
|
20
25
|
|
21
26
|
def finish
|
22
27
|
return if records.empty?
|
23
28
|
|
24
|
-
headers =
|
29
|
+
headers = gather_headers(records)
|
25
30
|
rows = build_rows(records, headers)
|
26
31
|
|
32
|
+
render_table(headers, rows)
|
33
|
+
end
|
34
|
+
|
35
|
+
def records
|
36
|
+
@records ||= []
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def render_table(headers, rows)
|
27
42
|
@table = TTY::Table.new(header: (headers if @config.header_row), rows: rows)
|
28
43
|
puts @table.render(
|
29
44
|
@config.table_renderer.to_sym,
|
30
45
|
padding: [0, 2, 0, 0]
|
31
46
|
)
|
47
|
+
rescue TTY::Table::ResizeError
|
48
|
+
# The library throws this error before trying to render the table
|
49
|
+
# vertically. These options seem to work.
|
50
|
+
puts @table.render(
|
51
|
+
@config.table_renderer.to_sym,
|
52
|
+
padding: [0, 2, 0, 0],
|
53
|
+
width: 10_000,
|
54
|
+
resize: false
|
55
|
+
)
|
32
56
|
end
|
33
57
|
|
34
|
-
def records
|
35
|
-
|
58
|
+
def gather_headers(records)
|
59
|
+
records_flattened = records.map do |record|
|
60
|
+
Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
61
|
+
end
|
62
|
+
records_flattened.flat_map(&:keys).uniq
|
36
63
|
end
|
37
64
|
|
38
|
-
private
|
39
|
-
|
40
65
|
def build_rows(records, headers)
|
41
66
|
records.map do |record|
|
42
|
-
values =
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
67
|
+
values = Chronicle::Utils::HashUtils.flatten_hash(record.to_h)
|
68
|
+
.values_at(*headers)
|
69
|
+
.map { |value| force_utf8(value.to_s) }
|
70
|
+
|
71
|
+
values = values.map { |value| value.truncate(@config.truncate_values_at) } if @config.truncate_values_at
|
47
72
|
|
48
73
|
values
|
49
74
|
end
|
data/lib/chronicle/etl/logger.rb
CHANGED
@@ -14,13 +14,13 @@ module Chronicle
|
|
14
14
|
|
15
15
|
@log_level = INFO
|
16
16
|
|
17
|
-
def output
|
17
|
+
def output(message, level)
|
18
18
|
return unless level >= @log_level
|
19
19
|
|
20
20
|
if @ui_element
|
21
21
|
@ui_element.log(message)
|
22
22
|
else
|
23
|
-
|
23
|
+
warn(message)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -41,7 +41,7 @@ module Chronicle
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def attach_to_ui(ui_element)
|
44
|
-
@
|
44
|
+
@ui_element = ui_element
|
45
45
|
end
|
46
46
|
|
47
47
|
def detach_from_ui
|
@@ -49,21 +49,19 @@ module Chronicle
|
|
49
49
|
def authorize!
|
50
50
|
associate_oauth_credentials
|
51
51
|
@server = load_server
|
52
|
-
spinner = TTY::Spinner.new(
|
53
|
-
Chronicle::ETL::Logger.attach_to_ui(spinner)
|
52
|
+
spinner = TTY::Spinner.new(':spinner :title', format: :dots_2)
|
54
53
|
spinner.auto_spin
|
55
|
-
spinner.update(title: "Starting temporary authorization server on port #{@port}"
|
54
|
+
spinner.update(title: "Starting temporary authorization server on port #{@port}"'')
|
56
55
|
|
57
56
|
server_thread = start_authorization_server(port: @port)
|
58
57
|
start_oauth_flow
|
59
58
|
|
60
|
-
spinner.update(title:
|
59
|
+
spinner.update(title: 'Waiting for authorization to complete in your browser')
|
61
60
|
sleep 0.1 while authorization_pending?(server_thread)
|
62
61
|
|
63
62
|
@server.quit!
|
64
63
|
server_thread.join
|
65
64
|
spinner.success("(#{'successful'.green})")
|
66
|
-
Chronicle::ETL::Logger.detach_from_ui
|
67
65
|
|
68
66
|
# TODO: properly handle failed authorizations
|
69
67
|
raise Chronicle::ETL::AuthorizationError unless @server.latest_authorization
|
@@ -87,7 +85,7 @@ module Chronicle
|
|
87
85
|
def load_server
|
88
86
|
# Load at runtime so that we can set omniauth strategies based on
|
89
87
|
# which chronicle plugin has been loaded.
|
90
|
-
require_relative '
|
88
|
+
require_relative 'authorization_server'
|
91
89
|
Chronicle::ETL::AuthorizationServer
|
92
90
|
end
|
93
91
|
|
@@ -99,7 +97,7 @@ module Chronicle
|
|
99
97
|
|
100
98
|
Thread.new do
|
101
99
|
@server.run!({ port: @port }) do |s|
|
102
|
-
s.silent = true if s.
|
100
|
+
s.silent = true if defined?(::Thin::Server) && s.instance_of?(::Thin::Server)
|
103
101
|
end
|
104
102
|
end
|
105
103
|
end
|
@@ -119,7 +117,7 @@ module Chronicle
|
|
119
117
|
AccessLog: [],
|
120
118
|
# TODO: make this windows friendly
|
121
119
|
# https://github.com/winton/stasis/commit/77da36f43285fda129300e382f18dfaff48571b0
|
122
|
-
Logger: WEBrick::Log
|
120
|
+
Logger: WEBrick::Log.new('/dev/null')
|
123
121
|
}
|
124
122
|
)
|
125
123
|
rescue LoadError
|
@@ -129,8 +127,8 @@ module Chronicle
|
|
129
127
|
def extract_secrets(authorization:, pluck_values:)
|
130
128
|
return authorization unless pluck_values&.any?
|
131
129
|
|
132
|
-
pluck_values.
|
133
|
-
|
130
|
+
pluck_values.transform_values do |identifiers|
|
131
|
+
authorization.dig(*identifiers)
|
134
132
|
end
|
135
133
|
end
|
136
134
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# TODO: move this into chronicle-core after figuring out what to do about data vs properties
|
4
|
+
module Chronicle
|
5
|
+
module ETL
|
6
|
+
class Record
|
7
|
+
attr_accessor :data, :extraction
|
8
|
+
|
9
|
+
def initialize(data: {}, extraction: nil)
|
10
|
+
@data = data
|
11
|
+
@extraction = extraction
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|