chronicle-etl 0.5.5 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +75 -68
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +51 -49
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +8 -6
- data/lib/chronicle/etl/cli/connectors.rb +7 -7
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +14 -15
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +18 -8
- data/lib/chronicle/etl/configurable.rb +20 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +38 -26
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +2 -2
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +93 -36
- data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
- data/lib/chronicle/etl/registry/plugins.rb +27 -19
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +4 -4
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +49 -47
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Rewrites a record's data as a Chronicle::Serialization::Record tagged
    # with the 'chronobase' schema: property names are renamed through
    # PROPERTY_MAP, a verb/represents property is derived from the type id,
    # and nested Chronicle::Models::Base values are converted recursively.
    class ChronobaseTransformer < Chronicle::ETL::Transformer
      # Incoming property name => chronobase property name.
      PROPERTY_MAP = {
        source: :provider,
        source_id: :provider_id,
        url: :provider_url,
        end_time: :end_at,
        start_time: :start_at,

        name: :title,
        description: :body,
        text: :body,

        recipient: :consumers,
        agent: :actor,
        object: :involved,

        # music ones
        by_artist: :creators,
        in_album: :containers
      }.freeze

      # Action type id => value stored under :verb.
      VERB_MAP = {
        ListenAction: 'listened',
        CommunicateAction: 'messaged'
      }.freeze

      # Entity type id => value stored under :represents.
      ENTITY_MAP = {
        MusicRecording: 'song',
        MusicAlbum: 'album',
        MusicGroup: 'musicartist',
        Message: 'message',
        Person: 'person'
      }.freeze

      register_connector do |r|
        r.identifier = :chronobase
        r.description = 'records to chronobase schema'
      end

      # @param record [Chronicle::ETL::Record] record whose +data+ is converted
      # @return [Chronicle::Serialization::Record]
      def transform(record)
        deeply_convert_record(record.data)
      end

      private

      # Builds the chronobase record for +record+, recursing into nested models.
      def deeply_convert_record(record)
        type = activity?(record) ? 'activity' : 'entity'

        # Rename known properties; unknown names are kept as they are.
        properties = {}
        record.properties.compact.each do |name, value|
          properties[PROPERTY_MAP[name.to_sym] || name] = value
        end

        type_key = record.type_id.to_sym
        properties[:verb] = VERB_MAP[type_key] if VERB_MAP.key?(type_key)
        properties[:represents] = ENTITY_MAP[type_key] if ENTITY_MAP.key?(type_key)

        properties.transform_values! { |value| convert_value(value) }

        Chronicle::Serialization::Record.new(
          id: record.id,
          type: type,
          properties: properties.compact,
          meta: {
            dedupe_on: transform_dedupe_on(record)
          },
          schema: 'chronobase'
        )
      end

      # Converts a property value: arrays are mapped element by element (one
      # level only), anything else is treated as a single candidate model.
      def convert_value(value)
        return value.map { |element| convert_model(element) } if value.is_a?(Array)

        convert_model(value)
      end

      # Converts +candidate+ when it is a model; returns it untouched otherwise.
      def convert_model(candidate)
        candidate.is_a?(Chronicle::Models::Base) ? deeply_convert_record(candidate) : candidate
      end

      # A record counts as an activity when its type id ends with 'Action'.
      def activity?(record)
        record.type_id.end_with?('Action')
      end

      # Renames the fields of every dedupe set and joins each set with commas.
      # The :type field maps to :verb for activities and :represents otherwise.
      def transform_dedupe_on(record)
        type_target = activity?(record) ? :verb : :represents
        renames = PROPERTY_MAP.merge({ type: type_target })

        record.dedupe_on.map do |fields|
          fields.map { |field| renames[field] || field }.join(',')
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'chronicle/utils/hash_utils'
|
4
|
+
|
5
|
+
module Chronicle
  module ETL
    # A transformer that passes the record's data through
    # Chronicle::Utils::HashUtils.flatten_hash and keeps only the first
    # `limit` entries of the result.
    class FieldsLimitTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :fields_limit
        r.description = 'by taking first N fields'
      end

      setting :limit, type: :numeric, default: 10

      # @param record [Chronicle::ETL::Record]
      # @return [Hash] the first `limit` key/value pairs of the flattened data
      def transform(record)
        flattened = Chronicle::Utils::HashUtils.flatten_hash(record.data.to_h)
        leading_pairs = flattened.first(@config.limit)
        leading_pairs.to_h
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # A transformer that filters the fields of a record and returns a new hash with only the specified fields.
    #
    # Fields are dot-separated paths (e.g. 'agent.name'); a segment may carry
    # an array index in brackets (e.g. 'items[0].name').
    class FilterFieldsTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :filter_fields
        r.description = 'by taking a subset of the fields'
      end

      setting :fields, type: :array, default: []

      # @param record [Chronicle::ETL::Record]
      # @return [Hash] nested hash holding only the configured fields
      def transform(record)
        hash = record.data.to_h.deep_transform_keys(&:to_sym)
        # Pass the array itself; a blockless `map` would hand over an Enumerator.
        filter_hash(hash, @config.fields)
      end

      private

      # Walks +data+ along the dot-separated +path+ and returns what it finds
      # (nil when a step is missing).
      def access_nested_value(data, path)
        path.split('.').reduce(data) do |acc, segment|
          if acc.is_a?(Array)
            # Collect the segment from every element; elements that cannot be
            # indexed by the segment are skipped.
            acc.map do |item|
              item[segment.to_sym]
            rescue StandardError
              nil
            end.compact
          elsif segment.include?('[')
            name, index = segment.split(/\[|\]/).reject(&:empty?)
            nested = acc && acc[name.to_sym]
            nested.is_a?(Array) ? nested[index.to_i] : nil
          else
            acc&.dig(segment.to_sym)
          end
        end
      end

      # Builds the result hash: one nested entry per requested field.
      def filter_hash(original_hash, fields)
        fields.each_with_object({}) do |field, result|
          value = access_nested_value(original_hash, field)
          keys = field.split('.')
          last_key = keys.pop.to_sym

          current = result
          keys.each do |segment|
            # Drop any '[index]' suffix, then symbolize so parent keys have the
            # same type as every other key in the result.
            segment = segment.split(/\[|\]/).first.to_s if segment.include?('[')
            key = segment.to_sym
            current[key] ||= {}
            current = current[key]
          end

          current[last_key] = value
        end
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Return only records that match all the conditions of the filters
    # setting.
    #
    # Each filter key is a dot-separated path into the record's data; purely
    # numeric segments are used as array indexes, all others as symbol keys.
    class FilterTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :filter
        r.description = 'by only accepting records that match conditions'
      end

      setting :filters, type: :hash

      # @param record [Chronicle::ETL::Record]
      # @return [Object, nil] the record's data when every filter matches,
      #   nil otherwise
      def transform(record)
        record_hash = record.data.to_h
        # The setting declares no default, so treat "not configured" as
        # "no conditions" instead of calling a method on nil.
        filters = @config.filters || {}

        matches = filters.all? do |key, expected|
          record_hash.dig(*dig_path(key)) == expected
        end

        record.data if matches
      end

      private

      # Turns 'a.0.b' (String or Symbol) into [:a, 0, :b] for Hash#dig.
      def dig_path(key)
        key.to_s.split('.').map do |segment|
          segment.match?(/\A\d+\z/) ? segment.to_i : segment.to_sym
        end
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Serializes a record's data with the serializer selected by the
    # `format` setting ('jsonld' or 'jsonapi').
    class FormatTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :format
        r.description = 'records to a different hash/json format'
      end

      setting :format, default: nil

      # @param record [Chronicle::ETL::Record]
      # @return [Object] whatever the selected serializer returns for the data
      # @raise [RuntimeError] when the configured format is not supported
      def transform(record)
        serializer = find_serializer(@config.format)
        serializer.serialize(record.data)
      end

      private

      # Maps a format name (String or Symbol) to its serializer class.
      def find_serializer(format)
        case format.to_s
        when 'jsonld'
          Chronicle::Serialization::JSONLDSerializer
        when 'jsonapi'
          Chronicle::Serialization::JSONAPISerializer
        else
          # Include the offending value so a misconfigured job is easy to fix.
          raise "unknown format: #{format.inspect}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Copies the extraction's meta fields into the record's data under the
    # :_meta key.
    class MergeMetaTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :merge_meta
        r.description = 'merge extraction meta fields into the record'
      end

      # @param record [Chronicle::ETL::Record]
      # @return [Object] the record's data; it is modified in place when the
      #   record has extraction meta, and returned untouched otherwise
      def transform(record)
        meta = record.extraction&.meta
        # Explicit early return: without it a record that has no extraction
        # falls through and raises on the assignment below.
        return record.data unless meta

        record.data[:_meta] = meta
        record.data
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Emits every incoming record's data `n` times.
    class MultiplyTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :multiply
        r.description = 'by emitting each record N times'
      end

      setting :n, default: 2, type: :numeric

      # Yields the record's data `n` times (never, when n is zero or negative).
      #
      # @param record [Chronicle::ETL::Record]
      # @yield [data] the record's data, once per copy
      # @return [nil]
      def transform(record)
        @config.n.to_i.times { yield record.data }

        # Integer#times returns its receiver. Return nil instead so that a run
        # which yielded nothing does not hand a bare number back to the caller
        # as if it were transformed data.
        nil
      end
    end
  end
end
|
@@ -1,18 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
  module ETL
    # Pass-through transformer: yields each record's data unchanged.
    class NullTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :null
        r.description = 'in no way'
      end

      # @param record [Chronicle::ETL::Record]
      # @yield [data] the record's data, untouched
      def transform(record)
        yield(record.data)
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Lets a random subset of records through.
    class SamplerTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :sampler
        r.description = 'by taking a sample'
      end

      setting :percent, default: 10, type: :numeric

      # Draws a random integer in 0...100 and keeps the record when the draw
      # is below `percent`.
      #
      # @param record [Chronicle::ETL::Record]
      # @return [Object, nil] the record's data when sampled, nil otherwise
      def transform(record)
        sampled = rand(100) < @config.percent
        record.data if sampled
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Holds on to every record and releases them, sorted by the configured
    # key, once the run is finished.
    class SortTransformer < Chronicle::ETL::Transformer
      register_connector do |r|
        r.identifier = :sort
        r.description = 'sorts records by a given field'
      end

      setting :key, required: true, default: 'id'
      setting :direction, required: false, default: 'desc'

      # Stashes the record; nothing is emitted until +finish+.
      #
      # @param record [Chronicle::ETL::Record]
      # @return [nil]
      def transform(record)
        stash_record(record)
      end

      # @return [Array, nil] the stashed records in sorted order, or nil when
      #   nothing was stashed
      def finish
        return unless @stashed_records&.any?

        ascending = @stashed_records.sort_by { |record| sort_key(record) }
        @config.direction == 'desc' ? ascending.reverse : ascending
      end

      private

      # Sort key that places records without a value for the key after all
      # records that have one (in ascending order).
      def sort_key(record)
        value = record.data[@config.key]
        value.nil? ? [1] : [0, value]
      end
    end
  end
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
# Abstract class representing a Transformer for an ETL job
|
@@ -5,65 +7,85 @@ module Chronicle
|
|
5
7
|
extend Chronicle::ETL::Registry::SelfRegistering
|
6
8
|
include Chronicle::ETL::Configurable
|
7
9
|
|
10
|
+
attr_reader :stashed_records
|
11
|
+
|
8
12
|
# Build a new transformer and apply its configuration.
#
# @param options [Hash] options for configuring this Transformer, passed in
#   from a Runner
def initialize(options = {})
  apply_options(options)
end
|
20
19
|
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
20
|
+
# Called once for each extracted record. Can produce 0 or more transformed records.
#
# A transformer may either yield data from +transform+ or simply return it;
# every piece of data is wrapped in a copy of the input record and handed to
# the block.
#
# @param record [Chronicle::ETL::Record] the record to transform
# @raise [ArgumentError] when +record+ is not a Record
def call(record, &block)
  raise ArgumentError, 'Input must be a Chronicle::ETL::Record' unless record.is_a?(Record)

  emit = ->(data) { block.call(update_data(record, data)) }

  yielded = false
  returned = transform(record) do |data|
    emit.call(data)
    yielded = true
  end

  # Only fall back to the return value when nothing was yielded; nil entries
  # are skipped.
  [returned].flatten.compact.each { |data| emit.call(data) } unless yielded
end
|
33
42
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
43
|
+
# Runs +finish+ and passes each record it returns to the block.
# Does nothing when +finish+ returns nil.
def call_finish(&block)
  finish&.each { |record| block.call(record) }
end
|
40
51
|
|
41
|
-
|
42
|
-
|
43
|
-
def friendly_identifier
|
44
|
-
id
|
52
|
+
# Placeholder that always raises; concrete transformers supply their own
# implementation.
#
# @raise [NotImplementedError] always
def transform(_record)
  raise(NotImplementedError, 'You must implement the transform method')
end
|
46
55
|
|
47
|
-
|
48
|
-
|
49
|
-
unknown = "???"
|
50
|
-
timestamp&.iso8601 || unknown
|
51
|
-
rescue TransformationError, NotImplementedError
|
52
|
-
unknown
|
53
|
-
end
|
56
|
+
# Called once after runner has processed all records.
# This default implementation does nothing and returns nil.
def finish
  nil
end
|
54
58
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
protected
|
60
|
+
|
61
|
+
# Appends +record+ to the stash, creating the stash on first use.
#
# @return [nil]
def stash_record(record)
  (@stashed_records ||= []).push(record)
  nil
end
|
66
|
+
|
67
|
+
# Empties the stash and returns the records that were in it.
#
# The stash is swapped for a fresh array rather than cleared in place:
# clearing the array that is also being returned would leave the caller with
# an empty result.
#
# @return [Array] the previously stashed records (empty when nothing was stashed)
def flush_stashed_records
  flushed = @stashed_records || []
  @stashed_records = []
  flushed
end
|
61
70
|
|
62
|
-
|
71
|
+
# Returns a clone of +record+ whose data is +new_data+; +record+ itself keeps
# its original data.
def update_data(record, new_data)
  record.clone.tap { |copy| copy.data = new_data }
end
|
64
76
|
end
|
65
77
|
end
|
66
78
|
end
|
67
79
|
|
68
80
|
require_relative 'null_transformer'
|
69
|
-
require_relative '
|
81
|
+
require_relative 'sampler_transformer'
|
82
|
+
require_relative 'buffer_transformer'
|
83
|
+
require_relative 'multiply_transformer'
|
84
|
+
require_relative 'sort_transformer'
|
85
|
+
require_relative 'chronicle_transformer'
|
86
|
+
require_relative 'format_transformer'
|
87
|
+
require_relative 'filter_fields_transformer'
|
88
|
+
require_relative 'fields_limit_transformer'
|
89
|
+
require_relative 'merge_meta_transformer'
|
90
|
+
require_relative 'filter_transformer'
|
91
|
+
require_relative 'chronobase_transformer'
|
@@ -7,7 +7,7 @@ module Chronicle
|
|
7
7
|
# Utility methods for dealing with binary files
|
8
8
|
module BinaryAttachments
|
9
9
|
# Encodes a file as a base64 data URI ("data:<mimetype>;base64,<payload>").
#
# @param filename [String] path of the file to encode
# @param mimetype [String, nil] mimetype to embed; guessed from the file when
#   omitted
# @return [String] the data URI
def self.filename_to_base64(filename:, mimetype: nil)
  mimetype ||= guess_mimetype(filename: filename)

  # binread returns the raw bytes; a text-mode read can alter binary content
  # on platforms that translate newlines.
  "data:#{mimetype};base64,#{Base64.strict_encode64(File.binread(filename))}"
end
|
@@ -4,7 +4,6 @@ require 'colorize'
|
|
4
4
|
module Chronicle
|
5
5
|
module ETL
|
6
6
|
module Utils
|
7
|
-
|
8
7
|
class ProgressBar
|
9
8
|
FORMAT_WITH_TOTAL = [
|
10
9
|
':bar ',
|
@@ -37,7 +36,7 @@ module Chronicle
|
|
37
36
|
'/s) '.light_black
|
38
37
|
].join.freeze
|
39
38
|
|
40
|
-
def initialize(title: 'Loading'
|
39
|
+
def initialize(total:, title: 'Loading')
|
41
40
|
opts = {
|
42
41
|
clear: true,
|
43
42
|
complete: '▓'.light_blue,
|
@@ -64,7 +63,7 @@ module Chronicle
|
|
64
63
|
end
|
65
64
|
|
66
65
|
# Logs +message+ through the progress bar, one call per line, so a
# multi-line message is printed once rather than once per line it contains.
#
# @param message [String] text to log; may span several lines
def log(message)
  message.split("\n").each { |line| @pbar.log(line) }
end
|
70
69
|
end
|
data/lib/chronicle/etl.rb
CHANGED
@@ -1,25 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'chronicle/schema'
|
4
|
+
require 'chronicle/models/base'
|
5
|
+
|
1
6
|
require_relative 'etl/registry/registry'
|
2
7
|
require_relative 'etl/authorizer'
|
3
8
|
require_relative 'etl/config'
|
4
9
|
require_relative 'etl/configurable'
|
5
10
|
require_relative 'etl/exceptions'
|
6
11
|
require_relative 'etl/extraction'
|
12
|
+
require_relative 'etl/record'
|
7
13
|
require_relative 'etl/job_definition'
|
8
14
|
require_relative 'etl/job_log'
|
9
15
|
require_relative 'etl/job_logger'
|
10
16
|
require_relative 'etl/job'
|
11
17
|
require_relative 'etl/logger'
|
12
|
-
require_relative 'etl/models/activity'
|
13
|
-
require_relative 'etl/models/attachment'
|
14
|
-
require_relative 'etl/models/base'
|
15
|
-
require_relative 'etl/models/raw'
|
16
|
-
require_relative 'etl/models/entity'
|
17
18
|
require_relative 'etl/runner'
|
18
19
|
require_relative 'etl/secrets'
|
19
|
-
require_relative 'etl/serializers/serializer'
|
20
20
|
require_relative 'etl/utils/binary_attachments'
|
21
|
-
require_relative 'etl/utils/hash_utilities'
|
22
|
-
require_relative 'etl/utils/text_recognition'
|
23
21
|
require_relative 'etl/utils/progress_bar'
|
24
22
|
require_relative 'etl/version'
|
25
23
|
|