chronicle-etl 0.2.2 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +3 -0
- data/README.md +22 -15
- data/chronicle-etl.gemspec +13 -7
- data/lib/chronicle/etl/cli/connectors.rb +19 -7
- data/lib/chronicle/etl/cli/jobs.rb +38 -26
- data/lib/chronicle/etl/cli/main.rb +10 -2
- data/lib/chronicle/etl/config.rb +24 -3
- data/lib/chronicle/etl/exceptions.rb +13 -0
- data/lib/chronicle/etl/extraction.rb +12 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +43 -37
- data/lib/chronicle/etl/extractors/extractor.rb +25 -4
- data/lib/chronicle/etl/extractors/file_extractor.rb +15 -33
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +104 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +45 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +6 -1
- data/lib/chronicle/etl/job.rb +72 -0
- data/lib/chronicle/etl/job_definition.rb +89 -0
- data/lib/chronicle/etl/job_log.rb +95 -0
- data/lib/chronicle/etl/job_logger.rb +81 -0
- data/lib/chronicle/etl/loaders/csv_loader.rb +6 -6
- data/lib/chronicle/etl/loaders/loader.rb +2 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +16 -9
- data/lib/chronicle/etl/loaders/stdout_loader.rb +8 -3
- data/lib/chronicle/etl/loaders/table_loader.rb +58 -7
- data/lib/chronicle/etl/logger.rb +48 -0
- data/lib/chronicle/etl/models/activity.rb +15 -0
- data/lib/chronicle/etl/models/attachment.rb +14 -0
- data/lib/chronicle/etl/models/base.rb +119 -0
- data/lib/chronicle/etl/models/entity.rb +21 -0
- data/lib/chronicle/etl/models/generic.rb +23 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +61 -0
- data/lib/chronicle/etl/registry/registry.rb +52 -0
- data/lib/chronicle/etl/registry/self_registering.rb +25 -0
- data/lib/chronicle/etl/runner.rb +66 -24
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +25 -0
- data/lib/chronicle/etl/serializers/serializer.rb +27 -0
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +253 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +11 -3
- data/lib/chronicle/etl/transformers/transformer.rb +42 -13
- data/lib/chronicle/etl/utils/binary_attachments.rb +21 -0
- data/lib/chronicle/etl/utils/hash_utilities.rb +19 -0
- data/lib/chronicle/etl/utils/progress_bar.rb +3 -1
- data/lib/chronicle/etl/utils/text_recognition.rb +15 -0
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +16 -1
- metadata +139 -36
- data/CHANGELOG.md +0 -23
- data/Gemfile.lock +0 -85
- data/lib/chronicle/etl/catalog.rb +0 -102
- data/lib/chronicle/etl/transformers/json_transformer.rb +0 -11
@@ -3,49 +3,31 @@ require 'pathname'
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
5
|
class FileExtractor < Chronicle::ETL::Extractor
|
6
|
-
|
7
|
-
if file?
|
8
|
-
extract_file do |data, metadata|
|
9
|
-
yield(data, metadata)
|
10
|
-
end
|
11
|
-
elsif directory?
|
12
|
-
extract_from_directory do |data, metadata|
|
13
|
-
yield(data, metadata)
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
6
|
+
include Extractors::Helpers::FilesystemReader
|
17
7
|
|
18
|
-
|
19
|
-
|
20
|
-
return 1
|
21
|
-
else
|
22
|
-
search_pattern = File.join(@options[:filename], '**/*.eml')
|
23
|
-
Dir.glob(search_pattern).count
|
24
|
-
end
|
8
|
+
register_connector do |r|
|
9
|
+
r.description = 'file or directory of files'
|
25
10
|
end
|
26
11
|
|
27
|
-
|
28
|
-
|
29
|
-
def extract_from_directory
|
30
|
-
search_pattern = File.join(@options[:filename], '**/*.eml')
|
31
|
-
filenames = Dir.glob(search_pattern)
|
12
|
+
def extract
|
32
13
|
filenames.each do |filename|
|
33
|
-
|
34
|
-
yield(file.read, {filename: file})
|
14
|
+
yield Chronicle::ETL::Extraction.new(data: filename)
|
35
15
|
end
|
36
16
|
end
|
37
17
|
|
38
|
-
def
|
39
|
-
|
40
|
-
yield(file.read, {filename: @options[:filename]})
|
18
|
+
def results_count
|
19
|
+
filenames.count
|
41
20
|
end
|
42
21
|
|
43
|
-
|
44
|
-
Pathname.new(@options[:filename]).directory?
|
45
|
-
end
|
22
|
+
private
|
46
23
|
|
47
|
-
def
|
48
|
-
|
24
|
+
def filenames
|
25
|
+
@filenames ||= filenames_in_directory(
|
26
|
+
path: @options[:filename],
|
27
|
+
dir_glob_pattern: @options[:dir_glob_pattern],
|
28
|
+
load_since: @options[:load_since],
|
29
|
+
load_until: @options[:load_until]
|
30
|
+
)
|
49
31
|
end
|
50
32
|
end
|
51
33
|
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    module Extractors
      module Helpers
        # Mixin with helpers for listing, opening and reading files from a
        # single file, a directory (via a glob pattern) or $stdin.
        module FilesystemReader
          # List files under a directory, optionally filtered by mtime/size.
          # All arguments are forwarded unchanged to #gather_files.
          #
          # @yield [String] each matching filename, when a block is given
          # @return [Array<String>] the matching filenames, sorted by mtime
          def filenames_in_directory(...)
            filenames = gather_files(...)
            return filenames unless block_given?

            filenames.each { |filename| yield filename }
          end

          # Read the contents of a file, of every file in a directory, or of $stdin.
          #
          # @param filename [String, IO] path to a file or directory, or $stdin
          # @param yield_each_line [Boolean] yield line by line (default) or the
          #   whole contents of each file at once
          # @param dir_glob_pattern [String] glob applied when +filename+ is a directory
          # @yield [String] a line, or the full contents of one file
          def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
            open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
              if yield_each_line
                file.each_line { |line| yield line }
              else
                yield file.read
              end
            end
          end

          # Open a file, every file in a directory, or $stdin and yield the IO
          # object. Files opened here are closed as soon as the block returns.
          #
          # @param filename [String, IO] path to a file or directory, or $stdin
          # @param dir_glob_pattern [String] glob applied when +filename+ is a directory
          # @yield [IO] the opened file (or $stdin)
          def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
            open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
              yield file
            end
          end

          # Including classes are expected to override this.
          #
          # @raise [NotImplementedError] always
          def results_count
            raise NotImplementedError
            # if file?
            #   return 1
            # else
            #   search_pattern = File.join(@options[:filename], '**/*')
            #   Dir.glob(search_pattern).count
            # end
          end

          private

          # Glob for entries under +path+ and filter them.
          #
          # @param path [String] directory to search
          # @param dir_glob_pattern [String] pattern appended after "<path>/**"
          # @param load_since [Time, nil] keep entries modified strictly after this time
          # @param load_until [Time, nil] keep entries modified strictly before this time
          # @param smaller_than [Integer, nil] keep entries smaller than this many bytes
          # @param larger_than [Integer, nil] keep entries larger than this many bytes
          # @param sort [Symbol] currently ignored; results are always sorted by mtime
          # @return [Array<String>] matching paths, oldest mtime first
          def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
            search_pattern = File.join(path, '**', dir_glob_pattern)
            files = Dir.glob(search_pattern)

            files = files.select { |f| File.mtime(f) > load_since } if load_since
            files = files.select { |f| File.mtime(f) < load_until } if load_until

            # pass in file sizes in bytes
            files = files.select { |f| File.size(f) < smaller_than } if smaller_than
            files = files.select { |f| File.size(f) > larger_than } if larger_than

            # TODO: incorporate sort argument
            files.sort_by { |f| File.mtime(f) }
          end

          # Yield every path in +path+ matching +dir_glob_pattern+.
          #
          # @raise [IOError] if +path+ is not a directory
          def select_files_in_directory(path:, dir_glob_pattern: '**/*')
            raise IOError, "#{path} is not a directory." unless directory?(path)

            search_pattern = File.join(path, dir_glob_pattern)
            Dir.glob(search_pattern).each do |filename|
              yield(filename)
            end
          end

          # Yield an IO for $stdin, for each regular file matching the glob in a
          # directory, or for a single file. Anything else yields nothing.
          def open_files(filename:, dir_glob_pattern:)
            if stdin?(filename)
              yield $stdin
            elsif directory?(filename)
              Dir.glob(File.join(filename, dir_glob_pattern)).each do |path|
                # A recursive glob also matches sub-directories; reading one
                # raises Errno::EISDIR, so only regular files are opened.
                next unless File.file?(path)

                # Block form closes the handle even if the caller's block raises.
                File.open(path) { |file| yield file }
              end
            elsif file?(filename)
              File.open(filename) { |file| yield file }
            end
          end

          def stdin?(filename)
            filename == $stdin
          end

          def directory?(filename)
            Pathname.new(filename).directory?
          end

          def file?(filename)
            Pathname.new(filename).file?
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Chronicle
  module ETL
    # Extractor that reads its input through FilesystemReader and emits one
    # Extraction per chunk that parses as JSON. Chunks that fail to parse are
    # skipped silently.
    class JsonExtractor < Chronicle::ETL::Extractor
      include Extractors::Helpers::FilesystemReader

      register_connector do |r|
        r.description = 'input as JSON'
      end

      DEFAULT_OPTIONS = {
        filename: $stdin,

        # We're expecting line-separated json objects
        jsonl: true
      }.freeze

      # @param options [Hash] overrides merged on top of DEFAULT_OPTIONS
      def initialize(options = {})
        super(DEFAULT_OPTIONS.merge(options))
      end

      # Yield an Extraction for every chunk of input that parses as JSON.
      #
      # @yield [Chronicle::ETL::Extraction]
      def extract
        load_input do |raw|
          record = parse_data(raw)
          next unless record

          yield Chronicle::ETL::Extraction.new(data: record)
        end
      end

      # No count is computed for JSON input.
      #
      # @return [nil]
      def results_count; end

      private

      # @return [Object, nil] the parsed JSON, or nil when +data+ is not valid JSON
      def parse_data(data)
        JSON.parse(data)
      rescue JSON::ParserError
        nil
      end

      # Hand each chunk read from the configured filename to the given block.
      # NOTE(review): @options is presumably set by the Extractor superclass — confirm.
      def load_input(&block)
        read_from_filesystem(filename: @options[:filename], &block)
      end
    end
  end
end
|
@@ -1,9 +1,14 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
3
|
class StdinExtractor < Chronicle::ETL::Extractor
|
4
|
+
register_connector do |r|
|
5
|
+
r.description = 'stdin'
|
6
|
+
end
|
7
|
+
|
4
8
|
def extract
|
5
9
|
$stdin.read.each_line do |line|
|
6
|
-
|
10
|
+
data = { line: line.strip }
|
11
|
+
yield Chronicle::ETL::Extraction.new(data: data)
|
7
12
|
end
|
8
13
|
end
|
9
14
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
module Chronicle
  module ETL
    # A runnable job built from a job definition: it holds the per-phase
    # options and instantiates the extractor, transformer and loader classes
    # that the definition resolves.
    class Job
      extend Forwardable

      def_delegators :@job_definition, :dry_run?

      attr_accessor :name,
                    :extractor_klass,
                    :extractor_options,
                    :transformer_klass,
                    :transformer_options,
                    :loader_klass,
                    :loader_options

      # TODO: build a proper id system
      alias id name

      # @param job_definition [#definition, #extractor_options, #transformer_options,
      #   #loader_options, #incremental?] the definition this job is built from
      # @yield [Job] the new job, for further configuration
      def initialize(job_definition)
        @job_definition = job_definition
        @name = @job_definition.definition[:name]
        @extractor_options = @job_definition.extractor_options
        @transformer_options = @job_definition.transformer_options
        @loader_options = @job_definition.loader_options

        set_continuation if use_continuation?
        yield self if block_given?
      end

      # @return [Object] a new extractor built with this job's extractor options
      def instantiate_extractor
        @extractor_klass = @job_definition.extractor_klass
        @extractor_klass.new(@extractor_options)
      end

      # @param extraction [Object] the extraction handed to the transformer
      # @return [Object] a new transformer for +extraction+
      def instantiate_transformer(extraction)
        @transformer_klass = @job_definition.transformer_klass
        @transformer_klass.new(@transformer_options, extraction)
      end

      # @return [Object] a new loader built with this job's loader options
      def instantiate_loader
        @loader_klass = @job_definition.loader_klass
        @loader_klass.new(@loader_options)
      end

      # Only jobs that have an id (currently: a name) get their log saved.
      #
      # @return [Boolean]
      def save_log?
        # TODO: this needs more nuance
        !id.nil?
      end

      # Human-readable summary of the job and its three phases.
      # NOTE(review): String#bold is not core Ruby; presumably provided by a
      # colorizing gem loaded elsewhere — confirm.
      def to_s
        output = "Job"
        output += " '#{name}'".bold if name
        output += "\n"
        output += " → Extracting from #{@job_definition.extractor_klass.description}\n"
        output += " → Transforming #{@job_definition.transformer_klass.description}\n"
        output += " → Loading to #{@job_definition.loader_klass.description}\n"
      end

      private

      # Look up the latest log for this job and pass it to the extractor as
      # the :continuation option.
      def set_continuation
        # `id` is an alias for the `name` reader; there is no @id ivar, so the
        # lookup must go through the method.
        continuation = Chronicle::ETL::JobLogger.load_latest(id)
        # Build a new hash instead of writing into the definition's own
        # options hash, which may be shared with other jobs.
        @extractor_options = @extractor_options.merge(continuation: continuation)
      end

      def use_continuation?
        @job_definition.incremental?
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'active_support/core_ext/hash/deep_merge'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Describes a job: which extractor, transformer and loader to use and the
    # options for each. Starts from SKELETON_DEFINITION and is refined with
    # #add_config.
    class JobDefinition
      SKELETON_DEFINITION = {
        incremental: false,
        extractor: {
          name: 'stdin',
          options: {}
        },
        transformer: {
          name: 'null',
          options: {}
        },
        loader: {
          name: 'stdout',
          options: {}
        }
      }.freeze

      attr_accessor :definition

      def initialize
        # Deep copy: SKELETON_DEFINITION is only shallowly frozen, so sharing
        # its nested hashes would let one definition's option writes leak into
        # the constant (and into every other definition).
        @definition = Marshal.load(Marshal.dump(SKELETON_DEFINITION))
      end

      # Add config hash to this definition
      #
      # @param config [Hash] values deep-merged over the current definition
      # @return [true] the result of validation
      def add_config(config = {})
        @definition = @definition.deep_merge(config)
        load_credentials
        validate
      end

      # Is this job continuing from a previous run?
      def incremental?
        @definition[:incremental]
      end

      def dry_run?
        @definition[:dry_run]
      end

      def extractor_klass
        load_klass(:extractor, @definition[:extractor][:name])
      end

      def transformer_klass
        load_klass(:transformer, @definition[:transformer][:name])
      end

      def loader_klass
        load_klass(:loader, @definition[:loader][:name])
      end

      def extractor_options
        @definition[:extractor][:options]
      end

      def transformer_options
        @definition[:transformer][:options]
      end

      def loader_options
        @definition[:loader][:options]
      end

      private

      # Resolve a connector class through the registry.
      def load_klass(phase, identifier)
        Chronicle::ETL::Registry.find_by_phase_and_identifier(phase, identifier).klass
      end

      # For every phase whose options name a :credentials entry, load those
      # credentials and merge them over the phase's options.
      def load_credentials
        Chronicle::ETL::Registry::PHASES.each do |phase|
          credentials_name = @definition[phase].dig(:options, :credentials)
          next unless credentials_name

          credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
          # deep_merge returns a new hash; it must be stored or the loaded
          # credentials are silently dropped.
          @definition[phase][:options] = @definition[phase][:options].deep_merge(credentials)
        end
      end

      def validate
        true # TODO
      end
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # A record of what happened in the running of a job. We're interested in
    # tracking when it ran, if it was successful, and what the latest record
    # we found is (to use as a cursor for the next time)
    class JobLog
      extend Forwardable

      # `job` has a custom writer below that also records the job's id.
      attr_reader :job
      attr_accessor :job_id,
                    :last_id,
                    :highest_timestamp,
                    :num_records_processed,
                    :started_at,
                    :finished_at,
                    :success

      def_delegators :@job, :save_log?

      # Create a new JobLog and set its instance variables from a serialized hash
      #
      # @param attrs [Hash{Symbol=>Object}] attribute name => value; an :id key
      #   (the database row id) is ignored. The hash is not modified.
      # @return [JobLog]
      def self.build_from_serialized(attrs)
        new do |job_log|
          attrs.each do |key, value|
            next if key == :id

            job_log.public_send("#{key}=", value)
          end
        end
      end

      # Create a new, empty JobLog
      #
      # @yield [JobLog] the new log, for configuration
      def initialize
        @num_records_processed = 0
        @success = false
        yield self if block_given?
      end

      # Log the result of a single transformation in a job
      # @param transformer [Chronicle::ETL::Transformer] The transformer that ran
      def log_transformation(transformer)
        @last_id = transformer.id if transformer.id

        # Save the highest timestamp that we've encountered so far
        @highest_timestamp = [transformer.timestamp, @highest_timestamp].compact.max if transformer.timestamp

        # TODO: a transformer might yield nil. We might also want certain transformers to explode
        # records into multiple new ones. Therefore, this variable will need more subtle behaviour
        @num_records_processed += 1
      end

      # Indicate that a job has started
      def start
        @started_at = Time.now
      end

      # Indicate that a job has finished
      def finish
        @finished_at = Time.now
        @success = true
      end

      # Indicate that a job stopped with an error: records the finish time but
      # leaves `success` untouched.
      def error
        @finished_at = Time.now
      end

      # Attach the job this log belongs to and remember its id.
      def job=(job)
        @job = job
        @job_id = job.id
      end

      # @return [Float, nil] seconds between start and finish, or nil unless
      #   both timestamps are set
      def duration
        return unless @started_at && @finished_at

        @finished_at - @started_at
      end

      # Take a JobLog's instance variables and turn them into a hash representation
      def serialize
        {
          job_id: @job_id,
          last_id: @last_id,
          highest_timestamp: @highest_timestamp,
          num_records_processed: @num_records_processed,
          started_at: @started_at,
          finished_at: @finished_at,
          success: @success
        }
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module Chronicle
  module ETL
    # Saves JobLogs to db and loads previous ones
    class JobLogger
      extend Forwardable

      def_delegators :@job_log, :start, :finish, :error, :log_transformation, :duration, :success
      attr_accessor :job_log

      # For a given `job_id`, return the last successful log
      #
      # @param job_id [String] id of the job whose history is searched
      # @return [JobLog, nil] the most recently finished successful log of that
      #   job, or nil when there is none
      def self.load_latest(job_id)
        with_db_connection do |db|
          # Filter on job_id as well as success; otherwise the latest
          # successful log of some other job would be returned.
          attrs = db[:job_logs]
                  .where(job_id: job_id, success: true)
                  .reverse_order(:finished_at)
                  .first
          JobLog.build_from_serialized(attrs) if attrs
        end
      end

      # Connect to the sqlite database, creating its directory and the
      # job_logs table on first use.
      #
      # @yield [db] the Sequel database connection
      def self.with_db_connection
        initialize_db unless db_exists?
        Sequel.connect("sqlite://#{db_filename}") do |db|
          initialize_schema(db) unless schema_exists?(db)
          yield db
        end
      end

      def self.db_exists?
        # File.exists? was deprecated and removed in Ruby 3.2.
        File.exist?(db_filename)
      end

      def self.schema_exists?(db)
        db.tables.include?(:job_logs)
      end

      # @return [String] path of the sqlite file, as resolved by Runcom
      def self.db_filename
        data = Runcom::Data.new "chronicle/etl/job_log.db"
        data.all[0].to_s
      end

      # Make sure the directory that will hold the database file exists.
      def self.initialize_db
        FileUtils.mkdir_p(File.dirname(db_filename))
      end

      def self.initialize_schema(db)
        db.create_table :job_logs do
          primary_key :id
          String :job_id, null: false
          String :last_id
          Time :highest_timestamp
          Integer :num_records_processed
          boolean :success, default: false
          Time :started_at
          Time :finished_at
        end
      end

      # Create a new JobLogger
      #
      # @param job [Object] the job being logged; must respond to `id`
      def initialize(job)
        @job_log = JobLog.new do |job_log|
          job_log.job = job
        end
      end

      # Save this JobLogger's JobLog to db
      def save
        return unless @job_log.save_log?

        JobLogger.with_db_connection do |db|
          dataset = db[:job_logs]
          dataset.insert(@job_log.serialize)
        end
      end

      def summarize
        @job_log.inspect
      end
    end
  end
end
|
@@ -3,17 +3,17 @@ require 'csv'
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
5
|
class CsvLoader < Chronicle::ETL::Loader
|
6
|
+
register_connector do |r|
|
7
|
+
r.description = 'CSV'
|
8
|
+
end
|
9
|
+
|
6
10
|
def initialize(options={})
|
7
11
|
super(options)
|
8
12
|
@rows = []
|
9
13
|
end
|
10
14
|
|
11
|
-
def load(
|
12
|
-
|
13
|
-
@rows << result.values
|
14
|
-
else
|
15
|
-
@rows << result
|
16
|
-
end
|
15
|
+
def load(record)
|
16
|
+
@rows << record.to_h_flattened.values
|
17
17
|
end
|
18
18
|
|
19
19
|
def finish
|
@@ -2,10 +2,10 @@ module Chronicle
|
|
2
2
|
module ETL
|
3
3
|
# Abstract class representing a Loader for an ETL job
|
4
4
|
class Loader
|
5
|
-
extend Chronicle::ETL::
|
5
|
+
extend Chronicle::ETL::Registry::SelfRegistering
|
6
6
|
|
7
7
|
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
|
-
# ==
|
8
|
+
# == Parameters:
|
9
9
|
# options::
|
10
10
|
# Options for configuring this Loader
|
11
11
|
def initialize(options = {})
|
@@ -5,25 +5,32 @@ require 'json'
|
|
5
5
|
module Chronicle
|
6
6
|
module ETL
|
7
7
|
class RestLoader < Chronicle::ETL::Loader
|
8
|
-
|
8
|
+
register_connector do |r|
|
9
|
+
r.description = 'a REST endpoint'
|
10
|
+
end
|
11
|
+
|
12
|
+
def initialize( options={} )
|
9
13
|
super(options)
|
10
14
|
end
|
11
15
|
|
12
|
-
def load(
|
16
|
+
def load(record)
|
17
|
+
payload = Chronicle::ETL::JSONAPISerializer.serialize(record)
|
18
|
+
# have the outer data key that json-api expects
|
19
|
+
payload = { data: payload } unless payload[:data]
|
20
|
+
|
13
21
|
uri = URI.parse("#{@options[:hostname]}#{@options[:endpoint]}")
|
14
22
|
|
15
23
|
header = {
|
16
24
|
"Authorization" => "Bearer #{@options[:access_token]}",
|
17
25
|
"Content-Type": 'application/json'
|
18
26
|
}
|
27
|
+
use_ssl = uri.scheme == 'https'
|
19
28
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
response = http.request(request)
|
29
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: use_ssl) do |http|
|
30
|
+
request = Net::HTTP::Post.new(uri.request_uri, header)
|
31
|
+
request.body = payload.to_json
|
32
|
+
http.request(request)
|
33
|
+
end
|
27
34
|
end
|
28
35
|
end
|
29
36
|
end
|
@@ -1,9 +1,14 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
3
|
class StdoutLoader < Chronicle::ETL::Loader
|
4
|
-
|
5
|
-
|
4
|
+
register_connector do |r|
|
5
|
+
r.description = 'stdout'
|
6
|
+
end
|
7
|
+
|
8
|
+
def load(record)
|
9
|
+
serializer = Chronicle::ETL::JSONAPISerializer.new(record)
|
10
|
+
puts serializer.serializable_hash.to_json
|
6
11
|
end
|
7
12
|
end
|
8
13
|
end
|
9
|
-
end
|
14
|
+
end
|