chronicle-etl 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +3 -0
- data/README.md +20 -13
- data/chronicle-etl.gemspec +11 -8
- data/lib/chronicle/etl/cli/connectors.rb +19 -7
- data/lib/chronicle/etl/cli/jobs.rb +24 -18
- data/lib/chronicle/etl/cli/main.rb +10 -2
- data/lib/chronicle/etl/config.rb +1 -1
- data/lib/chronicle/etl/exceptions.rb +12 -1
- data/lib/chronicle/etl/extraction.rb +12 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +43 -36
- data/lib/chronicle/etl/extractors/extractor.rb +9 -1
- data/lib/chronicle/etl/extractors/file_extractor.rb +15 -33
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +104 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +45 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +6 -1
- data/lib/chronicle/etl/job.rb +30 -29
- data/lib/chronicle/etl/job_definition.rb +45 -7
- data/lib/chronicle/etl/job_log.rb +10 -0
- data/lib/chronicle/etl/job_logger.rb +23 -20
- data/lib/chronicle/etl/loaders/csv_loader.rb +4 -0
- data/lib/chronicle/etl/loaders/loader.rb +1 -1
- data/lib/chronicle/etl/loaders/rest_loader.rb +5 -1
- data/lib/chronicle/etl/loaders/stdout_loader.rb +6 -1
- data/lib/chronicle/etl/loaders/table_loader.rb +57 -7
- data/lib/chronicle/etl/logger.rb +48 -0
- data/lib/chronicle/etl/models/attachment.rb +14 -0
- data/lib/chronicle/etl/models/base.rb +23 -7
- data/lib/chronicle/etl/models/entity.rb +9 -3
- data/lib/chronicle/etl/registry/connector_registration.rb +61 -0
- data/lib/chronicle/etl/registry/registry.rb +52 -0
- data/lib/chronicle/etl/registry/self_registering.rb +25 -0
- data/lib/chronicle/etl/runner.rb +57 -7
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +25 -0
- data/lib/chronicle/etl/serializers/serializer.rb +27 -0
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +253 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -1
- data/lib/chronicle/etl/transformers/transformer.rb +39 -9
- data/lib/chronicle/etl/utils/binary_attachments.rb +21 -0
- data/lib/chronicle/etl/utils/progress_bar.rb +3 -1
- data/lib/chronicle/etl/utils/text_recognition.rb +15 -0
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +7 -2
- metadata +96 -44
- data/Gemfile.lock +0 -91
- data/lib/chronicle/etl/catalog.rb +0 -108
- data/lib/chronicle/etl/utils/jsonapi.rb +0 -28
@@ -0,0 +1,45 @@
|
|
1
|
+
module Chronicle
  module ETL
    # Extractor that reads its input through the FilesystemReader helper,
    # parses each chunk it receives as JSON, and yields one Extraction per
    # chunk that parses successfully.
    class JsonExtractor < Chronicle::ETL::Extractor
      include Extractors::Helpers::FilesystemReader

      register_connector do |r|
        r.description = 'input as JSON'
      end

      DEFAULT_OPTIONS = {
        filename: $stdin,

        # We're expecting line-separated json objects
        jsonl: true
      }.freeze

      # @param options [Hash] overrides merged on top of DEFAULT_OPTIONS
      def initialize(options = {})
        super(DEFAULT_OPTIONS.merge(options))
      end

      # Yields a Chronicle::ETL::Extraction for every input chunk that parses
      # to a truthy value; chunks that fail to parse are skipped.
      def extract
        load_input do |raw|
          record = parse_data(raw)
          next unless record

          yield Chronicle::ETL::Extraction.new(data: record)
        end
      end

      # No count is computed for this extractor.
      #
      # @return [nil]
      def results_count
        nil
      end

      private

      # @param data [String] JSON text
      # @return [Object, nil] the parsed value, or nil when the text is not
      #   valid JSON (the parse error is intentionally not propagated)
      def parse_data(data)
        begin
          JSON.parse(data)
        rescue JSON::ParserError
          nil
        end
      end

      # Passes each chunk produced by read_from_filesystem for the configured
      # :filename option on to the caller's block.
      def load_input
        source = @options[:filename]
        read_from_filesystem(filename: source) { |data| yield data }
      end
    end
  end
end
|
@@ -1,9 +1,14 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
3
|
class StdinExtractor < Chronicle::ETL::Extractor
  register_connector do |r|
    r.description = 'stdin'
  end

  # Yields one Chronicle::ETL::Extraction per line of standard input. The
  # extraction's data is a hash with a single :line key holding the line
  # with surrounding whitespace stripped.
  #
  # Lines are consumed as they arrive via IO#each_line instead of reading
  # all of stdin into one String first, so memory use does not grow with
  # the size of the input.
  def extract
    $stdin.each_line do |line|
      data = { line: line.strip }
      yield Chronicle::ETL::Extraction.new(data: data)
    end
  end
end
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
require 'forwardable'
|
1
2
|
module Chronicle
|
2
3
|
module ETL
|
3
4
|
class Job
|
5
|
+
extend Forwardable
|
6
|
+
|
7
|
+
def_delegators :@job_definition, :dry_run?
|
8
|
+
|
4
9
|
attr_accessor :name,
|
5
10
|
:extractor_klass,
|
6
11
|
:extractor_options,
|
@@ -12,32 +17,30 @@ module Chronicle
|
|
12
17
|
# TODO: build a proper id system
|
13
18
|
alias id name
|
14
19
|
|
15
|
-
def initialize(
|
16
|
-
|
17
|
-
@name = definition[:name]
|
18
|
-
@
|
19
|
-
@
|
20
|
-
|
21
|
-
@transformer_klass = load_klass(:transformer, definition[:transformer][:name])
|
22
|
-
@transformer_options = definition[:transformer][:options] || {}
|
23
|
-
|
24
|
-
@loader_klass = load_klass(:loader, definition[:loader][:name])
|
25
|
-
@loader_options = definition[:loader][:options] || {}
|
20
|
+
# Build a job from a job definition, copying the name and the per-phase
# options out of it.
#
# @param job_definition [Object] must respond to `definition`,
#   `extractor_options`, `transformer_options` and `loader_options`
# @yield [self] the new job, when a block is given
def initialize(job_definition)
  @job_definition = job_definition
  @name = job_definition.definition[:name]
  @extractor_options = job_definition.extractor_options
  @transformer_options = job_definition.transformer_options
  @loader_options = job_definition.loader_options

  set_continuation if use_continuation?
  yield self if block_given?
end
|
30
30
|
|
31
31
|
# Resolve the extractor class from the job definition, remember it in
# @extractor_klass, and build an instance with the extractor options.
#
# @return [Object] a new instance of the extractor class
def instantiate_extractor
  klass = @job_definition.extractor_klass
  @extractor_klass = klass
  klass.new(@extractor_options)
end
|
34
35
|
|
35
|
-
def instantiate_transformer(
|
36
|
-
|
36
|
+
# Resolve the transformer class from the job definition, remember it in
# @transformer_klass, and build an instance for one extraction.
#
# @param extraction [Object] passed as the second constructor argument
# @return [Object] a new instance of the transformer class
def instantiate_transformer(extraction)
  klass = @job_definition.transformer_klass
  @transformer_klass = klass
  klass.new(@transformer_options, extraction)
end
|
38
40
|
|
39
41
|
# Resolve the loader class from the job definition, remember it in
# @loader_klass, and build an instance with the loader options.
#
# @return [Object] a new instance of the loader class
def instantiate_loader
  klass = @job_definition.loader_klass
  @loader_klass = klass
  klass.new(@loader_options)
end
|
42
45
|
|
43
46
|
def save_log?
|
@@ -45,26 +48,24 @@ module Chronicle
|
|
45
48
|
return !id.nil?
|
46
49
|
end
|
47
50
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
# Multi-line summary of the job: a heading (with the bolded name when one
# is set) followed by one line per phase using each connector class's
# description. Every line, including the last, ends with a newline.
#
# @return [String]
def to_s
  heading = "Job"
  heading += " '#{name}'".bold if name

  lines = [
    heading,
    " → Extracting from #{@job_definition.extractor_klass.description}",
    " → Transforming #{@job_definition.transformer_klass.description}",
    " → Loading to #{@job_definition.loader_klass.description}"
  ]
  lines.map { |line| "#{line}\n" }.join
end
|
56
59
|
|
57
|
-
|
58
|
-
Chronicle::ETL::Catalog.phase_and_identifier_to_klass(phase, identifier)
|
59
|
-
end
|
60
|
+
private
|
60
61
|
|
61
62
|
# Look up the latest stored log for this job and hand it to the extractor
# through its options under the :continuation key.
def set_continuation
  # `id` is an alias of `name`; no @id instance variable is ever assigned,
  # so reading @id here would always pass nil to the lookup.
  continuation = Chronicle::ETL::JobLogger.load_latest(id)
  @extractor_options[:continuation] = continuation
end
|
65
66
|
|
66
|
-
def
|
67
|
-
|
67
|
+
# Whether a continuation should be loaded for this job; mirrors the job
# definition's incremental? flag.
#
# @return [Object] whatever the job definition's incremental? returns
def use_continuation?
  definition = @job_definition
  definition.incremental?
end
|
69
70
|
end
|
70
71
|
end
|
@@ -1,19 +1,20 @@
|
|
1
|
-
require 'deep_merge'
|
1
|
+
require 'active_support/core_ext/hash/deep_merge'
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
5
|
class JobDefinition
|
6
6
|
SKELETON_DEFINITION = {
|
7
|
+
incremental: false,
|
7
8
|
extractor: {
|
8
|
-
name:
|
9
|
+
name: 'stdin',
|
9
10
|
options: {}
|
10
11
|
},
|
11
12
|
transformer: {
|
12
|
-
name:
|
13
|
+
name: 'null',
|
13
14
|
options: {}
|
14
15
|
},
|
15
16
|
loader: {
|
16
|
-
name:
|
17
|
+
name: 'stdout',
|
17
18
|
options: {}
|
18
19
|
}
|
19
20
|
}.freeze
|
@@ -26,16 +27,53 @@ module Chronicle
|
|
26
27
|
|
27
28
|
# Add config hash to this definition
|
28
29
|
# Deep-merge `config` on top of the current definition, then reload
# credentials and validate the result.
#
# @param config [Hash] values to merge into the definition
# @return [Object] whatever `validate` returns
def add_config(config = {})
  merged = @definition.deep_merge(config)
  @definition = merged
  load_credentials
  validate
end
|
33
34
|
|
35
|
+
# Is this job continuing from a previous run?
|
36
|
+
# @return [Object] the value stored under :incremental in the definition
def incremental?
  @definition[:incremental]
end
|
39
|
+
|
40
|
+
# @return [Object] the value stored under :dry_run in the definition
#   (nil when the key is absent)
def dry_run?
  @definition[:dry_run]
end
|
43
|
+
|
44
|
+
# Resolve the extractor named in the definition via load_klass.
#
# @return [Object] the result of load_klass for the :extractor phase
def extractor_klass
  identifier = @definition[:extractor][:name]
  load_klass(:extractor, identifier)
end
|
47
|
+
|
48
|
+
# Resolve the transformer named in the definition via load_klass.
#
# @return [Object] the result of load_klass for the :transformer phase
def transformer_klass
  identifier = @definition[:transformer][:name]
  load_klass(:transformer, identifier)
end
|
51
|
+
|
52
|
+
# Resolve the loader named in the definition via load_klass.
#
# @return [Object] the result of load_klass for the :loader phase
def loader_klass
  identifier = @definition[:loader][:name]
  load_klass(:loader, identifier)
end
|
55
|
+
|
56
|
+
# @return [Object] the :options entry of the definition's :extractor section
def extractor_options
  section = @definition[:extractor]
  section[:options]
end
|
59
|
+
|
60
|
+
# @return [Object] the :options entry of the definition's :transformer section
def transformer_options
  section = @definition[:transformer]
  section[:options]
end
|
63
|
+
|
64
|
+
# @return [Object] the :options entry of the definition's :loader section
def loader_options
  section = @definition[:loader]
  section[:options]
end
|
67
|
+
|
34
68
|
private
|
35
69
|
|
70
|
+
# Look up a connector in the registry and return its class.
#
# @param phase [Symbol] phase passed through to the registry lookup
# @param identifier [String] connector name passed through to the lookup
# @return [Object] the `klass` of the registration the registry returns
def load_klass(phase, identifier)
  registration = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase, identifier)
  registration.klass
end
|
73
|
+
|
36
74
|
def load_credentials
|
37
|
-
Chronicle::ETL::
|
38
|
-
credentials_name = @definition[phase]
|
75
|
+
Chronicle::ETL::Registry::PHASES.each do |phase|
|
76
|
+
credentials_name = @definition[phase].dig(:options, :credentials)
|
39
77
|
if credentials_name
|
40
78
|
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|
41
79
|
@definition[phase][:options].deep_merge(credentials)
|
@@ -50,11 +50,21 @@ module Chronicle
|
|
50
50
|
@success = true
|
51
51
|
end
|
52
52
|
|
53
|
+
# Record the current time as the moment the job stopped.
#
# @return [Time] the timestamp stored in @finished_at
def error
  @finished_at = Time.now
end
|
56
|
+
|
53
57
|
# Attach a job to this log and remember the job's id alongside it.
#
# @param job [Object] must respond to `id`
def job=(job)
  @job = job
  @job_id = job.id
end
|
57
61
|
|
62
|
+
# Elapsed time between @started_at and @finished_at.
#
# @return [Object, nil] @finished_at minus @started_at, or nil while no
#   finish time has been recorded
def duration
  if @finished_at
    @finished_at - @started_at
  end
end
|
67
|
+
|
58
68
|
# Take a JobLog's instance variables and turn them into a hash representation
|
59
69
|
def serialize
|
60
70
|
{
|
@@ -1,32 +1,14 @@
|
|
1
1
|
require 'sequel'
|
2
2
|
require 'forwardable'
|
3
3
|
|
4
|
-
require 'pry'
|
5
|
-
|
6
4
|
module Chronicle
|
7
5
|
module ETL
|
8
6
|
# Saves JobLogs to db and loads previous ones
|
9
7
|
class JobLogger
|
10
8
|
extend Forwardable
|
11
9
|
|
12
|
-
def_delegators :@job_log, :start, :finish, :log_transformation
|
13
|
-
|
14
|
-
# Create a new JobLogger
|
15
|
-
def initialize(job)
|
16
|
-
@job_log = JobLog.new do |job_log|
|
17
|
-
job_log.job = job
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
# Save this JobLogger's JobLog to db
|
22
|
-
def save
|
23
|
-
return unless @job_log.save_log?
|
24
|
-
|
25
|
-
JobLogger.with_db_connection do |db|
|
26
|
-
dataset = db[:job_logs]
|
27
|
-
dataset.insert(@job_log.serialize)
|
28
|
-
end
|
29
|
-
end
|
10
|
+
def_delegators :@job_log, :start, :finish, :error, :log_transformation, :duration, :success
|
11
|
+
attr_accessor :job_log
|
30
12
|
|
31
13
|
# For a given `job_id`, return the last successful log
|
32
14
|
def self.load_latest(job_id)
|
@@ -73,6 +55,27 @@ module Chronicle
|
|
73
55
|
Time :finished_at
|
74
56
|
end
|
75
57
|
end
|
58
|
+
|
59
|
+
# Create a new JobLogger
|
60
|
+
# @param job [Object] the job assigned to the freshly created JobLog
def initialize(job)
  @job_log = JobLog.new { |log| log.job = job }
end
|
65
|
+
|
66
|
+
# Save this JobLogger's JobLog to db
|
67
|
+
# Inserts the serialized job log into the :job_logs dataset, but only when
# the log reports that it should be saved.
#
# @return [Object, nil] the result of with_db_connection, or nil when the
#   log is not saved
def save
  if @job_log.save_log?
    JobLogger.with_db_connection do |db|
      db[:job_logs].insert(@job_log.serialize)
    end
  end
end
|
75
|
+
|
76
|
+
# @return [String] the `inspect` representation of the wrapped job log
def summarize
  log = @job_log
  log.inspect
end
|
76
79
|
end
|
77
80
|
end
|
78
81
|
end
|
@@ -2,7 +2,7 @@ module Chronicle
|
|
2
2
|
module ETL
|
3
3
|
# Abstract class representing a Loader for an ETL job
|
4
4
|
class Loader
|
5
|
-
extend Chronicle::ETL::
|
5
|
+
extend Chronicle::ETL::Registry::SelfRegistering
|
6
6
|
|
7
7
|
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
8
|
# == Parameters:
|
@@ -5,12 +5,16 @@ require 'json'
|
|
5
5
|
module Chronicle
|
6
6
|
module ETL
|
7
7
|
class RestLoader < Chronicle::ETL::Loader
|
8
|
+
register_connector do |r|
|
9
|
+
r.description = 'a REST endpoint'
|
10
|
+
end
|
11
|
+
|
8
12
|
# @param options [Hash] forwarded unchanged to the parent initializer
def initialize(options = {})
  super(options)
end
|
11
15
|
|
12
16
|
def load(record)
|
13
|
-
payload = Chronicle::ETL::
|
17
|
+
payload = Chronicle::ETL::JSONAPISerializer.serialize(record)
|
14
18
|
# have the outer data key that json-api expects
|
15
19
|
payload = { data: payload } unless payload[:data]
|
16
20
|
|
@@ -1,8 +1,13 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
3
|
class StdoutLoader < Chronicle::ETL::Loader
  register_connector do |r|
    r.description = 'stdout'
  end

  # Serialize a record with the JSONAPI serializer and print the resulting
  # hash as a JSON line on standard output.
  #
  # @param record [Object] the record handed to JSONAPISerializer
  def load(record)
    payload = Chronicle::ETL::JSONAPISerializer.new(record).serializable_hash
    puts payload.to_json
  end
end
|
8
13
|
end
|
@@ -1,21 +1,71 @@
|
|
1
1
|
require 'tty/table'
|
2
|
+
require 'active_support/core_ext/string/filters'
|
3
|
+
require 'active_support/core_ext/hash/reverse_merge'
|
2
4
|
|
3
5
|
module Chronicle
|
4
6
|
module ETL
|
5
7
|
# Loader that collects every record and prints them as one table when the
# job finishes.
class TableLoader < Chronicle::ETL::Loader
  register_connector do |r|
    r.description = 'an ASCII table'
  end

  DEFAULT_OPTIONS = {
    fields_limit: nil,
    fields_exclude: ['lids', 'type'],
    fields_include: [],
    truncate_values_at: nil,
    table_renderer: :basic
  }.freeze

  # @param options [Hash] overrides for DEFAULT_OPTIONS
  def initialize(options={})
    @options = options.reverse_merge(DEFAULT_OPTIONS)
    @records = []
  end

  # Buffer the flattened hash form of a record; nothing is printed yet.
  def load(record)
    @records << record.to_h_flattened
  end

  # Render all buffered records as a table on stdout. Prints nothing when
  # no records were loaded.
  def finish
    return if @records.empty?

    headers = build_headers(@records)
    rows = build_rows(@records, headers)

    @table = TTY::Table.new(header: headers, rows: rows)
    puts @table.render(
      @options[:table_renderer].to_sym,
      padding: [0, 2, 0, 0]
    )
  end

  private

  # Work out the column list: the explicit :fields_include list when given,
  # otherwise every key seen across the records, in first-seen order and
  # without duplicates. Entries ending with any :fields_exclude suffix are
  # dropped, then the list is cut to :fields_limit when that is set.
  #
  # Plain arrays with #uniq are used rather than Set so the method does not
  # depend on 'set' having been required elsewhere.
  #
  # @return [Array<Symbol>]
  def build_headers(records)
    headers =
      if @options[:fields_include].any?
        @options[:fields_include].uniq
      else
        # use all the keys of the flattened record hash
        records.flat_map(&:keys).map(&:to_s).uniq
      end

    if @options[:fields_exclude].any?
      headers = headers.reject { |header| header.end_with?(*@options[:fields_exclude]) }
    end
    headers = headers.first(@options[:fields_limit]) if @options[:fields_limit]

    headers.map(&:to_sym)
  end

  # Turn each record into a row of strings ordered like `headers`,
  # truncating every value to :truncate_values_at when that option is set.
  #
  # @return [Array<Array<String>>]
  def build_rows(records, headers)
    limit = @options[:truncate_values_at]

    records.map do |record|
      record.values_at(*headers).map do |value|
        text = value.to_s
        limit ? text.truncate(limit) : text
      end
    end
  end
end
|
21
71
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Chronicle
  module ETL
    # Module-level logger. Messages at or above the current log level are
    # written to $stderr, or routed through an attached progress bar's `log`
    # method while one is attached.
    module Logger
      extend self

      DEBUG = 0
      INFO = 1
      WARN = 2
      ERROR = 3
      FATAL = 4

      attr_accessor :log_level

      @log_level = INFO
      @destination = $stderr

      # Emit `message` when `level` is not below the current log level.
      #
      # @param message [Object] what to print
      # @param level [Integer] one of the level constants above
      def output(message, level)
        return if level < @log_level

        bar = @progress_bar
        bar ? bar.log(message) : @destination.puts(message)
      end

      # Log at ERROR level.
      def error(message)
        output(message, ERROR)
      end

      # Log at INFO level.
      def info(message)
        output(message, INFO)
      end

      # Log at DEBUG level.
      def debug(message)
        output(message, DEBUG)
      end

      # Route subsequent messages through `progress_bar.log`.
      def attach_to_progress_bar(progress_bar)
        @progress_bar = progress_bar
      end

      # Go back to writing messages to the default destination.
      def detach_from_progress_bar
        @progress_bar = nil
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'chronicle/etl/models/base'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    module Models
      # Model with type 'attachments' exposing :url_original and :data.
      class Attachment < Chronicle::ETL::Models::Base
        TYPE = 'attachments'.freeze
        ATTRIBUTES = %i[url_original data].freeze

        attr_accessor(*ATTRIBUTES)
      end
    end
  end
end
|
@@ -6,7 +6,7 @@ module Chronicle
|
|
6
6
|
# Represents a record that's been transformed by a Transformer and
|
7
7
|
# ready to be loaded. Loosely based on ActiveModel.
|
8
8
|
class Base
|
9
|
-
ATTRIBUTES = [:provider, :provider_id, :lat, :lng].freeze
|
9
|
+
ATTRIBUTES = [:provider, :provider_id, :lat, :lng, :metadata].freeze
|
10
10
|
ASSOCIATIONS = [].freeze
|
11
11
|
|
12
12
|
attr_accessor(:id, :dedupe_on, *ATTRIBUTES)
|
@@ -14,6 +14,7 @@ module Chronicle
|
|
14
14
|
# Build a model, optionally seeding it from a hash of attributes.
#
# @param attributes [Hash, nil] handed to assign_attributes when truthy
def initialize(attributes = {})
  assign_attributes(attributes) if attributes

  # Fill in defaults only where no value is present. Assigning them
  # unconditionally after assign_attributes would discard a :dedupe_on or
  # :metadata value the caller passed in.
  @dedupe_on ||= []
  @metadata ||= {}
end
|
18
19
|
|
19
20
|
# A unique identifier for this model is formed from a type
|
@@ -36,6 +37,8 @@ module Chronicle
|
|
36
37
|
# For a given set of fields of this model, generate a
|
37
38
|
# unique local id by hashing the field values
|
38
39
|
def generate_lid fields
|
40
|
+
raise ArgumentError.new("Must provide an array of symbolized fields") unless fields.is_a?(Array)
|
41
|
+
|
39
42
|
values = fields.sort.map do |field|
|
40
43
|
instance_variable = "@#{field.to_s}"
|
41
44
|
self.instance_variable_get(instance_variable)
|
@@ -75,9 +78,21 @@ module Chronicle
|
|
75
78
|
end
|
76
79
|
|
77
80
|
# Hash form of the associations: each value is converted with to_h, and
# array values are converted element by element.
#
# @return [Hash]
def associations_hash
  associations.each_with_object({}) do |(key, value), result|
    result[key] = value.is_a?(Array) ? value.map(&:to_h) : value.to_h
  end
end
|
89
|
+
|
90
|
+
# Wraps the dedupe rules under :meta. Each entry of @dedupe_on (a list of
# fields) becomes one comma-joined string.
#
# @return [Hash] `{ meta: { dedupe_on: [String, ...] } }`
def meta_hash
  joined_rules = @dedupe_on.map do |fields|
    fields.map(&:to_s).join(",")
  end

  { meta: { dedupe_on: joined_rules } }
end
|
82
97
|
|
83
98
|
# FIXME: move this to a Utils module
|
@@ -86,11 +101,12 @@ module Chronicle
|
|
86
101
|
end
|
87
102
|
|
88
103
|
# Full hash form of the model: the identifier hash overlaid, in order, with
# the attributes, the associations hash and the meta hash (later parts win
# on key clashes).
#
# @return [Hash]
def to_h
  result = identifier_hash
  result = result.merge(attributes)
  result = result.merge(associations_hash)
  result.merge(meta_hash)
end
|
91
109
|
|
92
|
-
private
|
93
|
-
|
94
110
|
def assign_attributes attributes
|
95
111
|
attributes.each do |k, v|
|
96
112
|
setter = :"#{k}="
|
@@ -5,10 +5,16 @@ module Chronicle
|
|
5
5
|
module Models
|
6
6
|
# Model with type 'entities'. Accessors are generated for every name in
# ATTRIBUTES and ASSOCIATIONS.
class Entity < Chronicle::ETL::Models::Base
  TYPE = 'entities'.freeze
  ATTRIBUTES = %i[title body represents slug myself metadata].freeze
  ASSOCIATIONS = %i[
    attachments
    abouts
    depicts
    consumers
    contains
  ].freeze # TODO: add these to reflect Chronicle Schema

  attr_accessor(*(ATTRIBUTES + ASSOCIATIONS))
end
|
13
19
|
end
|
14
20
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Chronicle
  module ETL
    module Registry
      # Records details about a connector such as its provider and a description
      class ConnectorRegistration
        # Trailing phase word stripped from a class name to derive the
        # default identifier.
        PHASE_SUFFIX = /(Extractor|Loader|Transformer)\z/.freeze

        attr_accessor :klass
        # Readers for these three are defined below because each falls back
        # to a value derived from the class when nothing was assigned.
        attr_writer :identifier, :provider, :description

        # @param klass [Class] the connector class being registered
        def initialize(klass)
          @klass = klass
        end

        # Which ETL phase the class belongs to, judged by its ancestors.
        #
        # @return [Symbol, nil] :extractor, :transformer or :loader; nil when
        #   the class descends from none of the three base classes
        def phase
          if klass.ancestors.include? Chronicle::ETL::Extractor
            :extractor
          elsif klass.ancestors.include? Chronicle::ETL::Transformer
            :transformer
          elsif klass.ancestors.include? Chronicle::ETL::Loader
            :loader
          end
        end

        # @return [String] "<phase>-<identifier>"
        def to_s
          "#{phase}-#{identifier}"
        end

        # @return [Boolean] true when the class name contains 'Chronicle::ETL'
        def built_in?
          @klass.to_s.include? 'Chronicle::ETL'
        end

        # @return [String] the class name as a string
        def klass_name
          @klass.to_s
        end

        # The explicitly assigned identifier, or else the last segment of
        # the class name, lowercased, with a trailing Extractor / Loader /
        # Transformer removed.
        #
        # Uses the non-mutating `sub`: `gsub!` returns nil when nothing is
        # replaced, which made the fallback raise NoMethodError for class
        # names without one of those suffixes.
        #
        # @return [String]
        def identifier
          @identifier || klass_name.split('::').last.sub(PHASE_SUFFIX, '').downcase
        end

        # @return [String] the assigned description, or else the last
        #   segment of the class name
        def description
          @description || klass_name.split('::').last
        end

        # @return [String] the assigned provider, or else 'chronicle' for
        #   built-in classes and '' for everything else
        def provider
          @provider || (built_in? ? 'chronicle' : '')
        end

        # Short phrase combining a phase-specific verb with the description.
        #
        # @return [String]
        def descriptive_phrase
          prefix = case phase
                   when :extractor
                     "Extracts from"
                   when :transformer
                     "Transforms"
                   when :loader
                     "Loads to"
                   end

          "#{prefix} #{description}"
        end
      end
    end
  end
end
|