chronicle-etl 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +3 -0
- data/README.md +20 -13
- data/chronicle-etl.gemspec +11 -8
- data/lib/chronicle/etl/cli/connectors.rb +19 -7
- data/lib/chronicle/etl/cli/jobs.rb +24 -18
- data/lib/chronicle/etl/cli/main.rb +10 -2
- data/lib/chronicle/etl/config.rb +1 -1
- data/lib/chronicle/etl/exceptions.rb +12 -1
- data/lib/chronicle/etl/extraction.rb +12 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +43 -36
- data/lib/chronicle/etl/extractors/extractor.rb +9 -1
- data/lib/chronicle/etl/extractors/file_extractor.rb +15 -33
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +104 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +45 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +6 -1
- data/lib/chronicle/etl/job.rb +30 -29
- data/lib/chronicle/etl/job_definition.rb +45 -7
- data/lib/chronicle/etl/job_log.rb +10 -0
- data/lib/chronicle/etl/job_logger.rb +23 -20
- data/lib/chronicle/etl/loaders/csv_loader.rb +4 -0
- data/lib/chronicle/etl/loaders/loader.rb +1 -1
- data/lib/chronicle/etl/loaders/rest_loader.rb +5 -1
- data/lib/chronicle/etl/loaders/stdout_loader.rb +6 -1
- data/lib/chronicle/etl/loaders/table_loader.rb +57 -7
- data/lib/chronicle/etl/logger.rb +48 -0
- data/lib/chronicle/etl/models/attachment.rb +14 -0
- data/lib/chronicle/etl/models/base.rb +23 -7
- data/lib/chronicle/etl/models/entity.rb +9 -3
- data/lib/chronicle/etl/registry/connector_registration.rb +61 -0
- data/lib/chronicle/etl/registry/registry.rb +52 -0
- data/lib/chronicle/etl/registry/self_registering.rb +25 -0
- data/lib/chronicle/etl/runner.rb +57 -7
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +25 -0
- data/lib/chronicle/etl/serializers/serializer.rb +27 -0
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +253 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -1
- data/lib/chronicle/etl/transformers/transformer.rb +39 -9
- data/lib/chronicle/etl/utils/binary_attachments.rb +21 -0
- data/lib/chronicle/etl/utils/progress_bar.rb +3 -1
- data/lib/chronicle/etl/utils/text_recognition.rb +15 -0
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +7 -2
- metadata +96 -44
- data/Gemfile.lock +0 -91
- data/lib/chronicle/etl/catalog.rb +0 -108
- data/lib/chronicle/etl/utils/jsonapi.rb +0 -28
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'json'

module Chronicle
  module ETL
    # Extractor that reads input through the FilesystemReader helper and
    # yields one Extraction for every piece of input that parses as JSON.
    class JsonExtractor < Chronicle::ETL::Extractor
      include Extractors::Helpers::FilesystemReader

      register_connector do |r|
        r.description = 'input as JSON'
      end

      DEFAULT_OPTIONS = {
        filename: $stdin,

        # We're expecting line-separated json objects
        jsonl: true
      }.freeze

      # @param options [Hash] caller options; they take precedence over
      #   DEFAULT_OPTIONS and the merged hash is handed to the superclass
      def initialize(options = {})
        super(DEFAULT_OPTIONS.merge(options))
      end

      # Parses each piece of input handed over by #load_input and yields a
      # Chronicle::ETL::Extraction wrapping the parsed value. Input that does
      # not parse (or parses to nil/false) is skipped without yielding.
      #
      # @yieldparam extraction [Chronicle::ETL::Extraction]
      def extract
        load_input do |input|
          parsed_data = parse_data(input)
          yield Chronicle::ETL::Extraction.new(data: parsed_data) if parsed_data
        end
      end

      # Intentionally empty in this version: always returns nil.
      def results_count
      end

      private

      # @param data [String] raw JSON text
      # @return [Object, nil] the parsed value, or nil when +data+ is not
      #   valid JSON
      def parse_data(data)
        JSON.parse(data)
      rescue JSON::ParserError
        # Best-effort extraction: malformed input is dropped rather than
        # aborting the whole run. #extract treats nil as "nothing to yield".
        nil
      end

      # Reads from the configured :filename option and yields whatever
      # read_from_filesystem produces.
      # NOTE(review): the granularity of each yielded chunk (line vs. whole
      # file) is decided by FilesystemReader, which is not visible here.
      def load_input
        read_from_filesystem(filename: @options[:filename]) do |data|
          yield data
        end
      end
    end
  end
end
|
@@ -1,9 +1,14 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
3
|
class StdinExtractor < Chronicle::ETL::Extractor
|
4
|
+
register_connector do |r|
|
5
|
+
r.description = 'stdin'
|
6
|
+
end
|
7
|
+
|
4
8
|
# Yields one Chronicle::ETL::Extraction per line of standard input. Each
# extraction's data is `{ line: <the line with surrounding whitespace stripped> }`.
#
# @yieldparam extraction [Chronicle::ETL::Extraction]
def extract
  # Iterate the stream directly rather than `$stdin.read.each_line`: the
  # same lines are produced, but they are yielded as they arrive and the
  # whole input never has to be held in memory first.
  $stdin.each_line do |line|
    data = { line: line.strip }
    yield Chronicle::ETL::Extraction.new(data: data)
  end
end
|
9
14
|
end
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
require 'forwardable'
|
1
2
|
module Chronicle
|
2
3
|
module ETL
|
3
4
|
class Job
|
5
|
+
extend Forwardable
|
6
|
+
|
7
|
+
def_delegators :@job_definition, :dry_run?
|
8
|
+
|
4
9
|
attr_accessor :name,
|
5
10
|
:extractor_klass,
|
6
11
|
:extractor_options,
|
@@ -12,32 +17,30 @@ module Chronicle
|
|
12
17
|
# TODO: build a proper id system
|
13
18
|
alias id name
|
14
19
|
|
15
|
-
def initialize(
|
16
|
-
|
17
|
-
@name = definition[:name]
|
18
|
-
@
|
19
|
-
@
|
20
|
-
|
21
|
-
@transformer_klass = load_klass(:transformer, definition[:transformer][:name])
|
22
|
-
@transformer_options = definition[:transformer][:options] || {}
|
23
|
-
|
24
|
-
@loader_klass = load_klass(:loader, definition[:loader][:name])
|
25
|
-
@loader_options = definition[:loader][:options] || {}
|
20
|
+
def initialize(job_definition)
|
21
|
+
@job_definition = job_definition
|
22
|
+
@name = @job_definition.definition[:name]
|
23
|
+
@extractor_options = @job_definition.extractor_options
|
24
|
+
@transformer_options = @job_definition.transformer_options
|
25
|
+
@loader_options = @job_definition.loader_options
|
26
26
|
|
27
|
-
set_continuation if
|
27
|
+
set_continuation if use_continuation?
|
28
28
|
yield self if block_given?
|
29
29
|
end
|
30
30
|
|
31
31
|
def instantiate_extractor
|
32
|
-
|
32
|
+
@extractor_klass = @job_definition.extractor_klass
|
33
|
+
@extractor_klass.new(@extractor_options)
|
33
34
|
end
|
34
35
|
|
35
|
-
def instantiate_transformer(
|
36
|
-
|
36
|
+
def instantiate_transformer(extraction)
|
37
|
+
@transformer_klass = @job_definition.transformer_klass
|
38
|
+
@transformer_klass.new(@transformer_options, extraction)
|
37
39
|
end
|
38
40
|
|
39
41
|
def instantiate_loader
|
40
|
-
|
42
|
+
@loader_klass = @job_definition.loader_klass
|
43
|
+
@loader_klass.new(@loader_options)
|
41
44
|
end
|
42
45
|
|
43
46
|
def save_log?
|
@@ -45,26 +48,24 @@ module Chronicle
|
|
45
48
|
return !id.nil?
|
46
49
|
end
|
47
50
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
def to_s
|
52
|
+
output = "Job"
|
53
|
+
output += " '#{name}'".bold if name
|
54
|
+
output += "\n"
|
55
|
+
output += " → Extracting from #{@job_definition.extractor_klass.description}\n"
|
56
|
+
output += " → Transforming #{@job_definition.transformer_klass.description}\n"
|
57
|
+
output += " → Loading to #{@job_definition.loader_klass.description}\n"
|
55
58
|
end
|
56
59
|
|
57
|
-
|
58
|
-
Chronicle::ETL::Catalog.phase_and_identifier_to_klass(phase, identifier)
|
59
|
-
end
|
60
|
+
private
|
60
61
|
|
61
62
|
# Asks JobLogger for the latest log recorded under this job's id and stores
# the result in the extractor options under :continuation.
def set_continuation
  # Go through the `id` reader instead of `@id`: `id` is declared as an
  # alias of `name`, and nothing in the visible code assigns an @id instance
  # variable, so `@id` would always hand nil to load_latest.
  continuation = Chronicle::ETL::JobLogger.load_latest(id)
  @extractor_options[:continuation] = continuation
end
|
65
66
|
|
66
|
-
def
|
67
|
-
|
67
|
+
def use_continuation?
|
68
|
+
@job_definition.incremental?
|
68
69
|
end
|
69
70
|
end
|
70
71
|
end
|
@@ -1,19 +1,20 @@
|
|
1
|
-
require 'deep_merge'
|
1
|
+
require 'active_support/core_ext/hash/deep_merge'
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
5
5
|
class JobDefinition
|
6
6
|
SKELETON_DEFINITION = {
|
7
|
+
incremental: false,
|
7
8
|
extractor: {
|
8
|
-
name:
|
9
|
+
name: 'stdin',
|
9
10
|
options: {}
|
10
11
|
},
|
11
12
|
transformer: {
|
12
|
-
name:
|
13
|
+
name: 'null',
|
13
14
|
options: {}
|
14
15
|
},
|
15
16
|
loader: {
|
16
|
-
name:
|
17
|
+
name: 'stdout',
|
17
18
|
options: {}
|
18
19
|
}
|
19
20
|
}.freeze
|
@@ -26,16 +27,53 @@ module Chronicle
|
|
26
27
|
|
27
28
|
# Add config hash to this definition
|
28
29
|
def add_config(config = {})
|
29
|
-
@definition =
|
30
|
+
@definition = @definition.deep_merge(config)
|
30
31
|
load_credentials
|
31
32
|
validate
|
32
33
|
end
|
33
34
|
|
35
|
+
# Is this job continuing from a previous run?
|
36
|
+
def incremental?
|
37
|
+
@definition[:incremental]
|
38
|
+
end
|
39
|
+
|
40
|
+
def dry_run?
|
41
|
+
@definition[:dry_run]
|
42
|
+
end
|
43
|
+
|
44
|
+
def extractor_klass
|
45
|
+
load_klass(:extractor, @definition[:extractor][:name])
|
46
|
+
end
|
47
|
+
|
48
|
+
def transformer_klass
|
49
|
+
load_klass(:transformer, @definition[:transformer][:name])
|
50
|
+
end
|
51
|
+
|
52
|
+
def loader_klass
|
53
|
+
load_klass(:loader, @definition[:loader][:name])
|
54
|
+
end
|
55
|
+
|
56
|
+
def extractor_options
|
57
|
+
@definition[:extractor][:options]
|
58
|
+
end
|
59
|
+
|
60
|
+
def transformer_options
|
61
|
+
@definition[:transformer][:options]
|
62
|
+
end
|
63
|
+
|
64
|
+
def loader_options
|
65
|
+
@definition[:loader][:options]
|
66
|
+
end
|
67
|
+
|
34
68
|
private
|
35
69
|
|
70
|
+
def load_klass(phase, identifier)
|
71
|
+
Chronicle::ETL::Registry.find_by_phase_and_identifier(phase, identifier).klass
|
72
|
+
end
|
73
|
+
|
36
74
|
def load_credentials
|
37
|
-
Chronicle::ETL::
|
38
|
-
credentials_name = @definition[phase]
|
75
|
+
Chronicle::ETL::Registry::PHASES.each do |phase|
|
76
|
+
credentials_name = @definition[phase].dig(:options, :credentials)
|
39
77
|
if credentials_name
|
40
78
|
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|
41
79
|
@definition[phase][:options].deep_merge(credentials)
|
@@ -50,11 +50,21 @@ module Chronicle
|
|
50
50
|
@success = true
|
51
51
|
end
|
52
52
|
|
53
|
+
def error
|
54
|
+
@finished_at = Time.now
|
55
|
+
end
|
56
|
+
|
53
57
|
def job= job
|
54
58
|
@job = job
|
55
59
|
@job_id = job.id
|
56
60
|
end
|
57
61
|
|
62
|
+
def duration
|
63
|
+
return unless @finished_at
|
64
|
+
|
65
|
+
@finished_at - @started_at
|
66
|
+
end
|
67
|
+
|
58
68
|
# Take a JobLog's instance variables and turn them into a hash representation
|
59
69
|
def serialize
|
60
70
|
{
|
@@ -1,32 +1,14 @@
|
|
1
1
|
require 'sequel'
|
2
2
|
require 'forwardable'
|
3
3
|
|
4
|
-
require 'pry'
|
5
|
-
|
6
4
|
module Chronicle
|
7
5
|
module ETL
|
8
6
|
# Saves JobLogs to db and loads previous ones
|
9
7
|
class JobLogger
|
10
8
|
extend Forwardable
|
11
9
|
|
12
|
-
def_delegators :@job_log, :start, :finish, :log_transformation
|
13
|
-
|
14
|
-
# Create a new JobLogger
|
15
|
-
def initialize(job)
|
16
|
-
@job_log = JobLog.new do |job_log|
|
17
|
-
job_log.job = job
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
# Save this JobLogger's JobLog to db
|
22
|
-
def save
|
23
|
-
return unless @job_log.save_log?
|
24
|
-
|
25
|
-
JobLogger.with_db_connection do |db|
|
26
|
-
dataset = db[:job_logs]
|
27
|
-
dataset.insert(@job_log.serialize)
|
28
|
-
end
|
29
|
-
end
|
10
|
+
def_delegators :@job_log, :start, :finish, :error, :log_transformation, :duration, :success
|
11
|
+
attr_accessor :job_log
|
30
12
|
|
31
13
|
# For a given `job_id`, return the last successful log
|
32
14
|
def self.load_latest(job_id)
|
@@ -73,6 +55,27 @@ module Chronicle
|
|
73
55
|
Time :finished_at
|
74
56
|
end
|
75
57
|
end
|
58
|
+
|
59
|
+
# Create a new JobLogger
|
60
|
+
def initialize(job)
|
61
|
+
@job_log = JobLog.new do |job_log|
|
62
|
+
job_log.job = job
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Save this JobLogger's JobLog to db
|
67
|
+
def save
|
68
|
+
return unless @job_log.save_log?
|
69
|
+
|
70
|
+
JobLogger.with_db_connection do |db|
|
71
|
+
dataset = db[:job_logs]
|
72
|
+
dataset.insert(@job_log.serialize)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def summarize
|
77
|
+
@job_log.inspect
|
78
|
+
end
|
76
79
|
end
|
77
80
|
end
|
78
81
|
end
|
@@ -2,7 +2,7 @@ module Chronicle
|
|
2
2
|
module ETL
|
3
3
|
# Abstract class representing a Loader for an ETL job
|
4
4
|
class Loader
|
5
|
-
extend Chronicle::ETL::
|
5
|
+
extend Chronicle::ETL::Registry::SelfRegistering
|
6
6
|
|
7
7
|
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
8
|
# == Parameters:
|
@@ -5,12 +5,16 @@ require 'json'
|
|
5
5
|
module Chronicle
|
6
6
|
module ETL
|
7
7
|
class RestLoader < Chronicle::ETL::Loader
|
8
|
+
register_connector do |r|
|
9
|
+
r.description = 'a REST endpoint'
|
10
|
+
end
|
11
|
+
|
8
12
|
def initialize( options={} )
|
9
13
|
super(options)
|
10
14
|
end
|
11
15
|
|
12
16
|
def load(record)
|
13
|
-
payload = Chronicle::ETL::
|
17
|
+
payload = Chronicle::ETL::JSONAPISerializer.serialize(record)
|
14
18
|
# have the outer data key that json-api expects
|
15
19
|
payload = { data: payload } unless payload[:data]
|
16
20
|
|
@@ -1,8 +1,13 @@
|
|
1
1
|
module Chronicle
|
2
2
|
module ETL
|
3
3
|
class StdoutLoader < Chronicle::ETL::Loader
|
4
|
+
register_connector do |r|
|
5
|
+
r.description = 'stdout'
|
6
|
+
end
|
7
|
+
|
4
8
|
def load(record)
|
5
|
-
|
9
|
+
serializer = Chronicle::ETL::JSONAPISerializer.new(record)
|
10
|
+
puts serializer.serializable_hash.to_json
|
6
11
|
end
|
7
12
|
end
|
8
13
|
end
|
@@ -1,21 +1,71 @@
|
|
1
1
|
require 'tty/table'
|
2
|
+
require 'active_support/core_ext/string/filters'
|
3
|
+
require 'active_support/core_ext/hash/reverse_merge'
|
2
4
|
|
3
5
|
module Chronicle
|
4
6
|
module ETL
|
5
7
|
class TableLoader < Chronicle::ETL::Loader
|
6
|
-
|
7
|
-
|
8
|
+
register_connector do |r|
|
9
|
+
r.description = 'an ASCII table'
|
10
|
+
end
|
11
|
+
|
12
|
+
DEFAULT_OPTIONS = {
|
13
|
+
fields_limit: nil,
|
14
|
+
fields_exclude: ['lids', 'type'],
|
15
|
+
fields_include: [],
|
16
|
+
truncate_values_at: nil,
|
17
|
+
table_renderer: :basic
|
18
|
+
}.freeze
|
19
|
+
|
20
|
+
def initialize(options={})
|
21
|
+
@options = options.reverse_merge(DEFAULT_OPTIONS)
|
22
|
+
@records = []
|
8
23
|
end
|
9
24
|
|
10
25
|
def load(record)
|
11
|
-
|
12
|
-
@table ||= TTY::Table.new(header: record_hash.keys)
|
13
|
-
values = record_hash.values.map{|x| x.to_s[0..30]}
|
14
|
-
@table << values
|
26
|
+
@records << record.to_h_flattened
|
15
27
|
end
|
16
28
|
|
17
29
|
def finish
|
18
|
-
|
30
|
+
return if @records.empty?
|
31
|
+
|
32
|
+
headers = build_headers(@records)
|
33
|
+
rows = build_rows(@records, headers)
|
34
|
+
|
35
|
+
@table = TTY::Table.new(header: headers, rows: rows)
|
36
|
+
puts @table.render(
|
37
|
+
@options[:table_renderer].to_sym,
|
38
|
+
padding: [0, 2, 0, 0]
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def build_headers(records)
|
45
|
+
headers =
|
46
|
+
if @options[:fields_include].any?
|
47
|
+
Set[*@options[:fields_include]]
|
48
|
+
else
|
49
|
+
# use all the keys of the flattened record hash
|
50
|
+
Set[*records.map(&:keys).flatten.map(&:to_s).uniq]
|
51
|
+
end
|
52
|
+
|
53
|
+
headers = headers.delete_if { |header| header.end_with?(*@options[:fields_exclude]) } if @options[:fields_exclude].any?
|
54
|
+
headers = headers.first(@options[:fields_limit]) if @options[:fields_limit]
|
55
|
+
|
56
|
+
headers.to_a.map(&:to_sym)
|
57
|
+
end
|
58
|
+
|
59
|
+
def build_rows(records, headers)
|
60
|
+
records.map do |record|
|
61
|
+
values = record.values_at(*headers).map{|value| value.to_s }
|
62
|
+
|
63
|
+
if @options[:truncate_values_at]
|
64
|
+
values = values.map{ |value| value.truncate(@options[:truncate_values_at]) }
|
65
|
+
end
|
66
|
+
|
67
|
+
values
|
68
|
+
end
|
19
69
|
end
|
20
70
|
end
|
21
71
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Chronicle
  module ETL
    # Small levelled logger used as a module-level singleton (`extend self`).
    # Messages at or above the current log level are written either to an
    # attached progress bar or to the destination captured when this file
    # was loaded ($stderr).
    module Logger
      extend self

      DEBUG = 0
      INFO = 1
      WARN = 2
      ERROR = 3
      FATAL = 4

      # Minimum level a message needs in order to be emitted.
      attr_accessor :log_level

      @log_level = INFO
      @destination = $stderr

      # Emits +message+ if +level+ is at least the configured log level.
      #
      # @param message [String] text to write
      # @param level [Integer] one of the level constants above
      def output(message, level)
        return unless level >= @log_level

        if @progress_bar
          # Route through the progress bar's own `log` while one is attached.
          @progress_bar.log(message)
        else
          @destination.puts(message)
        end
      end

      # Logs +message+ at ERROR level.
      def error(message)
        output(message, ERROR)
      end

      # Logs +message+ at INFO level.
      def info(message)
        output(message, INFO)
      end

      # Logs +message+ at DEBUG level.
      def debug(message)
        output(message, DEBUG)
      end

      # Sends subsequent messages to +progress_bar+ (anything responding to
      # `log`) instead of the default destination.
      def attach_to_progress_bar(progress_bar)
        @progress_bar = progress_bar
      end

      # Restores output to the default destination.
      def detach_from_progress_bar
        @progress_bar = nil
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'chronicle/etl/models/base'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    module Models
      # Model subclass of Base whose TYPE is 'attachments'. On top of what
      # Base provides it exposes a reader/writer pair for each entry in
      # ATTRIBUTES.
      class Attachment < Chronicle::ETL::Models::Base
        TYPE = 'attachments'.freeze
        ATTRIBUTES = %i[url_original data].freeze

        ATTRIBUTES.each { |attribute| attr_accessor attribute }
      end
    end
  end
end
|
@@ -6,7 +6,7 @@ module Chronicle
|
|
6
6
|
# Represents a record that's been transformed by a Transformer and
|
7
7
|
# ready to be loaded. Loosely based on ActiveModel.
|
8
8
|
class Base
|
9
|
-
ATTRIBUTES = [:provider, :provider_id, :lat, :lng].freeze
|
9
|
+
ATTRIBUTES = [:provider, :provider_id, :lat, :lng, :metadata].freeze
|
10
10
|
ASSOCIATIONS = [].freeze
|
11
11
|
|
12
12
|
attr_accessor(:id, :dedupe_on, *ATTRIBUTES)
|
@@ -14,6 +14,7 @@ module Chronicle
|
|
14
14
|
# Builds a model, optionally seeding it from a hash of attributes.
#
# @param attributes [Hash, nil] handed to assign_attributes when truthy
def initialize(attributes = {})
  assign_attributes(attributes) if attributes
  # Fill in defaults only where nothing was supplied. Assigning these
  # unconditionally after assign_attributes would overwrite a `dedupe_on`
  # or `metadata` value the caller passed in.
  # NOTE(review): assumes assign_attributes stores values through the
  # attribute writers; its body is only partly visible here, so confirm.
  @dedupe_on ||= []
  @metadata ||= {}
end
|
18
19
|
|
19
20
|
# A unique identifier for this model is formed from a type
|
@@ -36,6 +37,8 @@ module Chronicle
|
|
36
37
|
# For a given set of fields of this model, generate a
|
37
38
|
# unique local id by hashing the field values
|
38
39
|
def generate_lid fields
|
40
|
+
raise ArgumentError.new("Must provide an array of symbolized fields") unless fields.is_a?(Array)
|
41
|
+
|
39
42
|
values = fields.sort.map do |field|
|
40
43
|
instance_variable = "@#{field.to_s}"
|
41
44
|
self.instance_variable_get(instance_variable)
|
@@ -75,9 +78,21 @@ module Chronicle
|
|
75
78
|
end
|
76
79
|
|
77
80
|
def associations_hash
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
+
associations.map do |k, v|
|
82
|
+
if v.is_a?(Array)
|
83
|
+
[k, v.map(&:to_h)]
|
84
|
+
else
|
85
|
+
[k, v.to_h]
|
86
|
+
end
|
87
|
+
end.to_h
|
88
|
+
end
|
89
|
+
|
90
|
+
def meta_hash
|
91
|
+
{
|
92
|
+
meta: {
|
93
|
+
dedupe_on: @dedupe_on.map{|d| d.map(&:to_s).join(",")}
|
94
|
+
}
|
95
|
+
}
|
81
96
|
end
|
82
97
|
|
83
98
|
# FIXME: move this to a Utils module
|
@@ -86,11 +101,12 @@ module Chronicle
|
|
86
101
|
end
|
87
102
|
|
88
103
|
def to_h
|
89
|
-
identifier_hash
|
104
|
+
identifier_hash
|
105
|
+
.merge(attributes)
|
106
|
+
.merge(associations_hash)
|
107
|
+
.merge(meta_hash)
|
90
108
|
end
|
91
109
|
|
92
|
-
private
|
93
|
-
|
94
110
|
def assign_attributes attributes
|
95
111
|
attributes.each do |k, v|
|
96
112
|
setter = :"#{k}="
|
@@ -5,10 +5,16 @@ module Chronicle
|
|
5
5
|
module Models
|
6
6
|
class Entity < Chronicle::ETL::Models::Base
|
7
7
|
TYPE = 'entities'.freeze
|
8
|
-
ATTRIBUTES = [:title, :body, :represents, :slug].freeze
|
9
|
-
ASSOCIATIONS = [
|
8
|
+
ATTRIBUTES = [:title, :body, :represents, :slug, :myself, :metadata].freeze
|
9
|
+
ASSOCIATIONS = [
|
10
|
+
:attachments,
|
11
|
+
:abouts,
|
12
|
+
:depicts,
|
13
|
+
:consumers,
|
14
|
+
:contains
|
15
|
+
].freeze # TODO: add these to reflect Chronicle Schema
|
10
16
|
|
11
|
-
attr_accessor(*ATTRIBUTES)
|
17
|
+
attr_accessor(*ATTRIBUTES, *ASSOCIATIONS)
|
12
18
|
end
|
13
19
|
end
|
14
20
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Chronicle
  module ETL
    module Registry
      # Records details about a connector such as its provider and a description
      class ConnectorRegistration
        # Trailing phase word removed from a class name to derive the
        # default identifier (e.g. "CsvExtractor" -> "csv").
        PHASE_SUFFIX = /(Extractor|Loader|Transformer)\z/.freeze

        attr_accessor :klass
        # Readers for these three are defined explicitly below because each
        # falls back to a value derived from the class when nothing was set.
        attr_writer :identifier, :provider, :description

        # @param klass [Class] the connector class being registered
        def initialize(klass)
          @klass = klass
        end

        # @return [Symbol, nil] :extractor, :transformer or :loader depending
        #   on which base class appears in the connector's ancestors; nil
        #   when none of them does
        def phase
          if klass.ancestors.include?(Chronicle::ETL::Extractor)
            :extractor
          elsif klass.ancestors.include?(Chronicle::ETL::Transformer)
            :transformer
          elsif klass.ancestors.include?(Chronicle::ETL::Loader)
            :loader
          end
        end

        # @return [String] "<phase>-<identifier>"
        def to_s
          "#{phase}-#{identifier}"
        end

        # @return [Boolean] true when the class name contains 'Chronicle::ETL'
        def built_in?
          @klass.to_s.include? 'Chronicle::ETL'
        end

        # @return [String] the connector's class name
        def klass_name
          @klass.to_s
        end

        # @return [String] the explicitly assigned identifier, or else the
        #   last segment of the class name, minus a trailing
        #   Extractor/Loader/Transformer, lowercased
        def identifier
          # Non-bang `sub` always returns a String. The previous `gsub!`
          # returned nil whenever the class name carried none of the
          # suffixes, which made the chained `.downcase` raise NoMethodError.
          @identifier || klass_name.split('::').last.sub(PHASE_SUFFIX, '').downcase
        end

        # @return [String] the explicitly assigned description, or else the
        #   last segment of the class name
        def description
          @description || klass_name.split('::').last
        end

        # @return [String] the explicitly assigned provider, or else
        #   'chronicle' for built-in connectors and '' for all others
        def provider
          @provider || (built_in? ? 'chronicle' : '')
        end

        # @return [String] a human-readable phrase such as
        #   "Extracts from <description>"; the prefix is empty when the
        #   phase is unknown
        def descriptive_phrase
          prefix = case phase
                   when :extractor
                     "Extracts from"
                   when :transformer
                     "Transforms"
                   when :loader
                     "Loads to"
                   end

          "#{prefix} #{description}"
        end
      end
    end
  end
end
|