chronicle-etl 0.1.4 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.yardopts +1 -0
- data/Gemfile.lock +15 -1
- data/README.md +31 -13
- data/chronicle-etl.gemspec +6 -1
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +15 -2
- data/lib/chronicle/etl/catalog.rb +67 -17
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +116 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +53 -0
- data/lib/chronicle/etl/exceptions.rb +19 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +2 -3
- data/lib/chronicle/etl/extractors/extractor.rb +21 -5
- data/lib/chronicle/etl/extractors/file_extractor.rb +2 -2
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +2 -2
- data/lib/chronicle/etl/job.rb +71 -0
- data/lib/chronicle/etl/job_definition.rb +51 -0
- data/lib/chronicle/etl/job_log.rb +85 -0
- data/lib/chronicle/etl/job_logger.rb +78 -0
- data/lib/chronicle/etl/loaders/csv_loader.rb +4 -8
- data/lib/chronicle/etl/loaders/loader.rb +11 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +33 -0
- data/lib/chronicle/etl/loaders/stdout_loader.rb +5 -5
- data/lib/chronicle/etl/loaders/table_loader.rb +7 -6
- data/lib/chronicle/etl/models/activity.rb +15 -0
- data/lib/chronicle/etl/models/base.rb +103 -0
- data/lib/chronicle/etl/models/entity.rb +15 -0
- data/lib/chronicle/etl/models/generic.rb +23 -0
- data/lib/chronicle/etl/runner.rb +24 -46
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -6
- data/lib/chronicle/etl/transformers/transformer.rb +23 -7
- data/lib/chronicle/etl/utils/hash_utilities.rb +19 -0
- data/lib/chronicle/etl/utils/jsonapi.rb +28 -0
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -2
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +91 -5
- data/CHANGELOG.md +0 -23
- data/lib/chronicle/etl/cli.rb +0 -56
- data/lib/chronicle/etl/transformers/json_transformer.rb +0 -11
@@ -1,10 +1,9 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module Etl
|
3
|
-
class NullTransformer < Chronicle::Etl::Transformer
|
4
|
-
def transform
|
5
|
-
|
2
|
+
module ETL
|
3
|
+
class NullTransformer < Chronicle::ETL::Transformer
|
4
|
+
def transform
|
5
|
+
Chronicle::ETL::Models::Generic.new(@data)
|
6
6
|
end
|
7
7
|
end
|
8
|
-
|
9
8
|
end
|
10
|
-
end
|
9
|
+
end
|
@@ -1,18 +1,34 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module Etl
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing an Transformer for an ETL job
|
3
4
|
class Transformer
|
4
|
-
extend Chronicle::Etl::Catalog
|
5
|
+
extend Chronicle::ETL::Catalog
|
5
6
|
|
6
|
-
|
7
|
+
# Construct a new instance of this transformer. Options are passed in from a Runner
|
8
|
+
# == Paramters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Transformer
|
11
|
+
def initialize(options = {}, data)
|
7
12
|
@options = options
|
13
|
+
@data = data
|
14
|
+
@record = Chronicle::ETL::Models::Activity.new
|
8
15
|
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
17
|
+
# @abstract Subclass is expected to implement #transform
|
18
|
+
# @!method transform
|
19
|
+
# The main entrypoint for transforming a record. Called by a Runner on each extracted record
|
20
|
+
|
21
|
+
# The domain or provider-specific id of the record this transformer is working on.
|
22
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
23
|
+
# data source from the beginning.
|
24
|
+
def id; end
|
25
|
+
|
26
|
+
# The domain or provider-specific timestamp of the record this transformer is working on.
|
27
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
28
|
+
# data source from the beginning.
|
29
|
+
def timestamp; end
|
13
30
|
end
|
14
31
|
end
|
15
32
|
end
|
16
33
|
|
17
|
-
require_relative 'json_transformer'
|
18
34
|
require_relative 'null_transformer'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module Utils
|
4
|
+
module HashUtilities
|
5
|
+
def self.flatten_hash(hash)
|
6
|
+
hash.each_with_object({}) do |(k, v), h|
|
7
|
+
if v.is_a? Hash
|
8
|
+
flatten_hash(v).map do |h_k, h_v|
|
9
|
+
h["#{k}.#{h_k}".to_sym] = h_v
|
10
|
+
end
|
11
|
+
else
|
12
|
+
h[k] = v
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module Utils
|
4
|
+
module JSONAPI
|
5
|
+
# For a given Chronicle::ETL::Model, serialize it as jsonapi
|
6
|
+
def self.serialize(record)
|
7
|
+
return unless record.is_a? Chronicle::ETL::Models::Base
|
8
|
+
|
9
|
+
obj = record.identifier_hash
|
10
|
+
obj[:attributes] = record.attributes
|
11
|
+
|
12
|
+
relationships = Hash[record.associations.map do |k, v|
|
13
|
+
if v.is_a?(Array)
|
14
|
+
data = { data: v.map{ |association| serialize(association) } }
|
15
|
+
else
|
16
|
+
data = { data: serialize(v) }
|
17
|
+
end
|
18
|
+
|
19
|
+
[k, data]
|
20
|
+
end]
|
21
|
+
|
22
|
+
obj[:relationships] = relationships if relationships.any?
|
23
|
+
obj
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -2,7 +2,7 @@ require 'tty/progressbar'
|
|
2
2
|
require 'colorize'
|
3
3
|
|
4
4
|
module Chronicle
|
5
|
-
module Etl
|
5
|
+
module ETL
|
6
6
|
module Utils
|
7
7
|
|
8
8
|
class ProgressBar
|
@@ -64,7 +64,7 @@ module Chronicle
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def log(message)
|
67
|
-
@pbar.log message
|
67
|
+
@pbar.log message
|
68
68
|
end
|
69
69
|
|
70
70
|
def finish
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -66,6 +66,34 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.17'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sequel
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '5.35'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '5.35'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: deep_merge
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.2'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: bundler
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,6 +150,48 @@ dependencies:
|
|
122
150
|
- - "~>"
|
123
151
|
- !ruby/object:Gem::Version
|
124
152
|
version: '3.9'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: runcom
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '6.2'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '6.2'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: redcarpet
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '3.5'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '3.5'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: sqlite3
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '1.4'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '1.4'
|
125
195
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
126
196
|
transformer it, and load it.
|
127
197
|
email:
|
@@ -133,9 +203,10 @@ extra_rdoc_files: []
|
|
133
203
|
files:
|
134
204
|
- ".gitignore"
|
135
205
|
- ".rspec"
|
206
|
+
- ".rubocop.yml"
|
136
207
|
- ".ruby-version"
|
137
208
|
- ".travis.yml"
|
138
|
-
- CHANGELOG.md
|
209
|
+
- ".yardopts"
|
139
210
|
- CODE_OF_CONDUCT.md
|
140
211
|
- Gemfile
|
141
212
|
- Gemfile.lock
|
@@ -148,19 +219,34 @@ files:
|
|
148
219
|
- exe/chronicle-etl
|
149
220
|
- lib/chronicle/etl.rb
|
150
221
|
- lib/chronicle/etl/catalog.rb
|
151
|
-
- lib/chronicle/etl/cli.rb
|
222
|
+
- lib/chronicle/etl/cli/connectors.rb
|
223
|
+
- lib/chronicle/etl/cli/jobs.rb
|
224
|
+
- lib/chronicle/etl/cli/main.rb
|
225
|
+
- lib/chronicle/etl/cli/subcommand_base.rb
|
226
|
+
- lib/chronicle/etl/config.rb
|
227
|
+
- lib/chronicle/etl/exceptions.rb
|
152
228
|
- lib/chronicle/etl/extractors/csv_extractor.rb
|
153
229
|
- lib/chronicle/etl/extractors/extractor.rb
|
154
230
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
155
231
|
- lib/chronicle/etl/extractors/stdin_extractor.rb
|
232
|
+
- lib/chronicle/etl/job.rb
|
233
|
+
- lib/chronicle/etl/job_definition.rb
|
234
|
+
- lib/chronicle/etl/job_log.rb
|
235
|
+
- lib/chronicle/etl/job_logger.rb
|
156
236
|
- lib/chronicle/etl/loaders/csv_loader.rb
|
157
237
|
- lib/chronicle/etl/loaders/loader.rb
|
238
|
+
- lib/chronicle/etl/loaders/rest_loader.rb
|
158
239
|
- lib/chronicle/etl/loaders/stdout_loader.rb
|
159
240
|
- lib/chronicle/etl/loaders/table_loader.rb
|
241
|
+
- lib/chronicle/etl/models/activity.rb
|
242
|
+
- lib/chronicle/etl/models/base.rb
|
243
|
+
- lib/chronicle/etl/models/entity.rb
|
244
|
+
- lib/chronicle/etl/models/generic.rb
|
160
245
|
- lib/chronicle/etl/runner.rb
|
161
|
-
- lib/chronicle/etl/transformers/json_transformer.rb
|
162
246
|
- lib/chronicle/etl/transformers/null_transformer.rb
|
163
247
|
- lib/chronicle/etl/transformers/transformer.rb
|
248
|
+
- lib/chronicle/etl/utils/hash_utilities.rb
|
249
|
+
- lib/chronicle/etl/utils/jsonapi.rb
|
164
250
|
- lib/chronicle/etl/utils/progress_bar.rb
|
165
251
|
- lib/chronicle/etl/version.rb
|
166
252
|
homepage: https://github.com/chronicle-app
|
data/CHANGELOG.md
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# Changelog
|
2
|
-
|
3
|
-
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
|
-
|
5
|
-
## [0.1.4] - 2020-08-18
|
6
|
-
### Updated
|
7
|
-
- Better display of available ETL classes
|
8
|
-
- Updated documentation
|
9
|
-
|
10
|
-
## [0.1.3] - 2020-08-13
|
11
|
-
### Added
|
12
|
-
- Ability to list all available ETL classes
|
13
|
-
- Refactored E, T, L module and class structure
|
14
|
-
- Better progress bar
|
15
|
-
|
16
|
-
## [0.1.2] - 2020-08-02
|
17
|
-
### Added
|
18
|
-
- This changelog
|
19
|
-
- Ability to use extractors, transformers, and loaders from other gems
|
20
|
-
|
21
|
-
## [0.1.0] - 2020-08-01
|
22
|
-
### Added
|
23
|
-
- Basic job runner and ETL classes
|
data/lib/chronicle/etl/cli.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'chronicle/etl'
|
3
|
-
require 'colorize'
|
4
|
-
|
5
|
-
module Chronicle
|
6
|
-
module Etl
|
7
|
-
class CLI < Thor
|
8
|
-
default_task :job
|
9
|
-
|
10
|
-
desc 'job', 'Runs an ETL job'
|
11
|
-
method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
12
|
-
method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
13
|
-
method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
14
|
-
method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
15
|
-
method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
16
|
-
method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
17
|
-
method_option :job, aliases: '-j', desc: 'Job configuration file'
|
18
|
-
|
19
|
-
def job
|
20
|
-
runner_options = {
|
21
|
-
extractor: {
|
22
|
-
name: options[:extractor],
|
23
|
-
options: options[:'extractor-opts']
|
24
|
-
},
|
25
|
-
transformer: {
|
26
|
-
name: options[:transformer],
|
27
|
-
options: options[:'transformer-opts']
|
28
|
-
},
|
29
|
-
loader: {
|
30
|
-
name: options[:loader],
|
31
|
-
options: options[:'loader-opts']
|
32
|
-
}
|
33
|
-
}
|
34
|
-
|
35
|
-
runner = Runner.new(runner_options)
|
36
|
-
runner.run!
|
37
|
-
end
|
38
|
-
|
39
|
-
# FIXME: namespace this differently
|
40
|
-
desc 'list', 'List all ETL classes'
|
41
|
-
def list
|
42
|
-
klasses = Chronicle::Etl::Catalog.available_classes
|
43
|
-
klasses = klasses.sort_by do |a|
|
44
|
-
[a[:built_in].to_s, a[:provider], a[:phase]]
|
45
|
-
end
|
46
|
-
|
47
|
-
headers = klasses.first.keys.map do |key|
|
48
|
-
key.to_s.capitalize.light_white
|
49
|
-
end
|
50
|
-
|
51
|
-
table = TTY::Table.new(headers, klasses.map(&:values))
|
52
|
-
puts table.render(padding: [0, 2])
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|