chronicle-etl 0.1.4 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.yardopts +1 -0
- data/Gemfile.lock +15 -1
- data/README.md +31 -13
- data/chronicle-etl.gemspec +6 -1
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +15 -2
- data/lib/chronicle/etl/catalog.rb +67 -17
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +116 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +53 -0
- data/lib/chronicle/etl/exceptions.rb +19 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +2 -3
- data/lib/chronicle/etl/extractors/extractor.rb +21 -5
- data/lib/chronicle/etl/extractors/file_extractor.rb +2 -2
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +2 -2
- data/lib/chronicle/etl/job.rb +71 -0
- data/lib/chronicle/etl/job_definition.rb +51 -0
- data/lib/chronicle/etl/job_log.rb +85 -0
- data/lib/chronicle/etl/job_logger.rb +78 -0
- data/lib/chronicle/etl/loaders/csv_loader.rb +4 -8
- data/lib/chronicle/etl/loaders/loader.rb +11 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +33 -0
- data/lib/chronicle/etl/loaders/stdout_loader.rb +5 -5
- data/lib/chronicle/etl/loaders/table_loader.rb +7 -6
- data/lib/chronicle/etl/models/activity.rb +15 -0
- data/lib/chronicle/etl/models/base.rb +103 -0
- data/lib/chronicle/etl/models/entity.rb +15 -0
- data/lib/chronicle/etl/models/generic.rb +23 -0
- data/lib/chronicle/etl/runner.rb +24 -46
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -6
- data/lib/chronicle/etl/transformers/transformer.rb +23 -7
- data/lib/chronicle/etl/utils/hash_utilities.rb +19 -0
- data/lib/chronicle/etl/utils/jsonapi.rb +28 -0
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -2
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +91 -5
- data/CHANGELOG.md +0 -23
- data/lib/chronicle/etl/cli.rb +0 -56
- data/lib/chronicle/etl/transformers/json_transformer.rb +0 -11
@@ -1,10 +1,9 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module Etl
|
3
|
-
class NullTransformer < Chronicle::Etl::Transformer
|
4
|
-
def transform
|
5
|
-
|
2
|
+
module ETL
|
3
|
+
class NullTransformer < Chronicle::ETL::Transformer
|
4
|
+
def transform
|
5
|
+
Chronicle::ETL::Models::Generic.new(@data)
|
6
6
|
end
|
7
7
|
end
|
8
|
-
|
9
8
|
end
|
10
|
-
end
|
9
|
+
end
|
@@ -1,18 +1,34 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module Etl
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing a Transformer for an ETL job
|
3
4
|
class Transformer
|
4
|
-
extend Chronicle::Etl::Catalog
|
5
|
+
extend Chronicle::ETL::Catalog
|
5
6
|
|
6
|
-
|
7
|
+
# Construct a new instance of this transformer. Options are passed in from a Runner
|
8
|
+
# == Parameters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Transformer
|
11
|
+
def initialize(options = {}, data)
|
7
12
|
@options = options
|
13
|
+
@data = data
|
14
|
+
@record = Chronicle::ETL::Models::Activity.new
|
8
15
|
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
17
|
+
# @abstract Subclass is expected to implement #transform
|
18
|
+
# @!method transform
|
19
|
+
# The main entrypoint for transforming a record. Called by a Runner on each extracted record
|
20
|
+
|
21
|
+
# The domain or provider-specific id of the record this transformer is working on.
|
22
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
23
|
+
# data source.
|
24
|
+
def id; end
|
25
|
+
|
26
|
+
# The domain or provider-specific timestamp of the record this transformer is working on.
|
27
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
28
|
+
# data source.
|
29
|
+
def timestamp; end
|
13
30
|
end
|
14
31
|
end
|
15
32
|
end
|
16
33
|
|
17
|
-
require_relative 'json_transformer'
|
18
34
|
require_relative 'null_transformer'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module Utils
|
4
|
+
module HashUtilities
|
5
|
+
def self.flatten_hash(hash)
|
6
|
+
hash.each_with_object({}) do |(k, v), h|
|
7
|
+
if v.is_a? Hash
|
8
|
+
flatten_hash(v).map do |h_k, h_v|
|
9
|
+
h["#{k}.#{h_k}".to_sym] = h_v
|
10
|
+
end
|
11
|
+
else
|
12
|
+
h[k] = v
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module Utils
|
4
|
+
module JSONAPI
|
5
|
+
# For a given Chronicle::ETL::Model, serialize it as jsonapi
|
6
|
+
def self.serialize(record)
|
7
|
+
return unless record.is_a? Chronicle::ETL::Models::Base
|
8
|
+
|
9
|
+
obj = record.identifier_hash
|
10
|
+
obj[:attributes] = record.attributes
|
11
|
+
|
12
|
+
relationships = Hash[record.associations.map do |k, v|
|
13
|
+
if v.is_a?(Array)
|
14
|
+
data = { data: v.map{ |association| serialize(association) } }
|
15
|
+
else
|
16
|
+
data = { data: serialize(v) }
|
17
|
+
end
|
18
|
+
|
19
|
+
[k, data]
|
20
|
+
end]
|
21
|
+
|
22
|
+
obj[:relationships] = relationships if relationships.any?
|
23
|
+
obj
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -2,7 +2,7 @@ require 'tty/progressbar'
|
|
2
2
|
require 'colorize'
|
3
3
|
|
4
4
|
module Chronicle
|
5
|
-
module Etl
|
5
|
+
module ETL
|
6
6
|
module Utils
|
7
7
|
|
8
8
|
class ProgressBar
|
@@ -64,7 +64,7 @@ module Chronicle
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def log(message)
|
67
|
-
@pbar.log message
|
67
|
+
@pbar.log message
|
68
68
|
end
|
69
69
|
|
70
70
|
def finish
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -66,6 +66,34 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.17'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sequel
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '5.35'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '5.35'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: deep_merge
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.2'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: bundler
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,6 +150,48 @@ dependencies:
|
|
122
150
|
- - "~>"
|
123
151
|
- !ruby/object:Gem::Version
|
124
152
|
version: '3.9'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: runcom
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '6.2'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '6.2'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: redcarpet
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '3.5'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '3.5'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: sqlite3
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '1.4'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '1.4'
|
125
195
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
126
196
|
transformer it, and load it.
|
127
197
|
email:
|
@@ -133,9 +203,10 @@ extra_rdoc_files: []
|
|
133
203
|
files:
|
134
204
|
- ".gitignore"
|
135
205
|
- ".rspec"
|
206
|
+
- ".rubocop.yml"
|
136
207
|
- ".ruby-version"
|
137
208
|
- ".travis.yml"
|
138
|
-
- CHANGELOG.md
|
209
|
+
- ".yardopts"
|
139
210
|
- CODE_OF_CONDUCT.md
|
140
211
|
- Gemfile
|
141
212
|
- Gemfile.lock
|
@@ -148,19 +219,34 @@ files:
|
|
148
219
|
- exe/chronicle-etl
|
149
220
|
- lib/chronicle/etl.rb
|
150
221
|
- lib/chronicle/etl/catalog.rb
|
151
|
-
- lib/chronicle/etl/cli.rb
|
222
|
+
- lib/chronicle/etl/cli/connectors.rb
|
223
|
+
- lib/chronicle/etl/cli/jobs.rb
|
224
|
+
- lib/chronicle/etl/cli/main.rb
|
225
|
+
- lib/chronicle/etl/cli/subcommand_base.rb
|
226
|
+
- lib/chronicle/etl/config.rb
|
227
|
+
- lib/chronicle/etl/exceptions.rb
|
152
228
|
- lib/chronicle/etl/extractors/csv_extractor.rb
|
153
229
|
- lib/chronicle/etl/extractors/extractor.rb
|
154
230
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
155
231
|
- lib/chronicle/etl/extractors/stdin_extractor.rb
|
232
|
+
- lib/chronicle/etl/job.rb
|
233
|
+
- lib/chronicle/etl/job_definition.rb
|
234
|
+
- lib/chronicle/etl/job_log.rb
|
235
|
+
- lib/chronicle/etl/job_logger.rb
|
156
236
|
- lib/chronicle/etl/loaders/csv_loader.rb
|
157
237
|
- lib/chronicle/etl/loaders/loader.rb
|
238
|
+
- lib/chronicle/etl/loaders/rest_loader.rb
|
158
239
|
- lib/chronicle/etl/loaders/stdout_loader.rb
|
159
240
|
- lib/chronicle/etl/loaders/table_loader.rb
|
241
|
+
- lib/chronicle/etl/models/activity.rb
|
242
|
+
- lib/chronicle/etl/models/base.rb
|
243
|
+
- lib/chronicle/etl/models/entity.rb
|
244
|
+
- lib/chronicle/etl/models/generic.rb
|
160
245
|
- lib/chronicle/etl/runner.rb
|
161
|
-
- lib/chronicle/etl/transformers/json_transformer.rb
|
162
246
|
- lib/chronicle/etl/transformers/null_transformer.rb
|
163
247
|
- lib/chronicle/etl/transformers/transformer.rb
|
248
|
+
- lib/chronicle/etl/utils/hash_utilities.rb
|
249
|
+
- lib/chronicle/etl/utils/jsonapi.rb
|
164
250
|
- lib/chronicle/etl/utils/progress_bar.rb
|
165
251
|
- lib/chronicle/etl/version.rb
|
166
252
|
homepage: https://github.com/chronicle-app
|
data/CHANGELOG.md
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# Changelog
|
2
|
-
|
3
|
-
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
|
-
|
5
|
-
## [0.1.4] - 2020-08-18
|
6
|
-
### Updated
|
7
|
-
- Better display of available ETL classes
|
8
|
-
- Updated documentation
|
9
|
-
|
10
|
-
## [0.1.3] - 2020-08-13
|
11
|
-
### Added
|
12
|
-
- Ability to list all available ETL classes
|
13
|
-
- Refactored E, T, L module and class structure
|
14
|
-
- Better progress bar
|
15
|
-
|
16
|
-
## [0.1.2] - 2020-08-02
|
17
|
-
### Added
|
18
|
-
- This changelog
|
19
|
-
- Ability to use extractors, transformers, and loaders from other gems
|
20
|
-
|
21
|
-
## [0.1.0] - 2020-08-01
|
22
|
-
### Added
|
23
|
-
- Basic job runner and ETL classes
|
data/lib/chronicle/etl/cli.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'chronicle/etl'
|
3
|
-
require 'colorize'
|
4
|
-
|
5
|
-
module Chronicle
|
6
|
-
module Etl
|
7
|
-
class CLI < Thor
|
8
|
-
default_task :job
|
9
|
-
|
10
|
-
desc 'job', 'Runs an ETL job'
|
11
|
-
method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
12
|
-
method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
13
|
-
method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
14
|
-
method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
15
|
-
method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
16
|
-
method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
17
|
-
method_option :job, aliases: '-j', desc: 'Job configuration file'
|
18
|
-
|
19
|
-
def job
|
20
|
-
runner_options = {
|
21
|
-
extractor: {
|
22
|
-
name: options[:extractor],
|
23
|
-
options: options[:'extractor-opts']
|
24
|
-
},
|
25
|
-
transformer: {
|
26
|
-
name: options[:transformer],
|
27
|
-
options: options[:'transformer-opts']
|
28
|
-
},
|
29
|
-
loader: {
|
30
|
-
name: options[:loader],
|
31
|
-
options: options[:'loader-opts']
|
32
|
-
}
|
33
|
-
}
|
34
|
-
|
35
|
-
runner = Runner.new(runner_options)
|
36
|
-
runner.run!
|
37
|
-
end
|
38
|
-
|
39
|
-
# FIXME: namespace this differently
|
40
|
-
desc 'list', 'List all ETL classes'
|
41
|
-
def list
|
42
|
-
klasses = Chronicle::Etl::Catalog.available_classes
|
43
|
-
klasses = klasses.sort_by do |a|
|
44
|
-
[a[:built_in].to_s, a[:provider], a[:phase]]
|
45
|
-
end
|
46
|
-
|
47
|
-
headers = klasses.first.keys.map do |key|
|
48
|
-
key.to_s.capitalize.light_white
|
49
|
-
end
|
50
|
-
|
51
|
-
table = TTY::Table.new(headers, klasses.map(&:values))
|
52
|
-
puts table.render(padding: [0, 2])
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|