chronicle-etl 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.ruby-version +1 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +23 -0
- data/Gemfile.lock +42 -10
- data/README.md +64 -11
- data/bin/console +16 -4
- data/chronicle-etl.gemspec +9 -7
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +5 -2
- data/lib/chronicle/etl/catalog.rb +62 -0
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +111 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +32 -0
- data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
- data/lib/chronicle/etl/extractors/extractor.rb +23 -12
- data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
- data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
- data/lib/chronicle/etl/loaders/loader.rb +23 -16
- data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
- data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
- data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
- data/lib/chronicle/etl/runner.rb +33 -11
- data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
- data/lib/chronicle/etl/transformers/transformer.rb +27 -11
- data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +69 -30
- data/lib/chronicle/etl/cli.rb +0 -38
- data/lib/chronicle/etl/extractors/stdin.rb +0 -13
- data/lib/chronicle/etl/loaders/csv.rb +0 -31
- data/lib/chronicle/etl/loaders/stdout.rb +0 -11
- data/lib/chronicle/etl/loaders/table.rb +0 -22
- data/lib/chronicle/etl/transformers/json.rb +0 -13
- data/lib/chronicle/etl/transformers/null.rb +0 -11
- data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
@@ -1,18 +1,34 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
@options = options
|
7
|
-
end
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing a Transformer for an ETL job
|
4
|
+
class Transformer
|
5
|
+
extend Chronicle::ETL::Catalog
|
8
6
|
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# Construct a new instance of this transformer. Options are passed in from a Runner
|
8
|
+
# == Parameters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Transformer
|
11
|
+
def initialize(options = {})
|
12
|
+
@options = options
|
12
13
|
end
|
14
|
+
|
15
|
+
# The main entrypoint for transforming a record. Called by a Runner on each extracted record
|
16
|
+
def transform data
|
17
|
+
raise NotImplementedError
|
18
|
+
end
|
19
|
+
|
20
|
+
# The domain or provider-specific id of the record this transformer is working on.
|
21
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
22
|
+
# data source.
|
23
|
+
def id; end
|
24
|
+
|
25
|
+
# The domain or provider-specific timestamp of the record this transformer is working on.
|
26
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
27
|
+
# data source.
|
28
|
+
def timestamp; end
|
13
29
|
end
|
14
30
|
end
|
15
31
|
end
|
16
32
|
|
17
|
-
require_relative '
|
18
|
-
require_relative '
|
33
|
+
require_relative 'json_transformer'
|
34
|
+
require_relative 'null_transformer'
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'tty/progressbar'
|
2
|
+
require 'colorize'
|
3
|
+
|
4
|
+
module Chronicle
|
5
|
+
module ETL
|
6
|
+
module Utils
|
7
|
+
|
8
|
+
class ProgressBar
|
9
|
+
FORMAT_WITH_TOTAL = [
|
10
|
+
':bar ',
|
11
|
+
':percent'.light_white,
|
12
|
+
' | '.light_black,
|
13
|
+
':current'.light_white,
|
14
|
+
'/'.light_black,
|
15
|
+
':total'.light_white,
|
16
|
+
' ('.light_black,
|
17
|
+
'ELAPSED:'.light_black,
|
18
|
+
':elapsed'.light_white,
|
19
|
+
' | ETA:'.light_black,
|
20
|
+
':eta'.light_white,
|
21
|
+
' | RATE: '.light_black,
|
22
|
+
':mean_rate'.light_white,
|
23
|
+
'/s) '.light_black
|
24
|
+
].join.freeze
|
25
|
+
|
26
|
+
FORMAT_WITHOUT_TOTAL = [
|
27
|
+
':current'.light_white,
|
28
|
+
'/'.light_black,
|
29
|
+
'???'.light_white,
|
30
|
+
' ('.light_black,
|
31
|
+
'ELAPSED:'.light_black,
|
32
|
+
':elapsed'.light_white,
|
33
|
+
' | ETA:'.light_black,
|
34
|
+
'??:??'.light_white,
|
35
|
+
' | RATE: '.light_black,
|
36
|
+
':mean_rate'.light_white,
|
37
|
+
'/s) '.light_black
|
38
|
+
].join.freeze
|
39
|
+
|
40
|
+
def initialize(title: 'Loading', total:)
|
41
|
+
opts = {
|
42
|
+
clear: true,
|
43
|
+
complete: '▓'.light_blue,
|
44
|
+
incomplete: '░'.blue,
|
45
|
+
frequency: 10
|
46
|
+
}
|
47
|
+
|
48
|
+
if total
|
49
|
+
opts[:total] = total
|
50
|
+
format_str = "#{title} #{FORMAT_WITH_TOTAL}"
|
51
|
+
@pbar = TTY::ProgressBar.new(FORMAT_WITH_TOTAL, opts)
|
52
|
+
else
|
53
|
+
format_str = "#{title} #{FORMAT_WITHOUT_TOTAL}"
|
54
|
+
opts[:no_width] = true
|
55
|
+
end
|
56
|
+
|
57
|
+
@pbar = TTY::ProgressBar.new(format_str, opts)
|
58
|
+
|
59
|
+
@pbar.resize
|
60
|
+
end
|
61
|
+
|
62
|
+
def increment
|
63
|
+
@pbar.advance(1)
|
64
|
+
end
|
65
|
+
|
66
|
+
def log(message)
|
67
|
+
@pbar.log message.inspect
|
68
|
+
end
|
69
|
+
|
70
|
+
def finish
|
71
|
+
@pbar.finish
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -39,75 +39,75 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.8.1
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: tty-table
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
47
|
+
version: '0.11'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
54
|
+
version: '0.11'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: tty-progressbar
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '0.17'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '0.17'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: bundler
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1
|
75
|
+
version: '2.1'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1
|
82
|
+
version: '2.1'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rake
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '13.0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
96
|
+
version: '13.0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: rspec
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '3.
|
103
|
+
version: '3.9'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '3.
|
110
|
+
version: '3.9'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: pry-byebug
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,6 +122,34 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '3.9'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: runcom
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '6.2'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '6.2'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: redcarpet
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '3.5'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '3.5'
|
125
153
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
126
154
|
transformer it, and load it.
|
127
155
|
email:
|
@@ -133,7 +161,11 @@ extra_rdoc_files: []
|
|
133
161
|
files:
|
134
162
|
- ".gitignore"
|
135
163
|
- ".rspec"
|
164
|
+
- ".rubocop.yml"
|
165
|
+
- ".ruby-version"
|
136
166
|
- ".travis.yml"
|
167
|
+
- ".yardopts"
|
168
|
+
- CHANGELOG.md
|
137
169
|
- CODE_OF_CONDUCT.md
|
138
170
|
- Gemfile
|
139
171
|
- Gemfile.lock
|
@@ -145,19 +177,26 @@ files:
|
|
145
177
|
- chronicle-etl.gemspec
|
146
178
|
- exe/chronicle-etl
|
147
179
|
- lib/chronicle/etl.rb
|
148
|
-
- lib/chronicle/etl/
|
149
|
-
- lib/chronicle/etl/
|
180
|
+
- lib/chronicle/etl/catalog.rb
|
181
|
+
- lib/chronicle/etl/cli/connectors.rb
|
182
|
+
- lib/chronicle/etl/cli/jobs.rb
|
183
|
+
- lib/chronicle/etl/cli/main.rb
|
184
|
+
- lib/chronicle/etl/cli/subcommand_base.rb
|
185
|
+
- lib/chronicle/etl/config.rb
|
186
|
+
- lib/chronicle/etl/extractors/csv_extractor.rb
|
150
187
|
- lib/chronicle/etl/extractors/extractor.rb
|
151
|
-
- lib/chronicle/etl/extractors/
|
152
|
-
- lib/chronicle/etl/
|
188
|
+
- lib/chronicle/etl/extractors/file_extractor.rb
|
189
|
+
- lib/chronicle/etl/extractors/stdin_extractor.rb
|
190
|
+
- lib/chronicle/etl/loaders/csv_loader.rb
|
153
191
|
- lib/chronicle/etl/loaders/loader.rb
|
154
|
-
- lib/chronicle/etl/loaders/
|
155
|
-
- lib/chronicle/etl/loaders/
|
192
|
+
- lib/chronicle/etl/loaders/rest_loader.rb
|
193
|
+
- lib/chronicle/etl/loaders/stdout_loader.rb
|
194
|
+
- lib/chronicle/etl/loaders/table_loader.rb
|
156
195
|
- lib/chronicle/etl/runner.rb
|
157
|
-
- lib/chronicle/etl/transformers/
|
158
|
-
- lib/chronicle/etl/transformers/
|
196
|
+
- lib/chronicle/etl/transformers/json_transformer.rb
|
197
|
+
- lib/chronicle/etl/transformers/null_transformer.rb
|
159
198
|
- lib/chronicle/etl/transformers/transformer.rb
|
160
|
-
- lib/chronicle/etl/utils/
|
199
|
+
- lib/chronicle/etl/utils/progress_bar.rb
|
161
200
|
- lib/chronicle/etl/version.rb
|
162
201
|
homepage: https://github.com/chronicle-app
|
163
202
|
licenses:
|
@@ -165,8 +204,8 @@ licenses:
|
|
165
204
|
metadata:
|
166
205
|
homepage_uri: https://github.com/chronicle-app
|
167
206
|
source_code_uri: https://github.com/chronicle-app/chronicle-etl
|
168
|
-
changelog_uri: https://github.com/chronicle-app/chronicle-etl
|
169
|
-
post_install_message:
|
207
|
+
changelog_uri: https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md
|
208
|
+
post_install_message:
|
170
209
|
rdoc_options: []
|
171
210
|
require_paths:
|
172
211
|
- lib
|
@@ -181,8 +220,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
220
|
- !ruby/object:Gem::Version
|
182
221
|
version: '0'
|
183
222
|
requirements: []
|
184
|
-
rubygems_version: 3.
|
185
|
-
signing_key:
|
223
|
+
rubygems_version: 3.1.2
|
224
|
+
signing_key:
|
186
225
|
specification_version: 4
|
187
226
|
summary: ETL tool for personal data
|
188
227
|
test_files: []
|
data/lib/chronicle/etl/cli.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'chronicle/etl'
|
3
|
-
|
4
|
-
module Chronicle
|
5
|
-
module Etl
|
6
|
-
class CLI < Thor
|
7
|
-
default_task :job
|
8
|
-
|
9
|
-
desc 'job', 'Runs an ETL job'
|
10
|
-
method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
11
|
-
method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
12
|
-
method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
13
|
-
method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
14
|
-
method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
15
|
-
method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
16
|
-
method_option :job, aliases: '-j', desc: 'Job configuration file'
|
17
|
-
def job
|
18
|
-
runner_options = {
|
19
|
-
extractor: {
|
20
|
-
name: options[:extractor],
|
21
|
-
options: options[:'extractor-opts']
|
22
|
-
},
|
23
|
-
transformer: {
|
24
|
-
name: options[:transformer],
|
25
|
-
options: options[:'transformer-opts']
|
26
|
-
},
|
27
|
-
loader: {
|
28
|
-
name: options[:loader],
|
29
|
-
options: options[:'loader-opts']
|
30
|
-
}
|
31
|
-
}
|
32
|
-
|
33
|
-
runner = Runner.new(runner_options)
|
34
|
-
runner.run!
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'csv'
|
2
|
-
|
3
|
-
module Chronicle
|
4
|
-
module Etl
|
5
|
-
module Loaders
|
6
|
-
class Csv < Chronicle::Etl::Loaders::Loader
|
7
|
-
def initialize(options={})
|
8
|
-
super(options)
|
9
|
-
@rows = []
|
10
|
-
end
|
11
|
-
|
12
|
-
def load(result)
|
13
|
-
if (result.values)
|
14
|
-
@rows << result.values
|
15
|
-
else
|
16
|
-
@rows << result
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def finish
|
21
|
-
z = $stdout
|
22
|
-
CSV(z) do |csv|
|
23
|
-
@rows.each do |row|
|
24
|
-
csv << row
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|