chronicle-etl 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.ruby-version +1 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +23 -0
  6. data/Gemfile.lock +42 -10
  7. data/README.md +64 -11
  8. data/bin/console +16 -4
  9. data/chronicle-etl.gemspec +9 -7
  10. data/exe/chronicle-etl +2 -2
  11. data/lib/chronicle/etl.rb +5 -2
  12. data/lib/chronicle/etl/catalog.rb +62 -0
  13. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  14. data/lib/chronicle/etl/cli/jobs.rb +111 -0
  15. data/lib/chronicle/etl/cli/main.rb +83 -0
  16. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  17. data/lib/chronicle/etl/config.rb +32 -0
  18. data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
  19. data/lib/chronicle/etl/extractors/extractor.rb +23 -12
  20. data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
  21. data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
  22. data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
  23. data/lib/chronicle/etl/loaders/loader.rb +23 -16
  24. data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
  25. data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
  26. data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
  27. data/lib/chronicle/etl/runner.rb +33 -11
  28. data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
  29. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
  30. data/lib/chronicle/etl/transformers/transformer.rb +27 -11
  31. data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
  32. data/lib/chronicle/etl/version.rb +2 -2
  33. metadata +69 -30
  34. data/lib/chronicle/etl/cli.rb +0 -38
  35. data/lib/chronicle/etl/extractors/stdin.rb +0 -13
  36. data/lib/chronicle/etl/loaders/csv.rb +0 -31
  37. data/lib/chronicle/etl/loaders/stdout.rb +0 -11
  38. data/lib/chronicle/etl/loaders/table.rb +0 -22
  39. data/lib/chronicle/etl/transformers/json.rb +0 -13
  40. data/lib/chronicle/etl/transformers/null.rb +0 -11
  41. data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
@@ -0,0 +1,11 @@
1
+ require 'json'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class JsonTransformer < Chronicle::ETL::Transformer
6
+ def transform data
7
+ return JSON.parse(data)
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,10 @@
1
+ module Chronicle
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
+ def transform data
5
+ return data
6
+ end
7
+ end
8
+
9
+ end
10
+ end
@@ -1,18 +1,34 @@
1
1
  module Chronicle
2
- module Etl
3
- module Transformers
4
- class Transformer
5
- def initialize(options = {})
6
- @options = options
7
- end
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
4
+ class Transformer
5
+ extend Chronicle::ETL::Catalog
8
6
 
9
- def transform data
10
- raise NotImplementedError
11
- end
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
11
+ def initialize(options = {})
12
+ @options = options
12
13
  end
14
+
15
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
16
+ def transform data
17
+ raise NotImplementedError
18
+ end
19
+
20
+ # The domain or provider-specific id of the record this transformer is working on.
21
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
22
+ # data source each time.
23
+ def id; end
24
+
25
+ # The domain or provider-specific timestamp of the record this transformer is working on.
26
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
27
+ # data source each time.
28
+ def timestamp; end
13
29
  end
14
30
  end
15
31
  end
16
32
 
17
- require_relative 'null'
18
- require_relative 'json'
33
+ require_relative 'json_transformer'
34
+ require_relative 'null_transformer'
@@ -0,0 +1,76 @@
1
+ require 'tty/progressbar'
2
+ require 'colorize'
3
+
4
+ module Chronicle
5
+ module ETL
6
+ module Utils
7
+
8
+ class ProgressBar
9
+ FORMAT_WITH_TOTAL = [
10
+ ':bar ',
11
+ ':percent'.light_white,
12
+ ' | '.light_black,
13
+ ':current'.light_white,
14
+ '/'.light_black,
15
+ ':total'.light_white,
16
+ ' ('.light_black,
17
+ 'ELAPSED:'.light_black,
18
+ ':elapsed'.light_white,
19
+ ' | ETA:'.light_black,
20
+ ':eta'.light_white,
21
+ ' | RATE: '.light_black,
22
+ ':mean_rate'.light_white,
23
+ '/s) '.light_black
24
+ ].join.freeze
25
+
26
+ FORMAT_WITHOUT_TOTAL = [
27
+ ':current'.light_white,
28
+ '/'.light_black,
29
+ '???'.light_white,
30
+ ' ('.light_black,
31
+ 'ELAPSED:'.light_black,
32
+ ':elapsed'.light_white,
33
+ ' | ETA:'.light_black,
34
+ '??:??'.light_white,
35
+ ' | RATE: '.light_black,
36
+ ':mean_rate'.light_white,
37
+ '/s) '.light_black
38
+ ].join.freeze
39
+
40
+ def initialize(title: 'Loading', total:)
41
+ opts = {
42
+ clear: true,
43
+ complete: '▓'.light_blue,
44
+ incomplete: '░'.blue,
45
+ frequency: 10
46
+ }
47
+
48
+ if total
49
+ opts[:total] = total
50
+ format_str = "#{title} #{FORMAT_WITH_TOTAL}"
51
+ @pbar = TTY::ProgressBar.new(FORMAT_WITH_TOTAL, opts)
52
+ else
53
+ format_str = "#{title} #{FORMAT_WITHOUT_TOTAL}"
54
+ opts[:no_width] = true
55
+ end
56
+
57
+ @pbar = TTY::ProgressBar.new(format_str, opts)
58
+
59
+ @pbar.resize
60
+ end
61
+
62
+ def increment
63
+ @pbar.advance(1)
64
+ end
65
+
66
+ def log(message)
67
+ @pbar.log message.inspect
68
+ end
69
+
70
+ def finish
71
+ @pbar.finish
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.1"
2
+ module ETL
3
+ VERSION = "0.2.1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-02 00:00:00.000000000 Z
11
+ date: 2020-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -39,75 +39,75 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.8.1
41
41
  - !ruby/object:Gem::Dependency
42
- name: table_print
42
+ name: tty-table
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '0.11'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '0.11'
55
55
  - !ruby/object:Gem::Dependency
56
- name: ruby-progressbar
56
+ name: tty-progressbar
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.10'
61
+ version: '0.17'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.10'
68
+ version: '0.17'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.17'
75
+ version: '2.1'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.17'
82
+ version: '2.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rake
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '13.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
96
+ version: '13.0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rspec
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '3.0'
103
+ version: '3.9'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '3.0'
110
+ version: '3.9'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: pry-byebug
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +122,34 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '3.9'
125
+ - !ruby/object:Gem::Dependency
126
+ name: runcom
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '6.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '6.2'
139
+ - !ruby/object:Gem::Dependency
140
+ name: redcarpet
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '3.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '3.5'
125
153
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
154
  transformer it, and load it.
127
155
  email:
@@ -133,7 +161,11 @@ extra_rdoc_files: []
133
161
  files:
134
162
  - ".gitignore"
135
163
  - ".rspec"
164
+ - ".rubocop.yml"
165
+ - ".ruby-version"
136
166
  - ".travis.yml"
167
+ - ".yardopts"
168
+ - CHANGELOG.md
137
169
  - CODE_OF_CONDUCT.md
138
170
  - Gemfile
139
171
  - Gemfile.lock
@@ -145,19 +177,26 @@ files:
145
177
  - chronicle-etl.gemspec
146
178
  - exe/chronicle-etl
147
179
  - lib/chronicle/etl.rb
148
- - lib/chronicle/etl/cli.rb
149
- - lib/chronicle/etl/extractors/csv.rb
180
+ - lib/chronicle/etl/catalog.rb
181
+ - lib/chronicle/etl/cli/connectors.rb
182
+ - lib/chronicle/etl/cli/jobs.rb
183
+ - lib/chronicle/etl/cli/main.rb
184
+ - lib/chronicle/etl/cli/subcommand_base.rb
185
+ - lib/chronicle/etl/config.rb
186
+ - lib/chronicle/etl/extractors/csv_extractor.rb
150
187
  - lib/chronicle/etl/extractors/extractor.rb
151
- - lib/chronicle/etl/extractors/stdin.rb
152
- - lib/chronicle/etl/loaders/csv.rb
188
+ - lib/chronicle/etl/extractors/file_extractor.rb
189
+ - lib/chronicle/etl/extractors/stdin_extractor.rb
190
+ - lib/chronicle/etl/loaders/csv_loader.rb
153
191
  - lib/chronicle/etl/loaders/loader.rb
154
- - lib/chronicle/etl/loaders/stdout.rb
155
- - lib/chronicle/etl/loaders/table.rb
192
+ - lib/chronicle/etl/loaders/rest_loader.rb
193
+ - lib/chronicle/etl/loaders/stdout_loader.rb
194
+ - lib/chronicle/etl/loaders/table_loader.rb
156
195
  - lib/chronicle/etl/runner.rb
157
- - lib/chronicle/etl/transformers/json.rb
158
- - lib/chronicle/etl/transformers/null.rb
196
+ - lib/chronicle/etl/transformers/json_transformer.rb
197
+ - lib/chronicle/etl/transformers/null_transformer.rb
159
198
  - lib/chronicle/etl/transformers/transformer.rb
160
- - lib/chronicle/etl/utils/progress_bar_wrapper.rb
199
+ - lib/chronicle/etl/utils/progress_bar.rb
161
200
  - lib/chronicle/etl/version.rb
162
201
  homepage: https://github.com/chronicle-app
163
202
  licenses:
@@ -165,8 +204,8 @@ licenses:
165
204
  metadata:
166
205
  homepage_uri: https://github.com/chronicle-app
167
206
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
168
- changelog_uri: https://github.com/chronicle-app/chronicle-etl
169
- post_install_message:
207
+ changelog_uri: https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md
208
+ post_install_message:
170
209
  rdoc_options: []
171
210
  require_paths:
172
211
  - lib
@@ -181,8 +220,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
220
  - !ruby/object:Gem::Version
182
221
  version: '0'
183
222
  requirements: []
184
- rubygems_version: 3.0.3
185
- signing_key:
223
+ rubygems_version: 3.1.2
224
+ signing_key:
186
225
  specification_version: 4
187
226
  summary: ETL tool for personal data
188
227
  test_files: []
@@ -1,38 +0,0 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
-
4
- module Chronicle
5
- module Etl
6
- class CLI < Thor
7
- default_task :job
8
-
9
- desc 'job', 'Runs an ETL job'
10
- method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
11
- method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
12
- method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
13
- method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
14
- method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
15
- method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
16
- method_option :job, aliases: '-j', desc: 'Job configuration file'
17
- def job
18
- runner_options = {
19
- extractor: {
20
- name: options[:extractor],
21
- options: options[:'extractor-opts']
22
- },
23
- transformer: {
24
- name: options[:transformer],
25
- options: options[:'transformer-opts']
26
- },
27
- loader: {
28
- name: options[:loader],
29
- options: options[:'loader-opts']
30
- }
31
- }
32
-
33
- runner = Runner.new(runner_options)
34
- runner.run!
35
- end
36
- end
37
- end
38
- end
@@ -1,13 +0,0 @@
1
- module Chronicle
2
- module Etl
3
- module Extractors
4
- class Stdin < Chronicle::Etl::Extractors::Extractor
5
- def extract
6
- $stdin.read.each_line do |line|
7
- yield line
8
- end
9
- end
10
- end
11
- end
12
- end
13
- end
@@ -1,31 +0,0 @@
1
- require 'csv'
2
-
3
- module Chronicle
4
- module Etl
5
- module Loaders
6
- class Csv < Chronicle::Etl::Loaders::Loader
7
- def initialize(options={})
8
- super(options)
9
- @rows = []
10
- end
11
-
12
- def load(result)
13
- if (result.values)
14
- @rows << result.values
15
- else
16
- @rows << result
17
- end
18
- end
19
-
20
- def finish
21
- z = $stdout
22
- CSV(z) do |csv|
23
- @rows.each do |row|
24
- csv << row
25
- end
26
- end
27
- end
28
- end
29
- end
30
- end
31
- end