chronicle-etl 0.1.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.ruby-version +1 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +23 -0
  6. data/Gemfile.lock +42 -10
  7. data/README.md +64 -11
  8. data/bin/console +16 -4
  9. data/chronicle-etl.gemspec +9 -7
  10. data/exe/chronicle-etl +2 -2
  11. data/lib/chronicle/etl.rb +5 -2
  12. data/lib/chronicle/etl/catalog.rb +62 -0
  13. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  14. data/lib/chronicle/etl/cli/jobs.rb +111 -0
  15. data/lib/chronicle/etl/cli/main.rb +83 -0
  16. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  17. data/lib/chronicle/etl/config.rb +32 -0
  18. data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
  19. data/lib/chronicle/etl/extractors/extractor.rb +23 -12
  20. data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
  21. data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
  22. data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
  23. data/lib/chronicle/etl/loaders/loader.rb +23 -16
  24. data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
  25. data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
  26. data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
  27. data/lib/chronicle/etl/runner.rb +33 -11
  28. data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
  29. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
  30. data/lib/chronicle/etl/transformers/transformer.rb +27 -11
  31. data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
  32. data/lib/chronicle/etl/version.rb +2 -2
  33. metadata +69 -30
  34. data/lib/chronicle/etl/cli.rb +0 -38
  35. data/lib/chronicle/etl/extractors/stdin.rb +0 -13
  36. data/lib/chronicle/etl/loaders/csv.rb +0 -31
  37. data/lib/chronicle/etl/loaders/stdout.rb +0 -11
  38. data/lib/chronicle/etl/loaders/table.rb +0 -22
  39. data/lib/chronicle/etl/transformers/json.rb +0 -13
  40. data/lib/chronicle/etl/transformers/null.rb +0 -11
  41. data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
@@ -0,0 +1,11 @@
1
+ require 'json'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class JsonTransformer < Chronicle::ETL::Transformer
6
+ def transform data
7
+ return JSON.parse(data)
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,10 @@
1
+ module Chronicle
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
+ def transform data
5
+ return data
6
+ end
7
+ end
8
+
9
+ end
10
+ end
@@ -1,18 +1,34 @@
1
1
  module Chronicle
2
- module Etl
3
- module Transformers
4
- class Transformer
5
- def initialize(options = {})
6
- @options = options
7
- end
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
4
+ class Transformer
5
+ extend Chronicle::ETL::Catalog
8
6
 
9
- def transform data
10
- raise NotImplementedError
11
- end
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
11
+ def initialize(options = {})
12
+ @options = options
12
13
  end
14
+
15
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
16
+ def transform data
17
+ raise NotImplementedError
18
+ end
19
+
20
+ # The domain or provider-specific id of the record this transformer is working on.
21
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
22
+ # data source.
23
+ def id; end
24
+
25
+ # The domain or provider-specific timestamp of the record this transformer is working on.
26
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
27
+ # data source.
28
+ def timestamp; end
13
29
  end
14
30
  end
15
31
  end
16
32
 
17
- require_relative 'null'
18
- require_relative 'json'
33
+ require_relative 'json_transformer'
34
+ require_relative 'null_transformer'
@@ -0,0 +1,76 @@
1
+ require 'tty/progressbar'
2
+ require 'colorize'
3
+
4
+ module Chronicle
5
+ module ETL
6
+ module Utils
7
+
8
+ class ProgressBar
9
+ FORMAT_WITH_TOTAL = [
10
+ ':bar ',
11
+ ':percent'.light_white,
12
+ ' | '.light_black,
13
+ ':current'.light_white,
14
+ '/'.light_black,
15
+ ':total'.light_white,
16
+ ' ('.light_black,
17
+ 'ELAPSED:'.light_black,
18
+ ':elapsed'.light_white,
19
+ ' | ETA:'.light_black,
20
+ ':eta'.light_white,
21
+ ' | RATE: '.light_black,
22
+ ':mean_rate'.light_white,
23
+ '/s) '.light_black
24
+ ].join.freeze
25
+
26
+ FORMAT_WITHOUT_TOTAL = [
27
+ ':current'.light_white,
28
+ '/'.light_black,
29
+ '???'.light_white,
30
+ ' ('.light_black,
31
+ 'ELAPSED:'.light_black,
32
+ ':elapsed'.light_white,
33
+ ' | ETA:'.light_black,
34
+ '??:??'.light_white,
35
+ ' | RATE: '.light_black,
36
+ ':mean_rate'.light_white,
37
+ '/s) '.light_black
38
+ ].join.freeze
39
+
40
+ def initialize(title: 'Loading', total:)
41
+ opts = {
42
+ clear: true,
43
+ complete: '▓'.light_blue,
44
+ incomplete: '░'.blue,
45
+ frequency: 10
46
+ }
47
+
48
+ if total
49
+ opts[:total] = total
50
+ format_str = "#{title} #{FORMAT_WITH_TOTAL}"
51
+ @pbar = TTY::ProgressBar.new(FORMAT_WITH_TOTAL, opts)
52
+ else
53
+ format_str = "#{title} #{FORMAT_WITHOUT_TOTAL}"
54
+ opts[:no_width] = true
55
+ end
56
+
57
+ @pbar = TTY::ProgressBar.new(format_str, opts)
58
+
59
+ @pbar.resize
60
+ end
61
+
62
+ def increment
63
+ @pbar.advance(1)
64
+ end
65
+
66
+ def log(message)
67
+ @pbar.log message.inspect
68
+ end
69
+
70
+ def finish
71
+ @pbar.finish
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.1"
2
+ module ETL
3
+ VERSION = "0.2.1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-02 00:00:00.000000000 Z
11
+ date: 2020-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -39,75 +39,75 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.8.1
41
41
  - !ruby/object:Gem::Dependency
42
- name: table_print
42
+ name: tty-table
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '0.11'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '0.11'
55
55
  - !ruby/object:Gem::Dependency
56
- name: ruby-progressbar
56
+ name: tty-progressbar
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.10'
61
+ version: '0.17'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.10'
68
+ version: '0.17'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.17'
75
+ version: '2.1'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.17'
82
+ version: '2.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rake
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '13.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
96
+ version: '13.0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rspec
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '3.0'
103
+ version: '3.9'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '3.0'
110
+ version: '3.9'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: pry-byebug
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +122,34 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '3.9'
125
+ - !ruby/object:Gem::Dependency
126
+ name: runcom
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '6.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '6.2'
139
+ - !ruby/object:Gem::Dependency
140
+ name: redcarpet
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '3.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '3.5'
125
153
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
154
  transform it, and load it.
127
155
  email:
@@ -133,7 +161,11 @@ extra_rdoc_files: []
133
161
  files:
134
162
  - ".gitignore"
135
163
  - ".rspec"
164
+ - ".rubocop.yml"
165
+ - ".ruby-version"
136
166
  - ".travis.yml"
167
+ - ".yardopts"
168
+ - CHANGELOG.md
137
169
  - CODE_OF_CONDUCT.md
138
170
  - Gemfile
139
171
  - Gemfile.lock
@@ -145,19 +177,26 @@ files:
145
177
  - chronicle-etl.gemspec
146
178
  - exe/chronicle-etl
147
179
  - lib/chronicle/etl.rb
148
- - lib/chronicle/etl/cli.rb
149
- - lib/chronicle/etl/extractors/csv.rb
180
+ - lib/chronicle/etl/catalog.rb
181
+ - lib/chronicle/etl/cli/connectors.rb
182
+ - lib/chronicle/etl/cli/jobs.rb
183
+ - lib/chronicle/etl/cli/main.rb
184
+ - lib/chronicle/etl/cli/subcommand_base.rb
185
+ - lib/chronicle/etl/config.rb
186
+ - lib/chronicle/etl/extractors/csv_extractor.rb
150
187
  - lib/chronicle/etl/extractors/extractor.rb
151
- - lib/chronicle/etl/extractors/stdin.rb
152
- - lib/chronicle/etl/loaders/csv.rb
188
+ - lib/chronicle/etl/extractors/file_extractor.rb
189
+ - lib/chronicle/etl/extractors/stdin_extractor.rb
190
+ - lib/chronicle/etl/loaders/csv_loader.rb
153
191
  - lib/chronicle/etl/loaders/loader.rb
154
- - lib/chronicle/etl/loaders/stdout.rb
155
- - lib/chronicle/etl/loaders/table.rb
192
+ - lib/chronicle/etl/loaders/rest_loader.rb
193
+ - lib/chronicle/etl/loaders/stdout_loader.rb
194
+ - lib/chronicle/etl/loaders/table_loader.rb
156
195
  - lib/chronicle/etl/runner.rb
157
- - lib/chronicle/etl/transformers/json.rb
158
- - lib/chronicle/etl/transformers/null.rb
196
+ - lib/chronicle/etl/transformers/json_transformer.rb
197
+ - lib/chronicle/etl/transformers/null_transformer.rb
159
198
  - lib/chronicle/etl/transformers/transformer.rb
160
- - lib/chronicle/etl/utils/progress_bar_wrapper.rb
199
+ - lib/chronicle/etl/utils/progress_bar.rb
161
200
  - lib/chronicle/etl/version.rb
162
201
  homepage: https://github.com/chronicle-app
163
202
  licenses:
@@ -165,8 +204,8 @@ licenses:
165
204
  metadata:
166
205
  homepage_uri: https://github.com/chronicle-app
167
206
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
168
- changelog_uri: https://github.com/chronicle-app/chronicle-etl
169
- post_install_message:
207
+ changelog_uri: https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md
208
+ post_install_message:
170
209
  rdoc_options: []
171
210
  require_paths:
172
211
  - lib
@@ -181,8 +220,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
220
  - !ruby/object:Gem::Version
182
221
  version: '0'
183
222
  requirements: []
184
- rubygems_version: 3.0.3
185
- signing_key:
223
+ rubygems_version: 3.1.2
224
+ signing_key:
186
225
  specification_version: 4
187
226
  summary: ETL tool for personal data
188
227
  test_files: []
@@ -1,38 +0,0 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
-
4
- module Chronicle
5
- module Etl
6
- class CLI < Thor
7
- default_task :job
8
-
9
- desc 'job', 'Runs an ETL job'
10
- method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
11
- method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
12
- method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
13
- method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
14
- method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
15
- method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
16
- method_option :job, aliases: '-j', desc: 'Job configuration file'
17
- def job
18
- runner_options = {
19
- extractor: {
20
- name: options[:extractor],
21
- options: options[:'extractor-opts']
22
- },
23
- transformer: {
24
- name: options[:transformer],
25
- options: options[:'transformer-opts']
26
- },
27
- loader: {
28
- name: options[:loader],
29
- options: options[:'loader-opts']
30
- }
31
- }
32
-
33
- runner = Runner.new(runner_options)
34
- runner.run!
35
- end
36
- end
37
- end
38
- end
@@ -1,13 +0,0 @@
1
- module Chronicle
2
- module Etl
3
- module Extractors
4
- class Stdin < Chronicle::Etl::Extractors::Extractor
5
- def extract
6
- $stdin.read.each_line do |line|
7
- yield line
8
- end
9
- end
10
- end
11
- end
12
- end
13
- end
@@ -1,31 +0,0 @@
1
- require 'csv'
2
-
3
- module Chronicle
4
- module Etl
5
- module Loaders
6
- class Csv < Chronicle::Etl::Loaders::Loader
7
- def initialize(options={})
8
- super(options)
9
- @rows = []
10
- end
11
-
12
- def load(result)
13
- if (result.values)
14
- @rows << result.values
15
- else
16
- @rows << result
17
- end
18
- end
19
-
20
- def finish
21
- z = $stdout
22
- CSV(z) do |csv|
23
- @rows.each do |row|
24
- csv << row
25
- end
26
- end
27
- end
28
- end
29
- end
30
- end
31
- end