chronicle-etl 0.1.2 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.ruby-version +1 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +11 -0
  6. data/Gemfile.lock +42 -10
  7. data/README.md +64 -11
  8. data/bin/console +16 -4
  9. data/chronicle-etl.gemspec +8 -6
  10. data/exe/chronicle-etl +2 -2
  11. data/lib/chronicle/etl.rb +6 -2
  12. data/lib/chronicle/etl/catalog.rb +102 -0
  13. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  14. data/lib/chronicle/etl/cli/jobs.rb +110 -0
  15. data/lib/chronicle/etl/cli/main.rb +83 -0
  16. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  17. data/lib/chronicle/etl/config.rb +32 -0
  18. data/lib/chronicle/etl/exceptions.rb +17 -0
  19. data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
  20. data/lib/chronicle/etl/extractors/extractor.rb +23 -12
  21. data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
  22. data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
  24. data/lib/chronicle/etl/loaders/loader.rb +23 -16
  25. data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
  26. data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
  28. data/lib/chronicle/etl/runner.rb +27 -38
  29. data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
  30. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
  31. data/lib/chronicle/etl/transformers/transformer.rb +28 -11
  32. data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
  33. data/lib/chronicle/etl/version.rb +2 -2
  34. metadata +68 -29
  35. data/lib/chronicle/etl/cli.rb +0 -38
  36. data/lib/chronicle/etl/extractors/stdin.rb +0 -13
  37. data/lib/chronicle/etl/loaders/csv.rb +0 -31
  38. data/lib/chronicle/etl/loaders/stdout.rb +0 -11
  39. data/lib/chronicle/etl/loaders/table.rb +0 -22
  40. data/lib/chronicle/etl/transformers/json.rb +0 -13
  41. data/lib/chronicle/etl/transformers/null.rb +0 -11
  42. data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
@@ -1,25 +1,32 @@
1
1
  module Chronicle
2
- module Etl
3
- module Loaders
4
- class Loader
5
- def initialize(options = {})
6
- @options = options
7
- end
2
+ module ETL
3
+ # Abstract class representing a Loader for an ETL job
4
+ class Loader
5
+ extend Chronicle::ETL::Catalog
8
6
 
9
- def start; end
10
-
11
- def first_load; end
7
+ # Construct a new instance of this loader. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Loader
11
+ def initialize(options = {})
12
+ @options = options
13
+ end
12
14
 
13
- def load
14
- raise NotImplementedError
15
- end
15
+ # Called once before processing records
16
+ def start; end
16
17
 
17
- def finish; end
18
+ # Load a single record
19
+ def load
20
+ raise NotImplementedError
18
21
  end
22
+
23
+ # Called once there are no more records to process
24
+ def finish; end
19
25
  end
20
26
  end
21
27
  end
22
28
 
23
- require_relative 'stdout'
24
- require_relative 'csv'
25
- require_relative 'table'
29
+ require_relative 'csv_loader'
30
+ require_relative 'rest_loader'
31
+ require_relative 'stdout_loader'
32
+ require_relative 'table_loader'
@@ -0,0 +1,30 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+
5
+ module Chronicle
6
+ module ETL
7
+ class RestLoader < Chronicle::ETL::Loader
8
+ def initialize(options={})
9
+ super(options)
10
+ end
11
+
12
+ def load(result)
13
+ uri = URI.parse("#{@options[:hostname]}#{@options[:endpoint]}")
14
+
15
+ header = {
16
+ "Authorization" => "Bearer #{@options[:access_token]}",
17
+ "Content-Type": 'application/json'
18
+ }
19
+
20
+ http = Net::HTTP.new(uri.host, uri.port)
21
+ request = Net::HTTP::Post.new(uri.request_uri, header)
22
+
23
+ obj = {data: result} unless result[:data]
24
+ request.body = obj.to_json
25
+
26
+ response = http.request(request)
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,9 @@
1
+ module Chronicle
2
+ module ETL
3
+ class StdoutLoader < Chronicle::ETL::Loader
4
+ def load(result)
5
+ puts result.inspect
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,21 @@
1
+ require 'tty/table'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class TableLoader < Chronicle::ETL::Loader
6
+ def initialize(options)
7
+ super(options)
8
+ end
9
+
10
+ def load(result)
11
+ @table ||= TTY::Table.new(header: result.keys)
12
+ values = result.values.map{|x| x.to_s[0..30]}
13
+ @table << values
14
+ end
15
+
16
+ def finish
17
+ puts @table.render(:ascii, padding: [0, 1])
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,57 +1,46 @@
1
- class Chronicle::Etl::Runner
2
- BUILTIN = {
3
- extractor: ['stdin', 'json', 'csv'],
4
- transformer: ['null'],
5
- loader: ['stdout', 'csv', 'table']
6
- }.freeze
7
-
8
- def initialize(options)
9
- @options = options
1
+ require 'colorize'
10
2
 
11
- instantiate_etl_classes
3
+ class Chronicle::ETL::Runner
4
+ def initialize(options = {})
5
+ @options = options
12
6
  end
13
7
 
14
8
  def run!
15
- progress_bar = Chronicle::Etl::Utils::ProgressBarWrapper.new(@extractor.results_count)
16
- @loader.start
9
+ extractor = instantiate_klass(:extractor)
10
+ loader = instantiate_klass(:loader)
17
11
 
18
- @extractor.extract do |result, i|
19
- @loader.first_load(result) if i == 0
12
+ total = extractor.results_count
13
+ progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
20
14
 
21
- transformed_data = @transformer.transform(result)
22
- @loader.load(transformed_data)
15
+ loader.start
23
16
 
17
+ extractor.extract do |data, metadata|
18
+ transformer = instantiate_klass(:transformer, data)
19
+ transformed_data = transformer.transform
20
+
21
+ loader.load(transformed_data)
24
22
  progress_bar.increment
25
23
  end
26
24
 
27
25
  progress_bar.finish
28
- @loader.finish
26
+ loader.finish
29
27
  end
30
28
 
31
29
  private
32
30
 
33
- def instantiate_etl_classes
34
- @extractor = load_etl_class(:extractor, @options[:extractor][:name]).new(@options[:extractor][:options])
35
- @transformer = load_etl_class(:transformer, @options[:transformer][:name]).new(@options[:transformer][:options])
36
- @loader = load_etl_class(:loader, @options[:loader][:name]).new(@options[:loader][:options])
31
+ def instantiate_klass(phase, *args)
32
+ klass = load_etl_class(phase, @options[phase][:name])
33
+ klass.new(@options[phase][:options], *args)
37
34
  end
38
35
 
39
- def load_etl_class(phase, name)
40
- if BUILTIN[phase].include? name
41
- klass_name = "Chronicle::Etl::#{phase.to_s.capitalize}s::#{name.capitalize}"
42
- else
43
- # TODO: come up with syntax for specifying a particular extractor in a provider library
44
- # provider, extractor = name.split(":")
45
- provider = name
46
- begin
47
- require "chronicle/#{provider}"
48
- rescue LoadError => e
49
- warn("Error loading #{phase} '#{provider}'")
50
- warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
51
- exit(false)
52
- end
53
- klass_name = "Chronicle::#{name.capitalize}::Extractor"
54
- end
55
- Object.const_get(klass_name)
36
+ def load_etl_class(phase, identifier)
37
+ Chronicle::ETL::Catalog.identifier_to_klass(phase: phase, identifier: identifier)
38
+ rescue Chronicle::ETL::ProviderNotAvailableError => e
39
+ warn(e.message.red)
40
+ warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
41
+ exit(false)
42
+ rescue Chronicle::ETL::ConnectorNotAvailableError => e
43
+ warn(e.message.red)
44
+ exit(false)
56
45
  end
57
46
  end
@@ -0,0 +1,11 @@
1
+ require 'json'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class JsonTransformer < Chronicle::ETL::Transformer
6
+ def transform data
7
+ return JSON.parse(data)
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,10 @@
1
+ module Chronicle
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
+ def transform
5
+ return @data
6
+ end
7
+ end
8
+
9
+ end
10
+ end
@@ -1,18 +1,35 @@
1
1
  module Chronicle
2
- module Etl
3
- module Transformers
4
- class Transformer
5
- def initialize(options = {})
6
- @options = options
7
- end
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
4
+ class Transformer
5
+ extend Chronicle::ETL::Catalog
8
6
 
9
- def transform data
10
- raise NotImplementedError
11
- end
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
11
+ def initialize(options = {}, data)
12
+ @options = options
13
+ @data = data
12
14
  end
15
+
16
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
17
+ def transform
18
+ raise NotImplementedError
19
+ end
20
+
21
+ # The domain or provider-specific id of the record this transformer is working on.
22
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
23
+ # data source.
24
+ def id; end
25
+
26
+ # The domain or provider-specific timestamp of the record this transformer is working on.
27
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
28
+ # data source.
29
+ def timestamp; end
13
30
  end
14
31
  end
15
32
  end
16
33
 
17
- require_relative 'null'
18
- require_relative 'json'
34
+ require_relative 'json_transformer'
35
+ require_relative 'null_transformer'
@@ -0,0 +1,76 @@
1
+ require 'tty/progressbar'
2
+ require 'colorize'
3
+
4
+ module Chronicle
5
+ module ETL
6
+ module Utils
7
+
8
+ class ProgressBar
9
+ FORMAT_WITH_TOTAL = [
10
+ ':bar ',
11
+ ':percent'.light_white,
12
+ ' | '.light_black,
13
+ ':current'.light_white,
14
+ '/'.light_black,
15
+ ':total'.light_white,
16
+ ' ('.light_black,
17
+ 'ELAPSED:'.light_black,
18
+ ':elapsed'.light_white,
19
+ ' | ETA:'.light_black,
20
+ ':eta'.light_white,
21
+ ' | RATE: '.light_black,
22
+ ':mean_rate'.light_white,
23
+ '/s) '.light_black
24
+ ].join.freeze
25
+
26
+ FORMAT_WITHOUT_TOTAL = [
27
+ ':current'.light_white,
28
+ '/'.light_black,
29
+ '???'.light_white,
30
+ ' ('.light_black,
31
+ 'ELAPSED:'.light_black,
32
+ ':elapsed'.light_white,
33
+ ' | ETA:'.light_black,
34
+ '??:??'.light_white,
35
+ ' | RATE: '.light_black,
36
+ ':mean_rate'.light_white,
37
+ '/s) '.light_black
38
+ ].join.freeze
39
+
40
+ def initialize(title: 'Loading', total:)
41
+ opts = {
42
+ clear: true,
43
+ complete: '▓'.light_blue,
44
+ incomplete: '░'.blue,
45
+ frequency: 10
46
+ }
47
+
48
+ if total
49
+ opts[:total] = total
50
+ format_str = "#{title} #{FORMAT_WITH_TOTAL}"
51
+ @pbar = TTY::ProgressBar.new(FORMAT_WITH_TOTAL, opts)
52
+ else
53
+ format_str = "#{title} #{FORMAT_WITHOUT_TOTAL}"
54
+ opts[:no_width] = true
55
+ end
56
+
57
+ @pbar = TTY::ProgressBar.new(format_str, opts)
58
+
59
+ @pbar.resize
60
+ end
61
+
62
+ def increment
63
+ @pbar.advance(1)
64
+ end
65
+
66
+ def log(message)
67
+ @pbar.log message.inspect
68
+ end
69
+
70
+ def finish
71
+ @pbar.finish
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.2"
2
+ module ETL
3
+ VERSION = "0.2.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-02 00:00:00.000000000 Z
11
+ date: 2020-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -39,75 +39,75 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.8.1
41
41
  - !ruby/object:Gem::Dependency
42
- name: table_print
42
+ name: tty-table
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '0.11'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '0.11'
55
55
  - !ruby/object:Gem::Dependency
56
- name: ruby-progressbar
56
+ name: tty-progressbar
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.10'
61
+ version: '0.17'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.10'
68
+ version: '0.17'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.17'
75
+ version: '2.1'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.17'
82
+ version: '2.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rake
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '13.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
96
+ version: '13.0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rspec
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '3.0'
103
+ version: '3.9'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '3.0'
110
+ version: '3.9'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: pry-byebug
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +122,34 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '3.9'
125
+ - !ruby/object:Gem::Dependency
126
+ name: runcom
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '6.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '6.2'
139
+ - !ruby/object:Gem::Dependency
140
+ name: redcarpet
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '3.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '3.5'
125
153
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
154
  transform it, and load it.
127
155
  email:
@@ -133,7 +161,10 @@ extra_rdoc_files: []
133
161
  files:
134
162
  - ".gitignore"
135
163
  - ".rspec"
164
+ - ".rubocop.yml"
165
+ - ".ruby-version"
136
166
  - ".travis.yml"
167
+ - ".yardopts"
137
168
  - CHANGELOG.md
138
169
  - CODE_OF_CONDUCT.md
139
170
  - Gemfile
@@ -146,19 +177,27 @@ files:
146
177
  - chronicle-etl.gemspec
147
178
  - exe/chronicle-etl
148
179
  - lib/chronicle/etl.rb
149
- - lib/chronicle/etl/cli.rb
150
- - lib/chronicle/etl/extractors/csv.rb
180
+ - lib/chronicle/etl/catalog.rb
181
+ - lib/chronicle/etl/cli/connectors.rb
182
+ - lib/chronicle/etl/cli/jobs.rb
183
+ - lib/chronicle/etl/cli/main.rb
184
+ - lib/chronicle/etl/cli/subcommand_base.rb
185
+ - lib/chronicle/etl/config.rb
186
+ - lib/chronicle/etl/exceptions.rb
187
+ - lib/chronicle/etl/extractors/csv_extractor.rb
151
188
  - lib/chronicle/etl/extractors/extractor.rb
152
- - lib/chronicle/etl/extractors/stdin.rb
153
- - lib/chronicle/etl/loaders/csv.rb
189
+ - lib/chronicle/etl/extractors/file_extractor.rb
190
+ - lib/chronicle/etl/extractors/stdin_extractor.rb
191
+ - lib/chronicle/etl/loaders/csv_loader.rb
154
192
  - lib/chronicle/etl/loaders/loader.rb
155
- - lib/chronicle/etl/loaders/stdout.rb
156
- - lib/chronicle/etl/loaders/table.rb
193
+ - lib/chronicle/etl/loaders/rest_loader.rb
194
+ - lib/chronicle/etl/loaders/stdout_loader.rb
195
+ - lib/chronicle/etl/loaders/table_loader.rb
157
196
  - lib/chronicle/etl/runner.rb
158
- - lib/chronicle/etl/transformers/json.rb
159
- - lib/chronicle/etl/transformers/null.rb
197
+ - lib/chronicle/etl/transformers/json_transformer.rb
198
+ - lib/chronicle/etl/transformers/null_transformer.rb
160
199
  - lib/chronicle/etl/transformers/transformer.rb
161
- - lib/chronicle/etl/utils/progress_bar_wrapper.rb
200
+ - lib/chronicle/etl/utils/progress_bar.rb
162
201
  - lib/chronicle/etl/version.rb
163
202
  homepage: https://github.com/chronicle-app
164
203
  licenses:
@@ -167,7 +206,7 @@ metadata:
167
206
  homepage_uri: https://github.com/chronicle-app
168
207
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
169
208
  changelog_uri: https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md
170
- post_install_message:
209
+ post_install_message:
171
210
  rdoc_options: []
172
211
  require_paths:
173
212
  - lib
@@ -182,8 +221,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
221
  - !ruby/object:Gem::Version
183
222
  version: '0'
184
223
  requirements: []
185
- rubygems_version: 3.0.3
186
- signing_key:
224
+ rubygems_version: 3.1.2
225
+ signing_key:
187
226
  specification_version: 4
188
227
  summary: ETL tool for personal data
189
228
  test_files: []