chronicle-etl 0.1.2 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.ruby-version +1 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +11 -0
  6. data/Gemfile.lock +42 -10
  7. data/README.md +64 -11
  8. data/bin/console +16 -4
  9. data/chronicle-etl.gemspec +8 -6
  10. data/exe/chronicle-etl +2 -2
  11. data/lib/chronicle/etl.rb +6 -2
  12. data/lib/chronicle/etl/catalog.rb +102 -0
  13. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  14. data/lib/chronicle/etl/cli/jobs.rb +110 -0
  15. data/lib/chronicle/etl/cli/main.rb +83 -0
  16. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  17. data/lib/chronicle/etl/config.rb +32 -0
  18. data/lib/chronicle/etl/exceptions.rb +17 -0
  19. data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
  20. data/lib/chronicle/etl/extractors/extractor.rb +23 -12
  21. data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
  22. data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
  24. data/lib/chronicle/etl/loaders/loader.rb +23 -16
  25. data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
  26. data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
  28. data/lib/chronicle/etl/runner.rb +27 -38
  29. data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
  30. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
  31. data/lib/chronicle/etl/transformers/transformer.rb +28 -11
  32. data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
  33. data/lib/chronicle/etl/version.rb +2 -2
  34. metadata +68 -29
  35. data/lib/chronicle/etl/cli.rb +0 -38
  36. data/lib/chronicle/etl/extractors/stdin.rb +0 -13
  37. data/lib/chronicle/etl/loaders/csv.rb +0 -31
  38. data/lib/chronicle/etl/loaders/stdout.rb +0 -11
  39. data/lib/chronicle/etl/loaders/table.rb +0 -22
  40. data/lib/chronicle/etl/transformers/json.rb +0 -13
  41. data/lib/chronicle/etl/transformers/null.rb +0 -11
  42. data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
@@ -1,25 +1,32 @@
1
1
  module Chronicle
2
- module Etl
3
- module Loaders
4
- class Loader
5
- def initialize(options = {})
6
- @options = options
7
- end
2
+ module ETL
3
+ # Abstract class representing a Loader for an ETL job
4
+ class Loader
5
+ extend Chronicle::ETL::Catalog
8
6
 
9
- def start; end
10
-
11
- def first_load; end
7
+ # Construct a new instance of this loader. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Loader
11
+ def initialize(options = {})
12
+ @options = options
13
+ end
12
14
 
13
- def load
14
- raise NotImplementedError
15
- end
15
+ # Called once before processing records
16
+ def start; end
16
17
 
17
- def finish; end
18
+ # Load a single record
19
+ def load
20
+ raise NotImplementedError
18
21
  end
22
+
23
+ # Called once there are no more records to process
24
+ def finish; end
19
25
  end
20
26
  end
21
27
  end
22
28
 
23
- require_relative 'stdout'
24
- require_relative 'csv'
25
- require_relative 'table'
29
+ require_relative 'csv_loader'
30
+ require_relative 'rest_loader'
31
+ require_relative 'stdout_loader'
32
+ require_relative 'table_loader'
@@ -0,0 +1,30 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+
5
+ module Chronicle
6
+ module ETL
7
+ class RestLoader < Chronicle::ETL::Loader
8
+ def initialize(options={})
9
+ super(options)
10
+ end
11
+
12
+ def load(result)
13
+ uri = URI.parse("#{@options[:hostname]}#{@options[:endpoint]}")
14
+
15
+ header = {
16
+ "Authorization" => "Bearer #{@options[:access_token]}",
17
+ "Content-Type": 'application/json'
18
+ }
19
+
20
+ http = Net::HTTP.new(uri.host, uri.port)
21
+ request = Net::HTTP::Post.new(uri.request_uri, header)
22
+
23
+ obj = {data: result} unless result[:data]
24
+ request.body = obj.to_json
25
+
26
+ response = http.request(request)
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,9 @@
1
+ module Chronicle
2
+ module ETL
3
+ class StdoutLoader < Chronicle::ETL::Loader
4
+ def load(result)
5
+ puts result.inspect
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,21 @@
1
+ require 'tty/table'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class TableLoader < Chronicle::ETL::Loader
6
+ def initialize(options)
7
+ super(options)
8
+ end
9
+
10
+ def load(result)
11
+ @table ||= TTY::Table.new(header: result.keys)
12
+ values = result.values.map{|x| x.to_s[0..30]}
13
+ @table << values
14
+ end
15
+
16
+ def finish
17
+ puts @table.render(:ascii, padding: [0, 1])
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,57 +1,46 @@
1
- class Chronicle::Etl::Runner
2
- BUILTIN = {
3
- extractor: ['stdin', 'json', 'csv'],
4
- transformer: ['null'],
5
- loader: ['stdout', 'csv', 'table']
6
- }.freeze
7
-
8
- def initialize(options)
9
- @options = options
1
+ require 'colorize'
10
2
 
11
- instantiate_etl_classes
3
+ class Chronicle::ETL::Runner
4
+ def initialize(options = {})
5
+ @options = options
12
6
  end
13
7
 
14
8
  def run!
15
- progress_bar = Chronicle::Etl::Utils::ProgressBarWrapper.new(@extractor.results_count)
16
- @loader.start
9
+ extractor = instantiate_klass(:extractor)
10
+ loader = instantiate_klass(:loader)
17
11
 
18
- @extractor.extract do |result, i|
19
- @loader.first_load(result) if i == 0
12
+ total = extractor.results_count
13
+ progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
20
14
 
21
- transformed_data = @transformer.transform(result)
22
- @loader.load(transformed_data)
15
+ loader.start
23
16
 
17
+ extractor.extract do |data, metadata|
18
+ transformer = instantiate_klass(:transformer, data)
19
+ transformed_data = transformer.transform
20
+
21
+ loader.load(transformed_data)
24
22
  progress_bar.increment
25
23
  end
26
24
 
27
25
  progress_bar.finish
28
- @loader.finish
26
+ loader.finish
29
27
  end
30
28
 
31
29
  private
32
30
 
33
- def instantiate_etl_classes
34
- @extractor = load_etl_class(:extractor, @options[:extractor][:name]).new(@options[:extractor][:options])
35
- @transformer = load_etl_class(:transformer, @options[:transformer][:name]).new(@options[:transformer][:options])
36
- @loader = load_etl_class(:loader, @options[:loader][:name]).new(@options[:loader][:options])
31
+ def instantiate_klass(phase, *args)
32
+ klass = load_etl_class(phase, @options[phase][:name])
33
+ klass.new(@options[phase][:options], *args)
37
34
  end
38
35
 
39
- def load_etl_class(phase, name)
40
- if BUILTIN[phase].include? name
41
- klass_name = "Chronicle::Etl::#{phase.to_s.capitalize}s::#{name.capitalize}"
42
- else
43
- # TODO: come up with syntax for specifying a particular extractor in a provider library
44
- # provider, extractor = name.split(":")
45
- provider = name
46
- begin
47
- require "chronicle/#{provider}"
48
- rescue LoadError => e
49
- warn("Error loading #{phase} '#{provider}'")
50
- warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
51
- exit(false)
52
- end
53
- klass_name = "Chronicle::#{name.capitalize}::Extractor"
54
- end
55
- Object.const_get(klass_name)
36
+ def load_etl_class(phase, identifier)
37
+ Chronicle::ETL::Catalog.identifier_to_klass(phase: phase, identifier: identifier)
38
+ rescue Chronicle::ETL::ProviderNotAvailableError => e
39
+ warn(e.message.red)
40
+ warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
41
+ exit(false)
42
+ rescue Chronicle::ETL::ConnectorNotAvailableError => e
43
+ warn(e.message.red)
44
+ exit(false)
56
45
  end
57
46
  end
@@ -0,0 +1,11 @@
1
+ require 'json'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class JsonTransformer < Chronicle::ETL::Transformer
6
+ def transform data
7
+ return JSON.parse(data)
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,10 @@
1
+ module Chronicle
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
+ def transform
5
+ return @data
6
+ end
7
+ end
8
+
9
+ end
10
+ end
@@ -1,18 +1,35 @@
1
1
  module Chronicle
2
- module Etl
3
- module Transformers
4
- class Transformer
5
- def initialize(options = {})
6
- @options = options
7
- end
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
4
+ class Transformer
5
+ extend Chronicle::ETL::Catalog
8
6
 
9
- def transform data
10
- raise NotImplementedError
11
- end
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
11
+ def initialize(options = {}, data)
12
+ @options = options
13
+ @data = data
12
14
  end
15
+
16
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
17
+ def transform
18
+ raise NotImplementedError
19
+ end
20
+
21
+ # The domain or provider-specific id of the record this transformer is working on.
22
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
23
+ # data source.
24
+ def id; end
25
+
26
+ # The domain or provider-specific timestamp of the record this transformer is working on.
27
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
28
+ # data source.
29
+ def timestamp; end
13
30
  end
14
31
  end
15
32
  end
16
33
 
17
- require_relative 'null'
18
- require_relative 'json'
34
+ require_relative 'json_transformer'
35
+ require_relative 'null_transformer'
@@ -0,0 +1,76 @@
1
+ require 'tty/progressbar'
2
+ require 'colorize'
3
+
4
+ module Chronicle
5
+ module ETL
6
+ module Utils
7
+
8
+ class ProgressBar
9
+ FORMAT_WITH_TOTAL = [
10
+ ':bar ',
11
+ ':percent'.light_white,
12
+ ' | '.light_black,
13
+ ':current'.light_white,
14
+ '/'.light_black,
15
+ ':total'.light_white,
16
+ ' ('.light_black,
17
+ 'ELAPSED:'.light_black,
18
+ ':elapsed'.light_white,
19
+ ' | ETA:'.light_black,
20
+ ':eta'.light_white,
21
+ ' | RATE: '.light_black,
22
+ ':mean_rate'.light_white,
23
+ '/s) '.light_black
24
+ ].join.freeze
25
+
26
+ FORMAT_WITHOUT_TOTAL = [
27
+ ':current'.light_white,
28
+ '/'.light_black,
29
+ '???'.light_white,
30
+ ' ('.light_black,
31
+ 'ELAPSED:'.light_black,
32
+ ':elapsed'.light_white,
33
+ ' | ETA:'.light_black,
34
+ '??:??'.light_white,
35
+ ' | RATE: '.light_black,
36
+ ':mean_rate'.light_white,
37
+ '/s) '.light_black
38
+ ].join.freeze
39
+
40
+ def initialize(title: 'Loading', total:)
41
+ opts = {
42
+ clear: true,
43
+ complete: '▓'.light_blue,
44
+ incomplete: '░'.blue,
45
+ frequency: 10
46
+ }
47
+
48
+ if total
49
+ opts[:total] = total
50
+ format_str = "#{title} #{FORMAT_WITH_TOTAL}"
51
+ @pbar = TTY::ProgressBar.new(FORMAT_WITH_TOTAL, opts)
52
+ else
53
+ format_str = "#{title} #{FORMAT_WITHOUT_TOTAL}"
54
+ opts[:no_width] = true
55
+ end
56
+
57
+ @pbar = TTY::ProgressBar.new(format_str, opts)
58
+
59
+ @pbar.resize
60
+ end
61
+
62
+ def increment
63
+ @pbar.advance(1)
64
+ end
65
+
66
+ def log(message)
67
+ @pbar.log message.inspect
68
+ end
69
+
70
+ def finish
71
+ @pbar.finish
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.2"
2
+ module ETL
3
+ VERSION = "0.2.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-02 00:00:00.000000000 Z
11
+ date: 2020-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -39,75 +39,75 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.8.1
41
41
  - !ruby/object:Gem::Dependency
42
- name: table_print
42
+ name: tty-table
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '0.11'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '0.11'
55
55
  - !ruby/object:Gem::Dependency
56
- name: ruby-progressbar
56
+ name: tty-progressbar
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.10'
61
+ version: '0.17'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.10'
68
+ version: '0.17'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.17'
75
+ version: '2.1'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.17'
82
+ version: '2.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rake
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '13.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
96
+ version: '13.0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rspec
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '3.0'
103
+ version: '3.9'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '3.0'
110
+ version: '3.9'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: pry-byebug
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +122,34 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '3.9'
125
+ - !ruby/object:Gem::Dependency
126
+ name: runcom
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '6.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '6.2'
139
+ - !ruby/object:Gem::Dependency
140
+ name: redcarpet
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '3.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '3.5'
125
153
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
154
  transform it, and load it.
127
155
  email:
@@ -133,7 +161,10 @@ extra_rdoc_files: []
133
161
  files:
134
162
  - ".gitignore"
135
163
  - ".rspec"
164
+ - ".rubocop.yml"
165
+ - ".ruby-version"
136
166
  - ".travis.yml"
167
+ - ".yardopts"
137
168
  - CHANGELOG.md
138
169
  - CODE_OF_CONDUCT.md
139
170
  - Gemfile
@@ -146,19 +177,27 @@ files:
146
177
  - chronicle-etl.gemspec
147
178
  - exe/chronicle-etl
148
179
  - lib/chronicle/etl.rb
149
- - lib/chronicle/etl/cli.rb
150
- - lib/chronicle/etl/extractors/csv.rb
180
+ - lib/chronicle/etl/catalog.rb
181
+ - lib/chronicle/etl/cli/connectors.rb
182
+ - lib/chronicle/etl/cli/jobs.rb
183
+ - lib/chronicle/etl/cli/main.rb
184
+ - lib/chronicle/etl/cli/subcommand_base.rb
185
+ - lib/chronicle/etl/config.rb
186
+ - lib/chronicle/etl/exceptions.rb
187
+ - lib/chronicle/etl/extractors/csv_extractor.rb
151
188
  - lib/chronicle/etl/extractors/extractor.rb
152
- - lib/chronicle/etl/extractors/stdin.rb
153
- - lib/chronicle/etl/loaders/csv.rb
189
+ - lib/chronicle/etl/extractors/file_extractor.rb
190
+ - lib/chronicle/etl/extractors/stdin_extractor.rb
191
+ - lib/chronicle/etl/loaders/csv_loader.rb
154
192
  - lib/chronicle/etl/loaders/loader.rb
155
- - lib/chronicle/etl/loaders/stdout.rb
156
- - lib/chronicle/etl/loaders/table.rb
193
+ - lib/chronicle/etl/loaders/rest_loader.rb
194
+ - lib/chronicle/etl/loaders/stdout_loader.rb
195
+ - lib/chronicle/etl/loaders/table_loader.rb
157
196
  - lib/chronicle/etl/runner.rb
158
- - lib/chronicle/etl/transformers/json.rb
159
- - lib/chronicle/etl/transformers/null.rb
197
+ - lib/chronicle/etl/transformers/json_transformer.rb
198
+ - lib/chronicle/etl/transformers/null_transformer.rb
160
199
  - lib/chronicle/etl/transformers/transformer.rb
161
- - lib/chronicle/etl/utils/progress_bar_wrapper.rb
200
+ - lib/chronicle/etl/utils/progress_bar.rb
162
201
  - lib/chronicle/etl/version.rb
163
202
  homepage: https://github.com/chronicle-app
164
203
  licenses:
@@ -167,7 +206,7 @@ metadata:
167
206
  homepage_uri: https://github.com/chronicle-app
168
207
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
169
208
  changelog_uri: https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md
170
- post_install_message:
209
+ post_install_message:
171
210
  rdoc_options: []
172
211
  require_paths:
173
212
  - lib
@@ -182,8 +221,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
221
  - !ruby/object:Gem::Version
183
222
  version: '0'
184
223
  requirements: []
185
- rubygems_version: 3.0.3
186
- signing_key:
224
+ rubygems_version: 3.1.2
225
+ signing_key:
187
226
  specification_version: 4
188
227
  summary: ETL tool for personal data
189
228
  test_files: []