chronicle-etl 0.1.4 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile.lock +15 -1
  5. data/README.md +31 -13
  6. data/chronicle-etl.gemspec +6 -1
  7. data/exe/chronicle-etl +2 -2
  8. data/lib/chronicle/etl.rb +15 -2
  9. data/lib/chronicle/etl/catalog.rb +67 -17
  10. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  11. data/lib/chronicle/etl/cli/jobs.rb +116 -0
  12. data/lib/chronicle/etl/cli/main.rb +83 -0
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  14. data/lib/chronicle/etl/config.rb +53 -0
  15. data/lib/chronicle/etl/exceptions.rb +19 -0
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +2 -3
  17. data/lib/chronicle/etl/extractors/extractor.rb +21 -5
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +2 -2
  19. data/lib/chronicle/etl/extractors/stdin_extractor.rb +2 -2
  20. data/lib/chronicle/etl/job.rb +71 -0
  21. data/lib/chronicle/etl/job_definition.rb +51 -0
  22. data/lib/chronicle/etl/job_log.rb +85 -0
  23. data/lib/chronicle/etl/job_logger.rb +78 -0
  24. data/lib/chronicle/etl/loaders/csv_loader.rb +4 -8
  25. data/lib/chronicle/etl/loaders/loader.rb +11 -2
  26. data/lib/chronicle/etl/loaders/rest_loader.rb +33 -0
  27. data/lib/chronicle/etl/loaders/stdout_loader.rb +5 -5
  28. data/lib/chronicle/etl/loaders/table_loader.rb +7 -6
  29. data/lib/chronicle/etl/models/activity.rb +15 -0
  30. data/lib/chronicle/etl/models/base.rb +103 -0
  31. data/lib/chronicle/etl/models/entity.rb +15 -0
  32. data/lib/chronicle/etl/models/generic.rb +23 -0
  33. data/lib/chronicle/etl/runner.rb +24 -46
  34. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -6
  35. data/lib/chronicle/etl/transformers/transformer.rb +23 -7
  36. data/lib/chronicle/etl/utils/hash_utilities.rb +19 -0
  37. data/lib/chronicle/etl/utils/jsonapi.rb +28 -0
  38. data/lib/chronicle/etl/utils/progress_bar.rb +2 -2
  39. data/lib/chronicle/etl/version.rb +2 -2
  40. metadata +91 -5
  41. data/CHANGELOG.md +0 -23
  42. data/lib/chronicle/etl/cli.rb +0 -56
  43. data/lib/chronicle/etl/transformers/json_transformer.rb +0 -11
@@ -1,10 +1,9 @@
1
1
  module Chronicle
2
- module Etl
3
- class NullTransformer < Chronicle::Etl::Transformer
4
- def transform data
5
- return data
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
+ def transform
5
+ Chronicle::ETL::Models::Generic.new(@data)
6
6
  end
7
7
  end
8
-
9
8
  end
10
- end
9
+ end
@@ -1,18 +1,34 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
3
4
  class Transformer
4
- extend Chronicle::Etl::Catalog
5
+ extend Chronicle::ETL::Catalog
5
6
 
6
- def initialize(options = {})
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
11
+ def initialize(options = {}, data)
7
12
  @options = options
13
+ @data = data
14
+ @record = Chronicle::ETL::Models::Activity.new
8
15
  end
9
16
 
10
- def transform data
11
- raise NotImplementedError
12
- end
17
+ # @abstract Subclass is expected to implement #transform
18
+ # @!method transform
19
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
20
+
21
+ # The domain or provider-specific id of the record this transformer is working on.
22
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
23
+ # data source.
24
+ def id; end
25
+
26
+ # The domain or provider-specific timestamp of the record this transformer is working on.
27
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
28
+ # data source.
29
+ def timestamp; end
13
30
  end
14
31
  end
15
32
  end
16
33
 
17
- require_relative 'json_transformer'
18
34
  require_relative 'null_transformer'
@@ -0,0 +1,19 @@
1
+ module Chronicle
2
+ module ETL
3
+ module Utils
4
+ module HashUtilities
5
+ def self.flatten_hash(hash)
6
+ hash.each_with_object({}) do |(k, v), h|
7
+ if v.is_a? Hash
8
+ flatten_hash(v).map do |h_k, h_v|
9
+ h["#{k}.#{h_k}".to_sym] = h_v
10
+ end
11
+ else
12
+ h[k] = v
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,28 @@
1
+ module Chronicle
2
+ module ETL
3
+ module Utils
4
+ module JSONAPI
5
+ # For a given Chronicle::ETL::Model, serialize it as jsonapi
6
+ def self.serialize(record)
7
+ return unless record.is_a? Chronicle::ETL::Models::Base
8
+
9
+ obj = record.identifier_hash
10
+ obj[:attributes] = record.attributes
11
+
12
+ relationships = Hash[record.associations.map do |k, v|
13
+ if v.is_a?(Array)
14
+ data = { data: v.map{ |association| serialize(association) } }
15
+ else
16
+ data = { data: serialize(v) }
17
+ end
18
+
19
+ [k, data]
20
+ end]
21
+
22
+ obj[:relationships] = relationships if relationships.any?
23
+ obj
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -2,7 +2,7 @@ require 'tty/progressbar'
2
2
  require 'colorize'
3
3
 
4
4
  module Chronicle
5
- module Etl
5
+ module ETL
6
6
  module Utils
7
7
 
8
8
  class ProgressBar
@@ -64,7 +64,7 @@ module Chronicle
64
64
  end
65
65
 
66
66
  def log(message)
67
- @pbar.log message.inspect
67
+ @pbar.log message
68
68
  end
69
69
 
70
70
  def finish
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.4"
2
+ module ETL
3
+ VERSION = "0.2.4"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-18 00:00:00.000000000 Z
11
+ date: 2020-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -66,6 +66,34 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.17'
69
+ - !ruby/object:Gem::Dependency
70
+ name: sequel
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '5.35'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '5.35'
83
+ - !ruby/object:Gem::Dependency
84
+ name: deep_merge
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.2'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.2'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: bundler
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +150,48 @@ dependencies:
122
150
  - - "~>"
123
151
  - !ruby/object:Gem::Version
124
152
  version: '3.9'
153
+ - !ruby/object:Gem::Dependency
154
+ name: runcom
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '6.2'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '6.2'
167
+ - !ruby/object:Gem::Dependency
168
+ name: redcarpet
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '3.5'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '3.5'
181
+ - !ruby/object:Gem::Dependency
182
+ name: sqlite3
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: '1.4'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: '1.4'
125
195
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
196
  transform it, and load it.
127
197
  email:
@@ -133,9 +203,10 @@ extra_rdoc_files: []
133
203
  files:
134
204
  - ".gitignore"
135
205
  - ".rspec"
206
+ - ".rubocop.yml"
136
207
  - ".ruby-version"
137
208
  - ".travis.yml"
138
- - CHANGELOG.md
209
+ - ".yardopts"
139
210
  - CODE_OF_CONDUCT.md
140
211
  - Gemfile
141
212
  - Gemfile.lock
@@ -148,19 +219,34 @@ files:
148
219
  - exe/chronicle-etl
149
220
  - lib/chronicle/etl.rb
150
221
  - lib/chronicle/etl/catalog.rb
151
- - lib/chronicle/etl/cli.rb
222
+ - lib/chronicle/etl/cli/connectors.rb
223
+ - lib/chronicle/etl/cli/jobs.rb
224
+ - lib/chronicle/etl/cli/main.rb
225
+ - lib/chronicle/etl/cli/subcommand_base.rb
226
+ - lib/chronicle/etl/config.rb
227
+ - lib/chronicle/etl/exceptions.rb
152
228
  - lib/chronicle/etl/extractors/csv_extractor.rb
153
229
  - lib/chronicle/etl/extractors/extractor.rb
154
230
  - lib/chronicle/etl/extractors/file_extractor.rb
155
231
  - lib/chronicle/etl/extractors/stdin_extractor.rb
232
+ - lib/chronicle/etl/job.rb
233
+ - lib/chronicle/etl/job_definition.rb
234
+ - lib/chronicle/etl/job_log.rb
235
+ - lib/chronicle/etl/job_logger.rb
156
236
  - lib/chronicle/etl/loaders/csv_loader.rb
157
237
  - lib/chronicle/etl/loaders/loader.rb
238
+ - lib/chronicle/etl/loaders/rest_loader.rb
158
239
  - lib/chronicle/etl/loaders/stdout_loader.rb
159
240
  - lib/chronicle/etl/loaders/table_loader.rb
241
+ - lib/chronicle/etl/models/activity.rb
242
+ - lib/chronicle/etl/models/base.rb
243
+ - lib/chronicle/etl/models/entity.rb
244
+ - lib/chronicle/etl/models/generic.rb
160
245
  - lib/chronicle/etl/runner.rb
161
- - lib/chronicle/etl/transformers/json_transformer.rb
162
246
  - lib/chronicle/etl/transformers/null_transformer.rb
163
247
  - lib/chronicle/etl/transformers/transformer.rb
248
+ - lib/chronicle/etl/utils/hash_utilities.rb
249
+ - lib/chronicle/etl/utils/jsonapi.rb
164
250
  - lib/chronicle/etl/utils/progress_bar.rb
165
251
  - lib/chronicle/etl/version.rb
166
252
  homepage: https://github.com/chronicle-app
@@ -1,23 +0,0 @@
1
- # Changelog
2
-
3
- This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
4
-
5
- ## [0.1.4] - 2020-08-18
6
- ### Updated
7
- - Better display of available ETL classes
8
- - Updated documentation
9
-
10
- ## [0.1.3] - 2020-08-13
11
- ### Added
12
- - Ability to list all available ETL classes
13
- - Refactored E, T, L module and class structure
14
- - Better progress bar
15
-
16
- ## [0.1.2] - 2020-08-02
17
- ### Added
18
- - This changelog
19
- - Ability to use extractors, transformers, and loaders from other gems
20
-
21
- ## [0.1.0] - 2020-08-01
22
- ### Added
23
- - Basic job runner and ETL classes
@@ -1,56 +0,0 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
- require 'colorize'
4
-
5
- module Chronicle
6
- module Etl
7
- class CLI < Thor
8
- default_task :job
9
-
10
- desc 'job', 'Runs an ETL job'
11
- method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
12
- method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
13
- method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
14
- method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
15
- method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
16
- method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
17
- method_option :job, aliases: '-j', desc: 'Job configuration file'
18
-
19
- def job
20
- runner_options = {
21
- extractor: {
22
- name: options[:extractor],
23
- options: options[:'extractor-opts']
24
- },
25
- transformer: {
26
- name: options[:transformer],
27
- options: options[:'transformer-opts']
28
- },
29
- loader: {
30
- name: options[:loader],
31
- options: options[:'loader-opts']
32
- }
33
- }
34
-
35
- runner = Runner.new(runner_options)
36
- runner.run!
37
- end
38
-
39
- # FIXME: namespace this differently
40
- desc 'list', 'List all ETL classes'
41
- def list
42
- klasses = Chronicle::Etl::Catalog.available_classes
43
- klasses = klasses.sort_by do |a|
44
- [a[:built_in].to_s, a[:provider], a[:phase]]
45
- end
46
-
47
- headers = klasses.first.keys.map do |key|
48
- key.to_s.capitalize.light_white
49
- end
50
-
51
- table = TTY::Table.new(headers, klasses.map(&:values))
52
- puts table.render(padding: [0, 2])
53
- end
54
- end
55
- end
56
- end
@@ -1,11 +0,0 @@
1
- require 'json'
2
-
3
- module Chronicle
4
- module Etl
5
- class JsonTransformer < Chronicle::Etl::Transformer
6
- def transform data
7
- return JSON.parse(data)
8
- end
9
- end
10
- end
11
- end