chronicle-etl 0.1.4 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile.lock +15 -1
  5. data/README.md +31 -13
  6. data/chronicle-etl.gemspec +6 -1
  7. data/exe/chronicle-etl +2 -2
  8. data/lib/chronicle/etl.rb +15 -2
  9. data/lib/chronicle/etl/catalog.rb +67 -17
  10. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  11. data/lib/chronicle/etl/cli/jobs.rb +116 -0
  12. data/lib/chronicle/etl/cli/main.rb +83 -0
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  14. data/lib/chronicle/etl/config.rb +53 -0
  15. data/lib/chronicle/etl/exceptions.rb +19 -0
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +2 -3
  17. data/lib/chronicle/etl/extractors/extractor.rb +21 -5
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +2 -2
  19. data/lib/chronicle/etl/extractors/stdin_extractor.rb +2 -2
  20. data/lib/chronicle/etl/job.rb +71 -0
  21. data/lib/chronicle/etl/job_definition.rb +51 -0
  22. data/lib/chronicle/etl/job_log.rb +85 -0
  23. data/lib/chronicle/etl/job_logger.rb +78 -0
  24. data/lib/chronicle/etl/loaders/csv_loader.rb +4 -8
  25. data/lib/chronicle/etl/loaders/loader.rb +11 -2
  26. data/lib/chronicle/etl/loaders/rest_loader.rb +33 -0
  27. data/lib/chronicle/etl/loaders/stdout_loader.rb +5 -5
  28. data/lib/chronicle/etl/loaders/table_loader.rb +7 -6
  29. data/lib/chronicle/etl/models/activity.rb +15 -0
  30. data/lib/chronicle/etl/models/base.rb +103 -0
  31. data/lib/chronicle/etl/models/entity.rb +15 -0
  32. data/lib/chronicle/etl/models/generic.rb +23 -0
  33. data/lib/chronicle/etl/runner.rb +24 -46
  34. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -6
  35. data/lib/chronicle/etl/transformers/transformer.rb +23 -7
  36. data/lib/chronicle/etl/utils/hash_utilities.rb +19 -0
  37. data/lib/chronicle/etl/utils/jsonapi.rb +28 -0
  38. data/lib/chronicle/etl/utils/progress_bar.rb +2 -2
  39. data/lib/chronicle/etl/version.rb +2 -2
  40. metadata +91 -5
  41. data/CHANGELOG.md +0 -23
  42. data/lib/chronicle/etl/cli.rb +0 -56
  43. data/lib/chronicle/etl/transformers/json_transformer.rb +0 -11
@@ -1,10 +1,9 @@
1
1
  module Chronicle
2
- module Etl
3
- class NullTransformer < Chronicle::Etl::Transformer
4
- def transform data
5
- return data
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
+ def transform
5
+ Chronicle::ETL::Models::Generic.new(@data)
6
6
  end
7
7
  end
8
-
9
8
  end
10
- end
9
+ end
@@ -1,18 +1,34 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
3
4
  class Transformer
4
- extend Chronicle::Etl::Catalog
5
+ extend Chronicle::ETL::Catalog
5
6
 
6
- def initialize(options = {})
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
11
+ def initialize(options = {}, data)
7
12
  @options = options
13
+ @data = data
14
+ @record = Chronicle::ETL::Models::Activity.new
8
15
  end
9
16
 
10
- def transform data
11
- raise NotImplementedError
12
- end
17
+ # @abstract Subclass is expected to implement #transform
18
+ # @!method transform
19
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
20
+
21
+ # The domain or provider-specific id of the record this transformer is working on.
22
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
23
+ # data source each time.
24
+ def id; end
25
+
26
+ # The domain or provider-specific timestamp of the record this transformer is working on.
27
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
28
+ # data source each time.
29
+ def timestamp; end
13
30
  end
14
31
  end
15
32
  end
16
33
 
17
- require_relative 'json_transformer'
18
34
  require_relative 'null_transformer'
@@ -0,0 +1,19 @@
1
+ module Chronicle
2
+ module ETL
3
+ module Utils
4
+ module HashUtilities
5
+ def self.flatten_hash(hash)
6
+ hash.each_with_object({}) do |(k, v), h|
7
+ if v.is_a? Hash
8
+ flatten_hash(v).map do |h_k, h_v|
9
+ h["#{k}.#{h_k}".to_sym] = h_v
10
+ end
11
+ else
12
+ h[k] = v
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,28 @@
1
+ module Chronicle
2
+ module ETL
3
+ module Utils
4
+ module JSONAPI
5
+ # For a given Chronicle::ETL::Model, serialize it as jsonapi
6
+ def self.serialize(record)
7
+ return unless record.is_a? Chronicle::ETL::Models::Base
8
+
9
+ obj = record.identifier_hash
10
+ obj[:attributes] = record.attributes
11
+
12
+ relationships = Hash[record.associations.map do |k, v|
13
+ if v.is_a?(Array)
14
+ data = { data: v.map{ |association| serialize(association) } }
15
+ else
16
+ data = { data: serialize(v) }
17
+ end
18
+
19
+ [k, data]
20
+ end]
21
+
22
+ obj[:relationships] = relationships if relationships.any?
23
+ obj
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -2,7 +2,7 @@ require 'tty/progressbar'
2
2
  require 'colorize'
3
3
 
4
4
  module Chronicle
5
- module Etl
5
+ module ETL
6
6
  module Utils
7
7
 
8
8
  class ProgressBar
@@ -64,7 +64,7 @@ module Chronicle
64
64
  end
65
65
 
66
66
  def log(message)
67
- @pbar.log message.inspect
67
+ @pbar.log message
68
68
  end
69
69
 
70
70
  def finish
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.4"
2
+ module ETL
3
+ VERSION = "0.2.4"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-18 00:00:00.000000000 Z
11
+ date: 2020-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -66,6 +66,34 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.17'
69
+ - !ruby/object:Gem::Dependency
70
+ name: sequel
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '5.35'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '5.35'
83
+ - !ruby/object:Gem::Dependency
84
+ name: deep_merge
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.2'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.2'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: bundler
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +150,48 @@ dependencies:
122
150
  - - "~>"
123
151
  - !ruby/object:Gem::Version
124
152
  version: '3.9'
153
+ - !ruby/object:Gem::Dependency
154
+ name: runcom
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '6.2'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '6.2'
167
+ - !ruby/object:Gem::Dependency
168
+ name: redcarpet
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '3.5'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '3.5'
181
+ - !ruby/object:Gem::Dependency
182
+ name: sqlite3
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: '1.4'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: '1.4'
125
195
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
196
  transformer it, and load it.
127
197
  email:
@@ -133,9 +203,10 @@ extra_rdoc_files: []
133
203
  files:
134
204
  - ".gitignore"
135
205
  - ".rspec"
206
+ - ".rubocop.yml"
136
207
  - ".ruby-version"
137
208
  - ".travis.yml"
138
- - CHANGELOG.md
209
+ - ".yardopts"
139
210
  - CODE_OF_CONDUCT.md
140
211
  - Gemfile
141
212
  - Gemfile.lock
@@ -148,19 +219,34 @@ files:
148
219
  - exe/chronicle-etl
149
220
  - lib/chronicle/etl.rb
150
221
  - lib/chronicle/etl/catalog.rb
151
- - lib/chronicle/etl/cli.rb
222
+ - lib/chronicle/etl/cli/connectors.rb
223
+ - lib/chronicle/etl/cli/jobs.rb
224
+ - lib/chronicle/etl/cli/main.rb
225
+ - lib/chronicle/etl/cli/subcommand_base.rb
226
+ - lib/chronicle/etl/config.rb
227
+ - lib/chronicle/etl/exceptions.rb
152
228
  - lib/chronicle/etl/extractors/csv_extractor.rb
153
229
  - lib/chronicle/etl/extractors/extractor.rb
154
230
  - lib/chronicle/etl/extractors/file_extractor.rb
155
231
  - lib/chronicle/etl/extractors/stdin_extractor.rb
232
+ - lib/chronicle/etl/job.rb
233
+ - lib/chronicle/etl/job_definition.rb
234
+ - lib/chronicle/etl/job_log.rb
235
+ - lib/chronicle/etl/job_logger.rb
156
236
  - lib/chronicle/etl/loaders/csv_loader.rb
157
237
  - lib/chronicle/etl/loaders/loader.rb
238
+ - lib/chronicle/etl/loaders/rest_loader.rb
158
239
  - lib/chronicle/etl/loaders/stdout_loader.rb
159
240
  - lib/chronicle/etl/loaders/table_loader.rb
241
+ - lib/chronicle/etl/models/activity.rb
242
+ - lib/chronicle/etl/models/base.rb
243
+ - lib/chronicle/etl/models/entity.rb
244
+ - lib/chronicle/etl/models/generic.rb
160
245
  - lib/chronicle/etl/runner.rb
161
- - lib/chronicle/etl/transformers/json_transformer.rb
162
246
  - lib/chronicle/etl/transformers/null_transformer.rb
163
247
  - lib/chronicle/etl/transformers/transformer.rb
248
+ - lib/chronicle/etl/utils/hash_utilities.rb
249
+ - lib/chronicle/etl/utils/jsonapi.rb
164
250
  - lib/chronicle/etl/utils/progress_bar.rb
165
251
  - lib/chronicle/etl/version.rb
166
252
  homepage: https://github.com/chronicle-app
@@ -1,23 +0,0 @@
1
- # Changelog
2
-
3
- This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
4
-
5
- ## [0.1.4] - 2020-08-18
6
- ### Updated
7
- - Better display of available ETL classes
8
- - Updated documentation
9
-
10
- ## [0.1.3] - 2020-08-13
11
- ### Added
12
- - Ability to list all available ETL classes
13
- - Refactored E, T, L module and class structure
14
- - Better progress bar
15
-
16
- ## [0.1.2] - 2020-08-02
17
- ### Added
18
- - This changelog
19
- - Ability to use extractors, transformers, and loaders from other gems
20
-
21
- ## [0.1.0] - 2020-08-01
22
- ### Added
23
- - Basic job runner and ETL classes
@@ -1,56 +0,0 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
- require 'colorize'
4
-
5
- module Chronicle
6
- module Etl
7
- class CLI < Thor
8
- default_task :job
9
-
10
- desc 'job', 'Runs an ETL job'
11
- method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
12
- method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
13
- method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
14
- method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
15
- method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
16
- method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
17
- method_option :job, aliases: '-j', desc: 'Job configuration file'
18
-
19
- def job
20
- runner_options = {
21
- extractor: {
22
- name: options[:extractor],
23
- options: options[:'extractor-opts']
24
- },
25
- transformer: {
26
- name: options[:transformer],
27
- options: options[:'transformer-opts']
28
- },
29
- loader: {
30
- name: options[:loader],
31
- options: options[:'loader-opts']
32
- }
33
- }
34
-
35
- runner = Runner.new(runner_options)
36
- runner.run!
37
- end
38
-
39
- # FIXME: namespace this differently
40
- desc 'list', 'List all ETL classes'
41
- def list
42
- klasses = Chronicle::Etl::Catalog.available_classes
43
- klasses = klasses.sort_by do |a|
44
- [a[:built_in].to_s, a[:provider], a[:phase]]
45
- end
46
-
47
- headers = klasses.first.keys.map do |key|
48
- key.to_s.capitalize.light_white
49
- end
50
-
51
- table = TTY::Table.new(headers, klasses.map(&:values))
52
- puts table.render(padding: [0, 2])
53
- end
54
- end
55
- end
56
- end
@@ -1,11 +0,0 @@
1
- require 'json'
2
-
3
- module Chronicle
4
- module Etl
5
- class JsonTransformer < Chronicle::Etl::Transformer
6
- def transform data
7
- return JSON.parse(data)
8
- end
9
- end
10
- end
11
- end