chronicle-etl 0.4.0 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/.rubocop.yml +3 -0
  4. data/README.md +156 -81
  5. data/chronicle-etl.gemspec +3 -0
  6. data/lib/chronicle/etl/cli/cli_base.rb +31 -0
  7. data/lib/chronicle/etl/cli/connectors.rb +4 -11
  8. data/lib/chronicle/etl/cli/jobs.rb +49 -22
  9. data/lib/chronicle/etl/cli/main.rb +32 -1
  10. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  11. data/lib/chronicle/etl/cli/subcommand_base.rb +1 -1
  12. data/lib/chronicle/etl/cli.rb +3 -0
  13. data/lib/chronicle/etl/config.rb +7 -4
  14. data/lib/chronicle/etl/configurable.rb +15 -2
  15. data/lib/chronicle/etl/exceptions.rb +29 -2
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -17
  17. data/lib/chronicle/etl/extractors/extractor.rb +5 -5
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +33 -13
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +21 -12
  21. data/lib/chronicle/etl/job.rb +7 -1
  22. data/lib/chronicle/etl/job_definition.rb +32 -6
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +35 -8
  24. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  25. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  26. data/lib/chronicle/etl/loaders/loader.rb +24 -1
  27. data/lib/chronicle/etl/loaders/table_loader.rb +13 -26
  28. data/lib/chronicle/etl/logger.rb +6 -2
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +5 -0
  33. data/lib/chronicle/etl/registry/plugin_registry.rb +75 -0
  34. data/lib/chronicle/etl/registry/registry.rb +27 -14
  35. data/lib/chronicle/etl/runner.rb +35 -17
  36. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  37. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  38. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  39. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +11 -4
  42. metadata +53 -6
  43. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  44. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  45. data/lib/chronicle/etl/models/generic.rb +0 -23
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-25 00:00:00.000000000 Z
11
+ date: 2022-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: '1.2'
153
+ - !ruby/object:Gem::Dependency
154
+ name: thor-hollaback
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '0.2'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '0.2'
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: tty-progressbar
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
180
  version: '0.17'
181
+ - !ruby/object:Gem::Dependency
182
+ name: tty-spinner
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
167
195
  - !ruby/object:Gem::Dependency
168
196
  name: tty-table
169
197
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +206,20 @@ dependencies:
178
206
  - - "~>"
179
207
  - !ruby/object:Gem::Version
180
208
  version: '0.11'
209
+ - !ruby/object:Gem::Dependency
210
+ name: tty-prompt
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: '0.23'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.23'
181
223
  - !ruby/object:Gem::Dependency
182
224
  name: bundler
183
225
  requirement: !ruby/object:Gem::Requirement
@@ -317,9 +359,11 @@ files:
317
359
  - exe/chronicle-etl
318
360
  - lib/chronicle/etl.rb
319
361
  - lib/chronicle/etl/cli.rb
362
+ - lib/chronicle/etl/cli/cli_base.rb
320
363
  - lib/chronicle/etl/cli/connectors.rb
321
364
  - lib/chronicle/etl/cli/jobs.rb
322
365
  - lib/chronicle/etl/cli/main.rb
366
+ - lib/chronicle/etl/cli/plugins.rb
323
367
  - lib/chronicle/etl/cli/subcommand_base.rb
324
368
  - lib/chronicle/etl/config.rb
325
369
  - lib/chronicle/etl/configurable.rb
@@ -328,7 +372,7 @@ files:
328
372
  - lib/chronicle/etl/extractors/csv_extractor.rb
329
373
  - lib/chronicle/etl/extractors/extractor.rb
330
374
  - lib/chronicle/etl/extractors/file_extractor.rb
331
- - lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
375
+ - lib/chronicle/etl/extractors/helpers/input_reader.rb
332
376
  - lib/chronicle/etl/extractors/json_extractor.rb
333
377
  - lib/chronicle/etl/extractors/stdin_extractor.rb
334
378
  - lib/chronicle/etl/job.rb
@@ -336,21 +380,24 @@ files:
336
380
  - lib/chronicle/etl/job_log.rb
337
381
  - lib/chronicle/etl/job_logger.rb
338
382
  - lib/chronicle/etl/loaders/csv_loader.rb
383
+ - lib/chronicle/etl/loaders/helpers/encoding_helper.rb
384
+ - lib/chronicle/etl/loaders/json_loader.rb
339
385
  - lib/chronicle/etl/loaders/loader.rb
340
386
  - lib/chronicle/etl/loaders/rest_loader.rb
341
- - lib/chronicle/etl/loaders/stdout_loader.rb
342
387
  - lib/chronicle/etl/loaders/table_loader.rb
343
388
  - lib/chronicle/etl/logger.rb
344
389
  - lib/chronicle/etl/models/activity.rb
345
390
  - lib/chronicle/etl/models/attachment.rb
346
391
  - lib/chronicle/etl/models/base.rb
347
392
  - lib/chronicle/etl/models/entity.rb
348
- - lib/chronicle/etl/models/generic.rb
393
+ - lib/chronicle/etl/models/raw.rb
349
394
  - lib/chronicle/etl/registry/connector_registration.rb
395
+ - lib/chronicle/etl/registry/plugin_registry.rb
350
396
  - lib/chronicle/etl/registry/registry.rb
351
397
  - lib/chronicle/etl/registry/self_registering.rb
352
398
  - lib/chronicle/etl/runner.rb
353
399
  - lib/chronicle/etl/serializers/jsonapi_serializer.rb
400
+ - lib/chronicle/etl/serializers/raw_serializer.rb
354
401
  - lib/chronicle/etl/serializers/serializer.rb
355
402
  - lib/chronicle/etl/transformers/image_file_transformer.rb
356
403
  - lib/chronicle/etl/transformers/null_transformer.rb
@@ -383,7 +430,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
383
430
  - !ruby/object:Gem::Version
384
431
  version: '0'
385
432
  requirements: []
386
- rubygems_version: 3.1.6
433
+ rubygems_version: 3.3.9
387
434
  signing_key:
388
435
  specification_version: 4
389
436
  summary: ETL tool for personal data
@@ -1,104 +0,0 @@
1
- require 'pathname'
2
-
3
- module Chronicle
4
- module ETL
5
- module Extractors
6
- module Helpers
7
- module FilesystemReader
8
-
9
- def filenames_in_directory(...)
10
- filenames = gather_files(...)
11
- if block_given?
12
- filenames.each do |filename|
13
- yield filename
14
- end
15
- else
16
- filenames
17
- end
18
- end
19
-
20
- def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
21
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
22
- if yield_each_line
23
- file.each_line do |line|
24
- yield line
25
- end
26
- else
27
- yield file.read
28
- end
29
- end
30
- end
31
-
32
- def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
33
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
34
- yield file
35
- end
36
- end
37
-
38
- def results_count
39
- raise NotImplementedError
40
- # if file?
41
- # return 1
42
- # else
43
- # search_pattern = File.join(@options[:filename], '**/*')
44
- # Dir.glob(search_pattern).count
45
- # end
46
- end
47
-
48
- private
49
-
50
- def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
51
- search_pattern = File.join(path, '**', dir_glob_pattern)
52
- files = Dir.glob(search_pattern)
53
-
54
- files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
55
- files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
56
-
57
- # pass in file sizes in bytes
58
- files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
59
- files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
60
-
61
- # TODO: incorporate sort argument
62
- files.sort_by{ |f| File.mtime(f) }
63
- end
64
-
65
- def select_files_in_directory(path:, dir_glob_pattern: '**/*')
66
- raise IOError.new("#{path} is not a directory.") unless directory?(path)
67
-
68
- search_pattern = File.join(path, dir_glob_pattern)
69
- Dir.glob(search_pattern).each do |filename|
70
- yield(filename)
71
- end
72
- end
73
-
74
- def open_files(filename:, dir_glob_pattern:)
75
- if stdin?(filename)
76
- yield $stdin
77
- elsif directory?(filename)
78
- search_pattern = File.join(filename, dir_glob_pattern)
79
- filenames = Dir.glob(search_pattern)
80
- filenames.each do |filename|
81
- file = File.open(filename)
82
- yield(file)
83
- end
84
- elsif file?(filename)
85
- yield File.open(filename)
86
- end
87
- end
88
-
89
- def stdin?(filename)
90
- filename == $stdin
91
- end
92
-
93
- def directory?(filename)
94
- Pathname.new(filename).directory?
95
- end
96
-
97
- def file?(filename)
98
- Pathname.new(filename).file?
99
- end
100
- end
101
- end
102
- end
103
- end
104
- end
@@ -1,14 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class StdoutLoader < Chronicle::ETL::Loader
4
- register_connector do |r|
5
- r.description = 'stdout'
6
- end
7
-
8
- def load(record)
9
- serializer = Chronicle::ETL::JSONAPISerializer.new(record)
10
- puts serializer.serializable_hash.to_json
11
- end
12
- end
13
- end
14
- end
@@ -1,23 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Generic < Chronicle::ETL::Models::Base
7
- TYPE = 'generic'
8
-
9
- attr_accessor :properties
10
-
11
- def initialize(properties = {})
12
- @properties = properties
13
- super
14
- end
15
-
16
- # Generic models have arbitrary attributes stored in @properties
17
- def attributes
18
- @properties.transform_keys(&:to_sym)
19
- end
20
- end
21
- end
22
- end
23
- end