chronicle-etl 0.4.0 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/.rubocop.yml +3 -0
  4. data/README.md +156 -81
  5. data/chronicle-etl.gemspec +3 -0
  6. data/lib/chronicle/etl/cli/cli_base.rb +31 -0
  7. data/lib/chronicle/etl/cli/connectors.rb +4 -11
  8. data/lib/chronicle/etl/cli/jobs.rb +49 -22
  9. data/lib/chronicle/etl/cli/main.rb +32 -1
  10. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  11. data/lib/chronicle/etl/cli/subcommand_base.rb +1 -1
  12. data/lib/chronicle/etl/cli.rb +3 -0
  13. data/lib/chronicle/etl/config.rb +7 -4
  14. data/lib/chronicle/etl/configurable.rb +15 -2
  15. data/lib/chronicle/etl/exceptions.rb +29 -2
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -17
  17. data/lib/chronicle/etl/extractors/extractor.rb +5 -5
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +33 -13
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +21 -12
  21. data/lib/chronicle/etl/job.rb +7 -1
  22. data/lib/chronicle/etl/job_definition.rb +32 -6
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +35 -8
  24. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  25. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  26. data/lib/chronicle/etl/loaders/loader.rb +24 -1
  27. data/lib/chronicle/etl/loaders/table_loader.rb +13 -26
  28. data/lib/chronicle/etl/logger.rb +6 -2
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +5 -0
  33. data/lib/chronicle/etl/registry/plugin_registry.rb +75 -0
  34. data/lib/chronicle/etl/registry/registry.rb +27 -14
  35. data/lib/chronicle/etl/runner.rb +35 -17
  36. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  37. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  38. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  39. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +11 -4
  42. metadata +53 -6
  43. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  44. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  45. data/lib/chronicle/etl/models/generic.rb +0 -23
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-25 00:00:00.000000000 Z
11
+ date: 2022-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: '1.2'
153
+ - !ruby/object:Gem::Dependency
154
+ name: thor-hollaback
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '0.2'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '0.2'
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: tty-progressbar
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
180
  version: '0.17'
181
+ - !ruby/object:Gem::Dependency
182
+ name: tty-spinner
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
167
195
  - !ruby/object:Gem::Dependency
168
196
  name: tty-table
169
197
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +206,20 @@ dependencies:
178
206
  - - "~>"
179
207
  - !ruby/object:Gem::Version
180
208
  version: '0.11'
209
+ - !ruby/object:Gem::Dependency
210
+ name: tty-prompt
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: '0.23'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.23'
181
223
  - !ruby/object:Gem::Dependency
182
224
  name: bundler
183
225
  requirement: !ruby/object:Gem::Requirement
@@ -317,9 +359,11 @@ files:
317
359
  - exe/chronicle-etl
318
360
  - lib/chronicle/etl.rb
319
361
  - lib/chronicle/etl/cli.rb
362
+ - lib/chronicle/etl/cli/cli_base.rb
320
363
  - lib/chronicle/etl/cli/connectors.rb
321
364
  - lib/chronicle/etl/cli/jobs.rb
322
365
  - lib/chronicle/etl/cli/main.rb
366
+ - lib/chronicle/etl/cli/plugins.rb
323
367
  - lib/chronicle/etl/cli/subcommand_base.rb
324
368
  - lib/chronicle/etl/config.rb
325
369
  - lib/chronicle/etl/configurable.rb
@@ -328,7 +372,7 @@ files:
328
372
  - lib/chronicle/etl/extractors/csv_extractor.rb
329
373
  - lib/chronicle/etl/extractors/extractor.rb
330
374
  - lib/chronicle/etl/extractors/file_extractor.rb
331
- - lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
375
+ - lib/chronicle/etl/extractors/helpers/input_reader.rb
332
376
  - lib/chronicle/etl/extractors/json_extractor.rb
333
377
  - lib/chronicle/etl/extractors/stdin_extractor.rb
334
378
  - lib/chronicle/etl/job.rb
@@ -336,21 +380,24 @@ files:
336
380
  - lib/chronicle/etl/job_log.rb
337
381
  - lib/chronicle/etl/job_logger.rb
338
382
  - lib/chronicle/etl/loaders/csv_loader.rb
383
+ - lib/chronicle/etl/loaders/helpers/encoding_helper.rb
384
+ - lib/chronicle/etl/loaders/json_loader.rb
339
385
  - lib/chronicle/etl/loaders/loader.rb
340
386
  - lib/chronicle/etl/loaders/rest_loader.rb
341
- - lib/chronicle/etl/loaders/stdout_loader.rb
342
387
  - lib/chronicle/etl/loaders/table_loader.rb
343
388
  - lib/chronicle/etl/logger.rb
344
389
  - lib/chronicle/etl/models/activity.rb
345
390
  - lib/chronicle/etl/models/attachment.rb
346
391
  - lib/chronicle/etl/models/base.rb
347
392
  - lib/chronicle/etl/models/entity.rb
348
- - lib/chronicle/etl/models/generic.rb
393
+ - lib/chronicle/etl/models/raw.rb
349
394
  - lib/chronicle/etl/registry/connector_registration.rb
395
+ - lib/chronicle/etl/registry/plugin_registry.rb
350
396
  - lib/chronicle/etl/registry/registry.rb
351
397
  - lib/chronicle/etl/registry/self_registering.rb
352
398
  - lib/chronicle/etl/runner.rb
353
399
  - lib/chronicle/etl/serializers/jsonapi_serializer.rb
400
+ - lib/chronicle/etl/serializers/raw_serializer.rb
354
401
  - lib/chronicle/etl/serializers/serializer.rb
355
402
  - lib/chronicle/etl/transformers/image_file_transformer.rb
356
403
  - lib/chronicle/etl/transformers/null_transformer.rb
@@ -383,7 +430,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
383
430
  - !ruby/object:Gem::Version
384
431
  version: '0'
385
432
  requirements: []
386
- rubygems_version: 3.1.6
433
+ rubygems_version: 3.3.9
387
434
  signing_key:
388
435
  specification_version: 4
389
436
  summary: ETL tool for personal data
@@ -1,104 +0,0 @@
1
- require 'pathname'
2
-
3
- module Chronicle
4
- module ETL
5
- module Extractors
6
- module Helpers
7
- module FilesystemReader
8
-
9
- def filenames_in_directory(...)
10
- filenames = gather_files(...)
11
- if block_given?
12
- filenames.each do |filename|
13
- yield filename
14
- end
15
- else
16
- filenames
17
- end
18
- end
19
-
20
- def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
21
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
22
- if yield_each_line
23
- file.each_line do |line|
24
- yield line
25
- end
26
- else
27
- yield file.read
28
- end
29
- end
30
- end
31
-
32
- def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
33
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
34
- yield file
35
- end
36
- end
37
-
38
- def results_count
39
- raise NotImplementedError
40
- # if file?
41
- # return 1
42
- # else
43
- # search_pattern = File.join(@options[:filename], '**/*')
44
- # Dir.glob(search_pattern).count
45
- # end
46
- end
47
-
48
- private
49
-
50
- def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
51
- search_pattern = File.join(path, '**', dir_glob_pattern)
52
- files = Dir.glob(search_pattern)
53
-
54
- files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
55
- files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
56
-
57
- # pass in file sizes in bytes
58
- files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
59
- files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
60
-
61
- # TODO: incorporate sort argument
62
- files.sort_by{ |f| File.mtime(f) }
63
- end
64
-
65
- def select_files_in_directory(path:, dir_glob_pattern: '**/*')
66
- raise IOError.new("#{path} is not a directory.") unless directory?(path)
67
-
68
- search_pattern = File.join(path, dir_glob_pattern)
69
- Dir.glob(search_pattern).each do |filename|
70
- yield(filename)
71
- end
72
- end
73
-
74
- def open_files(filename:, dir_glob_pattern:)
75
- if stdin?(filename)
76
- yield $stdin
77
- elsif directory?(filename)
78
- search_pattern = File.join(filename, dir_glob_pattern)
79
- filenames = Dir.glob(search_pattern)
80
- filenames.each do |filename|
81
- file = File.open(filename)
82
- yield(file)
83
- end
84
- elsif file?(filename)
85
- yield File.open(filename)
86
- end
87
- end
88
-
89
- def stdin?(filename)
90
- filename == $stdin
91
- end
92
-
93
- def directory?(filename)
94
- Pathname.new(filename).directory?
95
- end
96
-
97
- def file?(filename)
98
- Pathname.new(filename).file?
99
- end
100
- end
101
- end
102
- end
103
- end
104
- end
@@ -1,14 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class StdoutLoader < Chronicle::ETL::Loader
4
- register_connector do |r|
5
- r.description = 'stdout'
6
- end
7
-
8
- def load(record)
9
- serializer = Chronicle::ETL::JSONAPISerializer.new(record)
10
- puts serializer.serializable_hash.to_json
11
- end
12
- end
13
- end
14
- end
@@ -1,23 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Generic < Chronicle::ETL::Models::Base
7
- TYPE = 'generic'
8
-
9
- attr_accessor :properties
10
-
11
- def initialize(properties = {})
12
- @properties = properties
13
- super
14
- end
15
-
16
- # Generic models have arbitrary attributes stored in @properties
17
- def attributes
18
- @properties.transform_keys(&:to_sym)
19
- end
20
- end
21
- end
22
- end
23
- end