chronicle-etl 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +28 -1
  4. data/Guardfile +7 -0
  5. data/README.md +149 -85
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +10 -5
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -0
  10. data/lib/chronicle/etl/cli/jobs.rb +44 -12
  11. data/lib/chronicle/etl/cli/main.rb +13 -19
  12. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  13. data/lib/chronicle/etl/cli.rb +7 -0
  14. data/lib/chronicle/etl/configurable.rb +158 -0
  15. data/lib/chronicle/etl/exceptions.rb +7 -1
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  17. data/lib/chronicle/etl/extractors/extractor.rb +23 -19
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  21. data/lib/chronicle/etl/job.rb +1 -1
  22. data/lib/chronicle/etl/job_definition.rb +1 -1
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +1 -1
  24. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  25. data/lib/chronicle/etl/loaders/loader.rb +5 -2
  26. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -24
  28. data/lib/chronicle/etl/logger.rb +1 -0
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +1 -0
  33. data/lib/chronicle/etl/runner.rb +6 -4
  34. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  35. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  36. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  37. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  38. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  39. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +12 -4
  42. metadata +80 -19
  43. data/.ruby-version +0 -1
  44. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  45. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  46. data/lib/chronicle/etl/models/generic.rb +0 -23
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-07 00:00:00.000000000 Z
11
+ date: 2022-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '7.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '7.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: chronic_duration
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -98,16 +98,16 @@ dependencies:
98
98
  name: runcom
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - "~>"
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
- version: '6.2'
103
+ version: '6.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - "~>"
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
- version: '6.2'
110
+ version: '6.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: sequel
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +142,14 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.20'
145
+ version: '1.2'
146
146
  type: :runtime
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.20'
152
+ version: '1.2'
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: tty-progressbar
155
155
  requirement: !ruby/object:Gem::Requirement
@@ -234,6 +234,62 @@ dependencies:
234
234
  - - "~>"
235
235
  - !ruby/object:Gem::Version
236
236
  version: '3.9'
237
+ - !ruby/object:Gem::Dependency
238
+ name: simplecov
239
+ requirement: !ruby/object:Gem::Requirement
240
+ requirements:
241
+ - - "~>"
242
+ - !ruby/object:Gem::Version
243
+ version: '0.21'
244
+ type: :development
245
+ prerelease: false
246
+ version_requirements: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - "~>"
249
+ - !ruby/object:Gem::Version
250
+ version: '0.21'
251
+ - !ruby/object:Gem::Dependency
252
+ name: guard-rspec
253
+ requirement: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - "~>"
256
+ - !ruby/object:Gem::Version
257
+ version: 4.7.3
258
+ type: :development
259
+ prerelease: false
260
+ version_requirements: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - "~>"
263
+ - !ruby/object:Gem::Version
264
+ version: 4.7.3
265
+ - !ruby/object:Gem::Dependency
266
+ name: yard
267
+ requirement: !ruby/object:Gem::Requirement
268
+ requirements:
269
+ - - "~>"
270
+ - !ruby/object:Gem::Version
271
+ version: 0.9.7
272
+ type: :development
273
+ prerelease: false
274
+ version_requirements: !ruby/object:Gem::Requirement
275
+ requirements:
276
+ - - "~>"
277
+ - !ruby/object:Gem::Version
278
+ version: 0.9.7
279
+ - !ruby/object:Gem::Dependency
280
+ name: rubocop
281
+ requirement: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - "~>"
284
+ - !ruby/object:Gem::Version
285
+ version: 1.25.1
286
+ type: :development
287
+ prerelease: false
288
+ version_requirements: !ruby/object:Gem::Requirement
289
+ requirements:
290
+ - - "~>"
291
+ - !ruby/object:Gem::Version
292
+ version: 1.25.1
237
293
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
238
294
  transformer it, and load it.
239
295
  email:
@@ -243,14 +299,15 @@ executables:
243
299
  extensions: []
244
300
  extra_rdoc_files: []
245
301
  files:
302
+ - ".github/workflows/ruby.yml"
246
303
  - ".gitignore"
247
304
  - ".rspec"
248
305
  - ".rubocop.yml"
249
- - ".ruby-version"
250
306
  - ".travis.yml"
251
307
  - ".yardopts"
252
308
  - CODE_OF_CONDUCT.md
253
309
  - Gemfile
310
+ - Guardfile
254
311
  - LICENSE.txt
255
312
  - README.md
256
313
  - Rakefile
@@ -259,17 +316,19 @@ files:
259
316
  - chronicle-etl.gemspec
260
317
  - exe/chronicle-etl
261
318
  - lib/chronicle/etl.rb
319
+ - lib/chronicle/etl/cli.rb
262
320
  - lib/chronicle/etl/cli/connectors.rb
263
321
  - lib/chronicle/etl/cli/jobs.rb
264
322
  - lib/chronicle/etl/cli/main.rb
265
323
  - lib/chronicle/etl/cli/subcommand_base.rb
266
324
  - lib/chronicle/etl/config.rb
325
+ - lib/chronicle/etl/configurable.rb
267
326
  - lib/chronicle/etl/exceptions.rb
268
327
  - lib/chronicle/etl/extraction.rb
269
328
  - lib/chronicle/etl/extractors/csv_extractor.rb
270
329
  - lib/chronicle/etl/extractors/extractor.rb
271
330
  - lib/chronicle/etl/extractors/file_extractor.rb
272
- - lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
331
+ - lib/chronicle/etl/extractors/helpers/input_reader.rb
273
332
  - lib/chronicle/etl/extractors/json_extractor.rb
274
333
  - lib/chronicle/etl/extractors/stdin_extractor.rb
275
334
  - lib/chronicle/etl/job.rb
@@ -277,21 +336,22 @@ files:
277
336
  - lib/chronicle/etl/job_log.rb
278
337
  - lib/chronicle/etl/job_logger.rb
279
338
  - lib/chronicle/etl/loaders/csv_loader.rb
339
+ - lib/chronicle/etl/loaders/json_loader.rb
280
340
  - lib/chronicle/etl/loaders/loader.rb
281
341
  - lib/chronicle/etl/loaders/rest_loader.rb
282
- - lib/chronicle/etl/loaders/stdout_loader.rb
283
342
  - lib/chronicle/etl/loaders/table_loader.rb
284
343
  - lib/chronicle/etl/logger.rb
285
344
  - lib/chronicle/etl/models/activity.rb
286
345
  - lib/chronicle/etl/models/attachment.rb
287
346
  - lib/chronicle/etl/models/base.rb
288
347
  - lib/chronicle/etl/models/entity.rb
289
- - lib/chronicle/etl/models/generic.rb
348
+ - lib/chronicle/etl/models/raw.rb
290
349
  - lib/chronicle/etl/registry/connector_registration.rb
291
350
  - lib/chronicle/etl/registry/registry.rb
292
351
  - lib/chronicle/etl/registry/self_registering.rb
293
352
  - lib/chronicle/etl/runner.rb
294
353
  - lib/chronicle/etl/serializers/jsonapi_serializer.rb
354
+ - lib/chronicle/etl/serializers/raw_serializer.rb
295
355
  - lib/chronicle/etl/serializers/serializer.rb
296
356
  - lib/chronicle/etl/transformers/image_file_transformer.rb
297
357
  - lib/chronicle/etl/transformers/null_transformer.rb
@@ -305,9 +365,10 @@ homepage: https://github.com/chronicle-app
305
365
  licenses:
306
366
  - MIT
307
367
  metadata:
368
+ allowed_push_host: https://rubygems.org
308
369
  homepage_uri: https://github.com/chronicle-app
309
370
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
310
- changelog_uri: https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md
371
+ changelog_uri: https://github.com/chronicle-app/chronicle-etl/releases
311
372
  post_install_message:
312
373
  rdoc_options: []
313
374
  require_paths:
@@ -316,14 +377,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
316
377
  requirements:
317
378
  - - ">="
318
379
  - !ruby/object:Gem::Version
319
- version: '0'
380
+ version: '2.7'
320
381
  required_rubygems_version: !ruby/object:Gem::Requirement
321
382
  requirements:
322
383
  - - ">="
323
384
  - !ruby/object:Gem::Version
324
385
  version: '0'
325
386
  requirements: []
326
- rubygems_version: 3.1.2
387
+ rubygems_version: 3.1.6
327
388
  signing_key:
328
389
  specification_version: 4
329
390
  summary: ETL tool for personal data
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.7.1
data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb DELETED
@@ -1,104 +0,0 @@
1
- require 'pathname'
2
-
3
- module Chronicle
4
- module ETL
5
- module Extractors
6
- module Helpers
7
- module FilesystemReader
8
-
9
- def filenames_in_directory(...)
10
- filenames = gather_files(...)
11
- if block_given?
12
- filenames.each do |filename|
13
- yield filename
14
- end
15
- else
16
- filenames
17
- end
18
- end
19
-
20
- def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
21
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
22
- if yield_each_line
23
- file.each_line do |line|
24
- yield line
25
- end
26
- else
27
- yield file.read
28
- end
29
- end
30
- end
31
-
32
- def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
33
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
34
- yield file
35
- end
36
- end
37
-
38
- def results_count
39
- raise NotImplementedError
40
- # if file?
41
- # return 1
42
- # else
43
- # search_pattern = File.join(@options[:filename], '**/*')
44
- # Dir.glob(search_pattern).count
45
- # end
46
- end
47
-
48
- private
49
-
50
- def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
51
- search_pattern = File.join(path, '**', dir_glob_pattern)
52
- files = Dir.glob(search_pattern)
53
-
54
- files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
55
- files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
56
-
57
- # pass in file sizes in bytes
58
- files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
59
- files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
60
-
61
- # TODO: incorporate sort argument
62
- files.sort_by{ |f| File.mtime(f) }
63
- end
64
-
65
- def select_files_in_directory(path:, dir_glob_pattern: '**/*')
66
- raise IOError.new("#{path} is not a directory.") unless directory?(path)
67
-
68
- search_pattern = File.join(path, dir_glob_pattern)
69
- Dir.glob(search_pattern).each do |filename|
70
- yield(filename)
71
- end
72
- end
73
-
74
- def open_files(filename:, dir_glob_pattern:)
75
- if stdin?(filename)
76
- yield $stdin
77
- elsif directory?(filename)
78
- search_pattern = File.join(filename, dir_glob_pattern)
79
- filenames = Dir.glob(search_pattern)
80
- filenames.each do |filename|
81
- file = File.open(filename)
82
- yield(file)
83
- end
84
- elsif file?(filename)
85
- yield File.open(filename)
86
- end
87
- end
88
-
89
- def stdin?(filename)
90
- filename == $stdin
91
- end
92
-
93
- def directory?(filename)
94
- Pathname.new(filename).directory?
95
- end
96
-
97
- def file?(filename)
98
- Pathname.new(filename).file?
99
- end
100
- end
101
- end
102
- end
103
- end
104
- end
data/lib/chronicle/etl/loaders/stdout_loader.rb DELETED
@@ -1,14 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class StdoutLoader < Chronicle::ETL::Loader
4
- register_connector do |r|
5
- r.description = 'stdout'
6
- end
7
-
8
- def load(record)
9
- serializer = Chronicle::ETL::JSONAPISerializer.new(record)
10
- puts serializer.serializable_hash.to_json
11
- end
12
- end
13
- end
14
- end
data/lib/chronicle/etl/models/generic.rb DELETED
@@ -1,23 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Generic < Chronicle::ETL::Models::Base
7
- TYPE = 'generic'
8
-
9
- attr_accessor :properties
10
-
11
- def initialize(properties = {})
12
- @properties = properties
13
- super
14
- end
15
-
16
- # Generic models have arbitrary attributes stored in @properties
17
- def attributes
18
- @properties.transform_keys(&:to_sym)
19
- end
20
- end
21
- end
22
- end
23
- end