chronicle-etl 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +28 -1
  4. data/Guardfile +7 -0
  5. data/README.md +149 -85
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +10 -5
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -0
  10. data/lib/chronicle/etl/cli/jobs.rb +44 -12
  11. data/lib/chronicle/etl/cli/main.rb +13 -19
  12. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  13. data/lib/chronicle/etl/cli.rb +7 -0
  14. data/lib/chronicle/etl/configurable.rb +158 -0
  15. data/lib/chronicle/etl/exceptions.rb +7 -1
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  17. data/lib/chronicle/etl/extractors/extractor.rb +23 -19
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  21. data/lib/chronicle/etl/job.rb +1 -1
  22. data/lib/chronicle/etl/job_definition.rb +1 -1
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +1 -1
  24. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  25. data/lib/chronicle/etl/loaders/loader.rb +5 -2
  26. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -24
  28. data/lib/chronicle/etl/logger.rb +1 -0
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +1 -0
  33. data/lib/chronicle/etl/runner.rb +6 -4
  34. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  35. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  36. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  37. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  38. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  39. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +12 -4
  42. metadata +80 -19
  43. data/.ruby-version +0 -1
  44. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  45. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  46. data/lib/chronicle/etl/models/generic.rb +0 -23
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-07 00:00:00.000000000 Z
11
+ date: 2022-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '7.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '7.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: chronic_duration
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -98,16 +98,16 @@ dependencies:
98
98
  name: runcom
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - "~>"
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
- version: '6.2'
103
+ version: '6.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - "~>"
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
- version: '6.2'
110
+ version: '6.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: sequel
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +142,14 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.20'
145
+ version: '1.2'
146
146
  type: :runtime
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.20'
152
+ version: '1.2'
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: tty-progressbar
155
155
  requirement: !ruby/object:Gem::Requirement
@@ -234,6 +234,62 @@ dependencies:
234
234
  - - "~>"
235
235
  - !ruby/object:Gem::Version
236
236
  version: '3.9'
237
+ - !ruby/object:Gem::Dependency
238
+ name: simplecov
239
+ requirement: !ruby/object:Gem::Requirement
240
+ requirements:
241
+ - - "~>"
242
+ - !ruby/object:Gem::Version
243
+ version: '0.21'
244
+ type: :development
245
+ prerelease: false
246
+ version_requirements: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - "~>"
249
+ - !ruby/object:Gem::Version
250
+ version: '0.21'
251
+ - !ruby/object:Gem::Dependency
252
+ name: guard-rspec
253
+ requirement: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - "~>"
256
+ - !ruby/object:Gem::Version
257
+ version: 4.7.3
258
+ type: :development
259
+ prerelease: false
260
+ version_requirements: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - "~>"
263
+ - !ruby/object:Gem::Version
264
+ version: 4.7.3
265
+ - !ruby/object:Gem::Dependency
266
+ name: yard
267
+ requirement: !ruby/object:Gem::Requirement
268
+ requirements:
269
+ - - "~>"
270
+ - !ruby/object:Gem::Version
271
+ version: 0.9.7
272
+ type: :development
273
+ prerelease: false
274
+ version_requirements: !ruby/object:Gem::Requirement
275
+ requirements:
276
+ - - "~>"
277
+ - !ruby/object:Gem::Version
278
+ version: 0.9.7
279
+ - !ruby/object:Gem::Dependency
280
+ name: rubocop
281
+ requirement: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - "~>"
284
+ - !ruby/object:Gem::Version
285
+ version: 1.25.1
286
+ type: :development
287
+ prerelease: false
288
+ version_requirements: !ruby/object:Gem::Requirement
289
+ requirements:
290
+ - - "~>"
291
+ - !ruby/object:Gem::Version
292
+ version: 1.25.1
237
293
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
238
294
  transformer it, and load it.
239
295
  email:
@@ -243,14 +299,15 @@ executables:
243
299
  extensions: []
244
300
  extra_rdoc_files: []
245
301
  files:
302
+ - ".github/workflows/ruby.yml"
246
303
  - ".gitignore"
247
304
  - ".rspec"
248
305
  - ".rubocop.yml"
249
- - ".ruby-version"
250
306
  - ".travis.yml"
251
307
  - ".yardopts"
252
308
  - CODE_OF_CONDUCT.md
253
309
  - Gemfile
310
+ - Guardfile
254
311
  - LICENSE.txt
255
312
  - README.md
256
313
  - Rakefile
@@ -259,17 +316,19 @@ files:
259
316
  - chronicle-etl.gemspec
260
317
  - exe/chronicle-etl
261
318
  - lib/chronicle/etl.rb
319
+ - lib/chronicle/etl/cli.rb
262
320
  - lib/chronicle/etl/cli/connectors.rb
263
321
  - lib/chronicle/etl/cli/jobs.rb
264
322
  - lib/chronicle/etl/cli/main.rb
265
323
  - lib/chronicle/etl/cli/subcommand_base.rb
266
324
  - lib/chronicle/etl/config.rb
325
+ - lib/chronicle/etl/configurable.rb
267
326
  - lib/chronicle/etl/exceptions.rb
268
327
  - lib/chronicle/etl/extraction.rb
269
328
  - lib/chronicle/etl/extractors/csv_extractor.rb
270
329
  - lib/chronicle/etl/extractors/extractor.rb
271
330
  - lib/chronicle/etl/extractors/file_extractor.rb
272
- - lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
331
+ - lib/chronicle/etl/extractors/helpers/input_reader.rb
273
332
  - lib/chronicle/etl/extractors/json_extractor.rb
274
333
  - lib/chronicle/etl/extractors/stdin_extractor.rb
275
334
  - lib/chronicle/etl/job.rb
@@ -277,21 +336,22 @@ files:
277
336
  - lib/chronicle/etl/job_log.rb
278
337
  - lib/chronicle/etl/job_logger.rb
279
338
  - lib/chronicle/etl/loaders/csv_loader.rb
339
+ - lib/chronicle/etl/loaders/json_loader.rb
280
340
  - lib/chronicle/etl/loaders/loader.rb
281
341
  - lib/chronicle/etl/loaders/rest_loader.rb
282
- - lib/chronicle/etl/loaders/stdout_loader.rb
283
342
  - lib/chronicle/etl/loaders/table_loader.rb
284
343
  - lib/chronicle/etl/logger.rb
285
344
  - lib/chronicle/etl/models/activity.rb
286
345
  - lib/chronicle/etl/models/attachment.rb
287
346
  - lib/chronicle/etl/models/base.rb
288
347
  - lib/chronicle/etl/models/entity.rb
289
- - lib/chronicle/etl/models/generic.rb
348
+ - lib/chronicle/etl/models/raw.rb
290
349
  - lib/chronicle/etl/registry/connector_registration.rb
291
350
  - lib/chronicle/etl/registry/registry.rb
292
351
  - lib/chronicle/etl/registry/self_registering.rb
293
352
  - lib/chronicle/etl/runner.rb
294
353
  - lib/chronicle/etl/serializers/jsonapi_serializer.rb
354
+ - lib/chronicle/etl/serializers/raw_serializer.rb
295
355
  - lib/chronicle/etl/serializers/serializer.rb
296
356
  - lib/chronicle/etl/transformers/image_file_transformer.rb
297
357
  - lib/chronicle/etl/transformers/null_transformer.rb
@@ -305,9 +365,10 @@ homepage: https://github.com/chronicle-app
305
365
  licenses:
306
366
  - MIT
307
367
  metadata:
368
+ allowed_push_host: https://rubygems.org
308
369
  homepage_uri: https://github.com/chronicle-app
309
370
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
310
- changelog_uri: https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md
371
+ changelog_uri: https://github.com/chronicle-app/chronicle-etl/releases
311
372
  post_install_message:
312
373
  rdoc_options: []
313
374
  require_paths:
@@ -316,14 +377,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
316
377
  requirements:
317
378
  - - ">="
318
379
  - !ruby/object:Gem::Version
319
- version: '0'
380
+ version: '2.7'
320
381
  required_rubygems_version: !ruby/object:Gem::Requirement
321
382
  requirements:
322
383
  - - ">="
323
384
  - !ruby/object:Gem::Version
324
385
  version: '0'
325
386
  requirements: []
326
- rubygems_version: 3.1.2
387
+ rubygems_version: 3.1.6
327
388
  signing_key:
328
389
  specification_version: 4
329
390
  summary: ETL tool for personal data
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.7.1
data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb DELETED
@@ -1,104 +0,0 @@
1
- require 'pathname'
2
-
3
- module Chronicle
4
- module ETL
5
- module Extractors
6
- module Helpers
7
- module FilesystemReader
8
-
9
- def filenames_in_directory(...)
10
- filenames = gather_files(...)
11
- if block_given?
12
- filenames.each do |filename|
13
- yield filename
14
- end
15
- else
16
- filenames
17
- end
18
- end
19
-
20
- def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
21
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
22
- if yield_each_line
23
- file.each_line do |line|
24
- yield line
25
- end
26
- else
27
- yield file.read
28
- end
29
- end
30
- end
31
-
32
- def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
33
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
34
- yield file
35
- end
36
- end
37
-
38
- def results_count
39
- raise NotImplementedError
40
- # if file?
41
- # return 1
42
- # else
43
- # search_pattern = File.join(@options[:filename], '**/*')
44
- # Dir.glob(search_pattern).count
45
- # end
46
- end
47
-
48
- private
49
-
50
- def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
51
- search_pattern = File.join(path, '**', dir_glob_pattern)
52
- files = Dir.glob(search_pattern)
53
-
54
- files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
55
- files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
56
-
57
- # pass in file sizes in bytes
58
- files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
59
- files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
60
-
61
- # TODO: incorporate sort argument
62
- files.sort_by{ |f| File.mtime(f) }
63
- end
64
-
65
- def select_files_in_directory(path:, dir_glob_pattern: '**/*')
66
- raise IOError.new("#{path} is not a directory.") unless directory?(path)
67
-
68
- search_pattern = File.join(path, dir_glob_pattern)
69
- Dir.glob(search_pattern).each do |filename|
70
- yield(filename)
71
- end
72
- end
73
-
74
- def open_files(filename:, dir_glob_pattern:)
75
- if stdin?(filename)
76
- yield $stdin
77
- elsif directory?(filename)
78
- search_pattern = File.join(filename, dir_glob_pattern)
79
- filenames = Dir.glob(search_pattern)
80
- filenames.each do |filename|
81
- file = File.open(filename)
82
- yield(file)
83
- end
84
- elsif file?(filename)
85
- yield File.open(filename)
86
- end
87
- end
88
-
89
- def stdin?(filename)
90
- filename == $stdin
91
- end
92
-
93
- def directory?(filename)
94
- Pathname.new(filename).directory?
95
- end
96
-
97
- def file?(filename)
98
- Pathname.new(filename).file?
99
- end
100
- end
101
- end
102
- end
103
- end
104
- end
data/lib/chronicle/etl/loaders/stdout_loader.rb DELETED
@@ -1,14 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class StdoutLoader < Chronicle::ETL::Loader
4
- register_connector do |r|
5
- r.description = 'stdout'
6
- end
7
-
8
- def load(record)
9
- serializer = Chronicle::ETL::JSONAPISerializer.new(record)
10
- puts serializer.serializable_hash.to_json
11
- end
12
- end
13
- end
14
- end
data/lib/chronicle/etl/models/generic.rb DELETED
@@ -1,23 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Generic < Chronicle::ETL::Models::Base
7
- TYPE = 'generic'
8
-
9
- attr_accessor :properties
10
-
11
- def initialize(properties = {})
12
- @properties = properties
13
- super
14
- end
15
-
16
- # Generic models have arbitrary attributes stored in @properties
17
- def attributes
18
- @properties.transform_keys(&:to_sym)
19
- end
20
- end
21
- end
22
- end
23
- end