chronicle-etl 0.3.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +31 -1
  4. data/Guardfile +7 -0
  5. data/README.md +157 -82
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +11 -3
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -5
  10. data/lib/chronicle/etl/cli/jobs.rb +90 -24
  11. data/lib/chronicle/etl/cli/main.rb +41 -19
  12. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  14. data/lib/chronicle/etl/cli.rb +9 -0
  15. data/lib/chronicle/etl/config.rb +7 -4
  16. data/lib/chronicle/etl/configurable.rb +163 -0
  17. data/lib/chronicle/etl/exceptions.rb +29 -1
  18. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  19. data/lib/chronicle/etl/extractors/extractor.rb +16 -15
  20. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  21. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  22. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  23. data/lib/chronicle/etl/job.rb +8 -2
  24. data/lib/chronicle/etl/job_definition.rb +20 -5
  25. data/lib/chronicle/etl/loaders/csv_loader.rb +36 -9
  26. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  27. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  28. data/lib/chronicle/etl/loaders/loader.rb +28 -2
  29. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  30. data/lib/chronicle/etl/loaders/table_loader.rb +18 -37
  31. data/lib/chronicle/etl/logger.rb +6 -2
  32. data/lib/chronicle/etl/models/base.rb +3 -0
  33. data/lib/chronicle/etl/models/entity.rb +8 -2
  34. data/lib/chronicle/etl/models/raw.rb +26 -0
  35. data/lib/chronicle/etl/registry/connector_registration.rb +6 -0
  36. data/lib/chronicle/etl/registry/plugin_registry.rb +70 -0
  37. data/lib/chronicle/etl/registry/registry.rb +27 -14
  38. data/lib/chronicle/etl/runner.rb +35 -17
  39. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  40. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  41. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  42. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  43. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  44. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  45. data/lib/chronicle/etl/version.rb +1 -1
  46. data/lib/chronicle/etl.rb +12 -4
  47. metadata +123 -18
  48. data/.ruby-version +0 -1
  49. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  50. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  51. data/lib/chronicle/etl/models/generic.rb +0 -23
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-10 00:00:00.000000000 Z
11
+ date: 2022-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '7.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '7.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: chronic_duration
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -98,16 +98,16 @@ dependencies:
98
98
  name: runcom
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - "~>"
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
- version: '6.2'
103
+ version: '6.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - "~>"
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
- version: '6.2'
110
+ version: '6.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: sequel
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +142,28 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.20'
145
+ version: '1.2'
146
146
  type: :runtime
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.20'
152
+ version: '1.2'
153
+ - !ruby/object:Gem::Dependency
154
+ name: thor-hollaback
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '0.2'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '0.2'
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: tty-progressbar
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
180
  version: '0.17'
181
+ - !ruby/object:Gem::Dependency
182
+ name: tty-spinner
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
167
195
  - !ruby/object:Gem::Dependency
168
196
  name: tty-table
169
197
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +206,20 @@ dependencies:
178
206
  - - "~>"
179
207
  - !ruby/object:Gem::Version
180
208
  version: '0.11'
209
+ - !ruby/object:Gem::Dependency
210
+ name: tty-prompt
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: '0.23'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.23'
181
223
  - !ruby/object:Gem::Dependency
182
224
  name: bundler
183
225
  requirement: !ruby/object:Gem::Requirement
@@ -234,6 +276,62 @@ dependencies:
234
276
  - - "~>"
235
277
  - !ruby/object:Gem::Version
236
278
  version: '3.9'
279
+ - !ruby/object:Gem::Dependency
280
+ name: simplecov
281
+ requirement: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - "~>"
284
+ - !ruby/object:Gem::Version
285
+ version: '0.21'
286
+ type: :development
287
+ prerelease: false
288
+ version_requirements: !ruby/object:Gem::Requirement
289
+ requirements:
290
+ - - "~>"
291
+ - !ruby/object:Gem::Version
292
+ version: '0.21'
293
+ - !ruby/object:Gem::Dependency
294
+ name: guard-rspec
295
+ requirement: !ruby/object:Gem::Requirement
296
+ requirements:
297
+ - - "~>"
298
+ - !ruby/object:Gem::Version
299
+ version: 4.7.3
300
+ type: :development
301
+ prerelease: false
302
+ version_requirements: !ruby/object:Gem::Requirement
303
+ requirements:
304
+ - - "~>"
305
+ - !ruby/object:Gem::Version
306
+ version: 4.7.3
307
+ - !ruby/object:Gem::Dependency
308
+ name: yard
309
+ requirement: !ruby/object:Gem::Requirement
310
+ requirements:
311
+ - - "~>"
312
+ - !ruby/object:Gem::Version
313
+ version: 0.9.7
314
+ type: :development
315
+ prerelease: false
316
+ version_requirements: !ruby/object:Gem::Requirement
317
+ requirements:
318
+ - - "~>"
319
+ - !ruby/object:Gem::Version
320
+ version: 0.9.7
321
+ - !ruby/object:Gem::Dependency
322
+ name: rubocop
323
+ requirement: !ruby/object:Gem::Requirement
324
+ requirements:
325
+ - - "~>"
326
+ - !ruby/object:Gem::Version
327
+ version: 1.25.1
328
+ type: :development
329
+ prerelease: false
330
+ version_requirements: !ruby/object:Gem::Requirement
331
+ requirements:
332
+ - - "~>"
333
+ - !ruby/object:Gem::Version
334
+ version: 1.25.1
237
335
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
238
336
  transformer it, and load it.
239
337
  email:
@@ -243,14 +341,15 @@ executables:
243
341
  extensions: []
244
342
  extra_rdoc_files: []
245
343
  files:
344
+ - ".github/workflows/ruby.yml"
246
345
  - ".gitignore"
247
346
  - ".rspec"
248
347
  - ".rubocop.yml"
249
- - ".ruby-version"
250
348
  - ".travis.yml"
251
349
  - ".yardopts"
252
350
  - CODE_OF_CONDUCT.md
253
351
  - Gemfile
352
+ - Guardfile
254
353
  - LICENSE.txt
255
354
  - README.md
256
355
  - Rakefile
@@ -259,17 +358,20 @@ files:
259
358
  - chronicle-etl.gemspec
260
359
  - exe/chronicle-etl
261
360
  - lib/chronicle/etl.rb
361
+ - lib/chronicle/etl/cli.rb
262
362
  - lib/chronicle/etl/cli/connectors.rb
263
363
  - lib/chronicle/etl/cli/jobs.rb
264
364
  - lib/chronicle/etl/cli/main.rb
365
+ - lib/chronicle/etl/cli/plugins.rb
265
366
  - lib/chronicle/etl/cli/subcommand_base.rb
266
367
  - lib/chronicle/etl/config.rb
368
+ - lib/chronicle/etl/configurable.rb
267
369
  - lib/chronicle/etl/exceptions.rb
268
370
  - lib/chronicle/etl/extraction.rb
269
371
  - lib/chronicle/etl/extractors/csv_extractor.rb
270
372
  - lib/chronicle/etl/extractors/extractor.rb
271
373
  - lib/chronicle/etl/extractors/file_extractor.rb
272
- - lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
374
+ - lib/chronicle/etl/extractors/helpers/input_reader.rb
273
375
  - lib/chronicle/etl/extractors/json_extractor.rb
274
376
  - lib/chronicle/etl/extractors/stdin_extractor.rb
275
377
  - lib/chronicle/etl/job.rb
@@ -277,21 +379,24 @@ files:
277
379
  - lib/chronicle/etl/job_log.rb
278
380
  - lib/chronicle/etl/job_logger.rb
279
381
  - lib/chronicle/etl/loaders/csv_loader.rb
382
+ - lib/chronicle/etl/loaders/helpers/encoding_helper.rb
383
+ - lib/chronicle/etl/loaders/json_loader.rb
280
384
  - lib/chronicle/etl/loaders/loader.rb
281
385
  - lib/chronicle/etl/loaders/rest_loader.rb
282
- - lib/chronicle/etl/loaders/stdout_loader.rb
283
386
  - lib/chronicle/etl/loaders/table_loader.rb
284
387
  - lib/chronicle/etl/logger.rb
285
388
  - lib/chronicle/etl/models/activity.rb
286
389
  - lib/chronicle/etl/models/attachment.rb
287
390
  - lib/chronicle/etl/models/base.rb
288
391
  - lib/chronicle/etl/models/entity.rb
289
- - lib/chronicle/etl/models/generic.rb
392
+ - lib/chronicle/etl/models/raw.rb
290
393
  - lib/chronicle/etl/registry/connector_registration.rb
394
+ - lib/chronicle/etl/registry/plugin_registry.rb
291
395
  - lib/chronicle/etl/registry/registry.rb
292
396
  - lib/chronicle/etl/registry/self_registering.rb
293
397
  - lib/chronicle/etl/runner.rb
294
398
  - lib/chronicle/etl/serializers/jsonapi_serializer.rb
399
+ - lib/chronicle/etl/serializers/raw_serializer.rb
295
400
  - lib/chronicle/etl/serializers/serializer.rb
296
401
  - lib/chronicle/etl/transformers/image_file_transformer.rb
297
402
  - lib/chronicle/etl/transformers/null_transformer.rb
@@ -317,14 +422,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
317
422
  requirements:
318
423
  - - ">="
319
424
  - !ruby/object:Gem::Version
320
- version: '0'
425
+ version: '2.7'
321
426
  required_rubygems_version: !ruby/object:Gem::Requirement
322
427
  requirements:
323
428
  - - ">="
324
429
  - !ruby/object:Gem::Version
325
430
  version: '0'
326
431
  requirements: []
327
- rubygems_version: 3.1.2
432
+ rubygems_version: 3.3.9
328
433
  signing_key:
329
434
  specification_version: 4
330
435
  summary: ETL tool for personal data
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.7.1
@@ -1,104 +0,0 @@
1
- require 'pathname'
2
-
3
- module Chronicle
4
- module ETL
5
- module Extractors
6
- module Helpers
7
- module FilesystemReader
8
-
9
- def filenames_in_directory(...)
10
- filenames = gather_files(...)
11
- if block_given?
12
- filenames.each do |filename|
13
- yield filename
14
- end
15
- else
16
- filenames
17
- end
18
- end
19
-
20
- def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
21
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
22
- if yield_each_line
23
- file.each_line do |line|
24
- yield line
25
- end
26
- else
27
- yield file.read
28
- end
29
- end
30
- end
31
-
32
- def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
33
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
34
- yield file
35
- end
36
- end
37
-
38
- def results_count
39
- raise NotImplementedError
40
- # if file?
41
- # return 1
42
- # else
43
- # search_pattern = File.join(@options[:filename], '**/*')
44
- # Dir.glob(search_pattern).count
45
- # end
46
- end
47
-
48
- private
49
-
50
- def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
51
- search_pattern = File.join(path, '**', dir_glob_pattern)
52
- files = Dir.glob(search_pattern)
53
-
54
- files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
55
- files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
56
-
57
- # pass in file sizes in bytes
58
- files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
59
- files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
60
-
61
- # TODO: incorporate sort argument
62
- files.sort_by{ |f| File.mtime(f) }
63
- end
64
-
65
- def select_files_in_directory(path:, dir_glob_pattern: '**/*')
66
- raise IOError.new("#{path} is not a directory.") unless directory?(path)
67
-
68
- search_pattern = File.join(path, dir_glob_pattern)
69
- Dir.glob(search_pattern).each do |filename|
70
- yield(filename)
71
- end
72
- end
73
-
74
- def open_files(filename:, dir_glob_pattern:)
75
- if stdin?(filename)
76
- yield $stdin
77
- elsif directory?(filename)
78
- search_pattern = File.join(filename, dir_glob_pattern)
79
- filenames = Dir.glob(search_pattern)
80
- filenames.each do |filename|
81
- file = File.open(filename)
82
- yield(file)
83
- end
84
- elsif file?(filename)
85
- yield File.open(filename)
86
- end
87
- end
88
-
89
- def stdin?(filename)
90
- filename == $stdin
91
- end
92
-
93
- def directory?(filename)
94
- Pathname.new(filename).directory?
95
- end
96
-
97
- def file?(filename)
98
- Pathname.new(filename).file?
99
- end
100
- end
101
- end
102
- end
103
- end
104
- end
@@ -1,14 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class StdoutLoader < Chronicle::ETL::Loader
4
- register_connector do |r|
5
- r.description = 'stdout'
6
- end
7
-
8
- def load(record)
9
- serializer = Chronicle::ETL::JSONAPISerializer.new(record)
10
- puts serializer.serializable_hash.to_json
11
- end
12
- end
13
- end
14
- end
@@ -1,23 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Generic < Chronicle::ETL::Models::Base
7
- TYPE = 'generic'
8
-
9
- attr_accessor :properties
10
-
11
- def initialize(properties = {})
12
- @properties = properties
13
- super
14
- end
15
-
16
- # Generic models have arbitrary attributes stored in @properties
17
- def attributes
18
- @properties.transform_keys(&:to_sym)
19
- end
20
- end
21
- end
22
- end
23
- end