chronicle-etl 0.3.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +31 -1
  4. data/Guardfile +7 -0
  5. data/README.md +157 -82
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +11 -3
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -5
  10. data/lib/chronicle/etl/cli/jobs.rb +90 -24
  11. data/lib/chronicle/etl/cli/main.rb +41 -19
  12. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  14. data/lib/chronicle/etl/cli.rb +9 -0
  15. data/lib/chronicle/etl/config.rb +7 -4
  16. data/lib/chronicle/etl/configurable.rb +163 -0
  17. data/lib/chronicle/etl/exceptions.rb +29 -1
  18. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  19. data/lib/chronicle/etl/extractors/extractor.rb +16 -15
  20. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  21. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  22. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  23. data/lib/chronicle/etl/job.rb +8 -2
  24. data/lib/chronicle/etl/job_definition.rb +20 -5
  25. data/lib/chronicle/etl/loaders/csv_loader.rb +36 -9
  26. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  27. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  28. data/lib/chronicle/etl/loaders/loader.rb +28 -2
  29. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  30. data/lib/chronicle/etl/loaders/table_loader.rb +18 -37
  31. data/lib/chronicle/etl/logger.rb +6 -2
  32. data/lib/chronicle/etl/models/base.rb +3 -0
  33. data/lib/chronicle/etl/models/entity.rb +8 -2
  34. data/lib/chronicle/etl/models/raw.rb +26 -0
  35. data/lib/chronicle/etl/registry/connector_registration.rb +6 -0
  36. data/lib/chronicle/etl/registry/plugin_registry.rb +70 -0
  37. data/lib/chronicle/etl/registry/registry.rb +27 -14
  38. data/lib/chronicle/etl/runner.rb +35 -17
  39. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  40. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  41. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  42. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  43. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  44. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  45. data/lib/chronicle/etl/version.rb +1 -1
  46. data/lib/chronicle/etl.rb +12 -4
  47. metadata +123 -18
  48. data/.ruby-version +0 -1
  49. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  50. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  51. data/lib/chronicle/etl/models/generic.rb +0 -23
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-10 00:00:00.000000000 Z
11
+ date: 2022-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '7.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '7.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: chronic_duration
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -98,16 +98,16 @@ dependencies:
98
98
  name: runcom
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - "~>"
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
- version: '6.2'
103
+ version: '6.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - "~>"
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
- version: '6.2'
110
+ version: '6.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: sequel
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +142,28 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.20'
145
+ version: '1.2'
146
146
  type: :runtime
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.20'
152
+ version: '1.2'
153
+ - !ruby/object:Gem::Dependency
154
+ name: thor-hollaback
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '0.2'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '0.2'
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: tty-progressbar
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
180
  version: '0.17'
181
+ - !ruby/object:Gem::Dependency
182
+ name: tty-spinner
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
167
195
  - !ruby/object:Gem::Dependency
168
196
  name: tty-table
169
197
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +206,20 @@ dependencies:
178
206
  - - "~>"
179
207
  - !ruby/object:Gem::Version
180
208
  version: '0.11'
209
+ - !ruby/object:Gem::Dependency
210
+ name: tty-prompt
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: '0.23'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.23'
181
223
  - !ruby/object:Gem::Dependency
182
224
  name: bundler
183
225
  requirement: !ruby/object:Gem::Requirement
@@ -234,6 +276,62 @@ dependencies:
234
276
  - - "~>"
235
277
  - !ruby/object:Gem::Version
236
278
  version: '3.9'
279
+ - !ruby/object:Gem::Dependency
280
+ name: simplecov
281
+ requirement: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - "~>"
284
+ - !ruby/object:Gem::Version
285
+ version: '0.21'
286
+ type: :development
287
+ prerelease: false
288
+ version_requirements: !ruby/object:Gem::Requirement
289
+ requirements:
290
+ - - "~>"
291
+ - !ruby/object:Gem::Version
292
+ version: '0.21'
293
+ - !ruby/object:Gem::Dependency
294
+ name: guard-rspec
295
+ requirement: !ruby/object:Gem::Requirement
296
+ requirements:
297
+ - - "~>"
298
+ - !ruby/object:Gem::Version
299
+ version: 4.7.3
300
+ type: :development
301
+ prerelease: false
302
+ version_requirements: !ruby/object:Gem::Requirement
303
+ requirements:
304
+ - - "~>"
305
+ - !ruby/object:Gem::Version
306
+ version: 4.7.3
307
+ - !ruby/object:Gem::Dependency
308
+ name: yard
309
+ requirement: !ruby/object:Gem::Requirement
310
+ requirements:
311
+ - - "~>"
312
+ - !ruby/object:Gem::Version
313
+ version: 0.9.7
314
+ type: :development
315
+ prerelease: false
316
+ version_requirements: !ruby/object:Gem::Requirement
317
+ requirements:
318
+ - - "~>"
319
+ - !ruby/object:Gem::Version
320
+ version: 0.9.7
321
+ - !ruby/object:Gem::Dependency
322
+ name: rubocop
323
+ requirement: !ruby/object:Gem::Requirement
324
+ requirements:
325
+ - - "~>"
326
+ - !ruby/object:Gem::Version
327
+ version: 1.25.1
328
+ type: :development
329
+ prerelease: false
330
+ version_requirements: !ruby/object:Gem::Requirement
331
+ requirements:
332
+ - - "~>"
333
+ - !ruby/object:Gem::Version
334
+ version: 1.25.1
237
335
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
238
336
  transformer it, and load it.
239
337
  email:
@@ -243,14 +341,15 @@ executables:
243
341
  extensions: []
244
342
  extra_rdoc_files: []
245
343
  files:
344
+ - ".github/workflows/ruby.yml"
246
345
  - ".gitignore"
247
346
  - ".rspec"
248
347
  - ".rubocop.yml"
249
- - ".ruby-version"
250
348
  - ".travis.yml"
251
349
  - ".yardopts"
252
350
  - CODE_OF_CONDUCT.md
253
351
  - Gemfile
352
+ - Guardfile
254
353
  - LICENSE.txt
255
354
  - README.md
256
355
  - Rakefile
@@ -259,17 +358,20 @@ files:
259
358
  - chronicle-etl.gemspec
260
359
  - exe/chronicle-etl
261
360
  - lib/chronicle/etl.rb
361
+ - lib/chronicle/etl/cli.rb
262
362
  - lib/chronicle/etl/cli/connectors.rb
263
363
  - lib/chronicle/etl/cli/jobs.rb
264
364
  - lib/chronicle/etl/cli/main.rb
365
+ - lib/chronicle/etl/cli/plugins.rb
265
366
  - lib/chronicle/etl/cli/subcommand_base.rb
266
367
  - lib/chronicle/etl/config.rb
368
+ - lib/chronicle/etl/configurable.rb
267
369
  - lib/chronicle/etl/exceptions.rb
268
370
  - lib/chronicle/etl/extraction.rb
269
371
  - lib/chronicle/etl/extractors/csv_extractor.rb
270
372
  - lib/chronicle/etl/extractors/extractor.rb
271
373
  - lib/chronicle/etl/extractors/file_extractor.rb
272
- - lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
374
+ - lib/chronicle/etl/extractors/helpers/input_reader.rb
273
375
  - lib/chronicle/etl/extractors/json_extractor.rb
274
376
  - lib/chronicle/etl/extractors/stdin_extractor.rb
275
377
  - lib/chronicle/etl/job.rb
@@ -277,21 +379,24 @@ files:
277
379
  - lib/chronicle/etl/job_log.rb
278
380
  - lib/chronicle/etl/job_logger.rb
279
381
  - lib/chronicle/etl/loaders/csv_loader.rb
382
+ - lib/chronicle/etl/loaders/helpers/encoding_helper.rb
383
+ - lib/chronicle/etl/loaders/json_loader.rb
280
384
  - lib/chronicle/etl/loaders/loader.rb
281
385
  - lib/chronicle/etl/loaders/rest_loader.rb
282
- - lib/chronicle/etl/loaders/stdout_loader.rb
283
386
  - lib/chronicle/etl/loaders/table_loader.rb
284
387
  - lib/chronicle/etl/logger.rb
285
388
  - lib/chronicle/etl/models/activity.rb
286
389
  - lib/chronicle/etl/models/attachment.rb
287
390
  - lib/chronicle/etl/models/base.rb
288
391
  - lib/chronicle/etl/models/entity.rb
289
- - lib/chronicle/etl/models/generic.rb
392
+ - lib/chronicle/etl/models/raw.rb
290
393
  - lib/chronicle/etl/registry/connector_registration.rb
394
+ - lib/chronicle/etl/registry/plugin_registry.rb
291
395
  - lib/chronicle/etl/registry/registry.rb
292
396
  - lib/chronicle/etl/registry/self_registering.rb
293
397
  - lib/chronicle/etl/runner.rb
294
398
  - lib/chronicle/etl/serializers/jsonapi_serializer.rb
399
+ - lib/chronicle/etl/serializers/raw_serializer.rb
295
400
  - lib/chronicle/etl/serializers/serializer.rb
296
401
  - lib/chronicle/etl/transformers/image_file_transformer.rb
297
402
  - lib/chronicle/etl/transformers/null_transformer.rb
@@ -317,14 +422,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
317
422
  requirements:
318
423
  - - ">="
319
424
  - !ruby/object:Gem::Version
320
- version: '0'
425
+ version: '2.7'
321
426
  required_rubygems_version: !ruby/object:Gem::Requirement
322
427
  requirements:
323
428
  - - ">="
324
429
  - !ruby/object:Gem::Version
325
430
  version: '0'
326
431
  requirements: []
327
- rubygems_version: 3.1.2
432
+ rubygems_version: 3.3.9
328
433
  signing_key:
329
434
  specification_version: 4
330
435
  summary: ETL tool for personal data
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.7.1
@@ -1,104 +0,0 @@
1
- require 'pathname'
2
-
3
- module Chronicle
4
- module ETL
5
- module Extractors
6
- module Helpers
7
- module FilesystemReader
8
-
9
- def filenames_in_directory(...)
10
- filenames = gather_files(...)
11
- if block_given?
12
- filenames.each do |filename|
13
- yield filename
14
- end
15
- else
16
- filenames
17
- end
18
- end
19
-
20
- def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
21
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
22
- if yield_each_line
23
- file.each_line do |line|
24
- yield line
25
- end
26
- else
27
- yield file.read
28
- end
29
- end
30
- end
31
-
32
- def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
33
- open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
34
- yield file
35
- end
36
- end
37
-
38
- def results_count
39
- raise NotImplementedError
40
- # if file?
41
- # return 1
42
- # else
43
- # search_pattern = File.join(@options[:filename], '**/*')
44
- # Dir.glob(search_pattern).count
45
- # end
46
- end
47
-
48
- private
49
-
50
- def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
51
- search_pattern = File.join(path, '**', dir_glob_pattern)
52
- files = Dir.glob(search_pattern)
53
-
54
- files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
55
- files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
56
-
57
- # pass in file sizes in bytes
58
- files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
59
- files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
60
-
61
- # TODO: incorporate sort argument
62
- files.sort_by{ |f| File.mtime(f) }
63
- end
64
-
65
- def select_files_in_directory(path:, dir_glob_pattern: '**/*')
66
- raise IOError.new("#{path} is not a directory.") unless directory?(path)
67
-
68
- search_pattern = File.join(path, dir_glob_pattern)
69
- Dir.glob(search_pattern).each do |filename|
70
- yield(filename)
71
- end
72
- end
73
-
74
- def open_files(filename:, dir_glob_pattern:)
75
- if stdin?(filename)
76
- yield $stdin
77
- elsif directory?(filename)
78
- search_pattern = File.join(filename, dir_glob_pattern)
79
- filenames = Dir.glob(search_pattern)
80
- filenames.each do |filename|
81
- file = File.open(filename)
82
- yield(file)
83
- end
84
- elsif file?(filename)
85
- yield File.open(filename)
86
- end
87
- end
88
-
89
- def stdin?(filename)
90
- filename == $stdin
91
- end
92
-
93
- def directory?(filename)
94
- Pathname.new(filename).directory?
95
- end
96
-
97
- def file?(filename)
98
- Pathname.new(filename).file?
99
- end
100
- end
101
- end
102
- end
103
- end
104
- end
@@ -1,14 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class StdoutLoader < Chronicle::ETL::Loader
4
- register_connector do |r|
5
- r.description = 'stdout'
6
- end
7
-
8
- def load(record)
9
- serializer = Chronicle::ETL::JSONAPISerializer.new(record)
10
- puts serializer.serializable_hash.to_json
11
- end
12
- end
13
- end
14
- end
@@ -1,23 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Generic < Chronicle::ETL::Models::Base
7
- TYPE = 'generic'
8
-
9
- attr_accessor :properties
10
-
11
- def initialize(properties = {})
12
- @properties = properties
13
- super
14
- end
15
-
16
- # Generic models have arbitrary attributes stored in @properties
17
- def attributes
18
- @properties.transform_keys(&:to_sym)
19
- end
20
- end
21
- end
22
- end
23
- end