chronicle-etl 0.3.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/.rubocop.yml +31 -1
- data/Guardfile +7 -0
- data/README.md +157 -82
- data/Rakefile +4 -2
- data/chronicle-etl.gemspec +11 -3
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/cli/connectors.rb +34 -5
- data/lib/chronicle/etl/cli/jobs.rb +90 -24
- data/lib/chronicle/etl/cli/main.rb +41 -19
- data/lib/chronicle/etl/cli/plugins.rb +62 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
- data/lib/chronicle/etl/cli.rb +9 -0
- data/lib/chronicle/etl/config.rb +7 -4
- data/lib/chronicle/etl/configurable.rb +163 -0
- data/lib/chronicle/etl/exceptions.rb +29 -1
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
- data/lib/chronicle/etl/extractors/extractor.rb +16 -15
- data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
- data/lib/chronicle/etl/job.rb +8 -2
- data/lib/chronicle/etl/job_definition.rb +20 -5
- data/lib/chronicle/etl/loaders/csv_loader.rb +36 -9
- data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +28 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
- data/lib/chronicle/etl/loaders/table_loader.rb +18 -37
- data/lib/chronicle/etl/logger.rb +6 -2
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +6 -0
- data/lib/chronicle/etl/registry/plugin_registry.rb +70 -0
- data/lib/chronicle/etl/registry/registry.rb +27 -14
- data/lib/chronicle/etl/runner.rb +35 -17
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/transformers/transformer.rb +3 -2
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +12 -4
- metadata +123 -18
- data/.ruby-version +0 -1
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '7.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '7.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: chronic_duration
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,16 +98,16 @@ dependencies:
|
|
98
98
|
name: runcom
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - "
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '6.
|
103
|
+
version: '6.0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - "
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '6.
|
110
|
+
version: '6.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: sequel
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,14 +142,28 @@ dependencies:
|
|
142
142
|
requirements:
|
143
143
|
- - "~>"
|
144
144
|
- !ruby/object:Gem::Version
|
145
|
-
version: '
|
145
|
+
version: '1.2'
|
146
146
|
type: :runtime
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '
|
152
|
+
version: '1.2'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: thor-hollaback
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0.2'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0.2'
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: tty-progressbar
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,6 +178,20 @@ dependencies:
|
|
164
178
|
- - "~>"
|
165
179
|
- !ruby/object:Gem::Version
|
166
180
|
version: '0.17'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: tty-spinner
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
167
195
|
- !ruby/object:Gem::Dependency
|
168
196
|
name: tty-table
|
169
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,6 +206,20 @@ dependencies:
|
|
178
206
|
- - "~>"
|
179
207
|
- !ruby/object:Gem::Version
|
180
208
|
version: '0.11'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: tty-prompt
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - "~>"
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0.23'
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0.23'
|
181
223
|
- !ruby/object:Gem::Dependency
|
182
224
|
name: bundler
|
183
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -234,6 +276,62 @@ dependencies:
|
|
234
276
|
- - "~>"
|
235
277
|
- !ruby/object:Gem::Version
|
236
278
|
version: '3.9'
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: simplecov
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - "~>"
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: '0.21'
|
286
|
+
type: :development
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - "~>"
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: '0.21'
|
293
|
+
- !ruby/object:Gem::Dependency
|
294
|
+
name: guard-rspec
|
295
|
+
requirement: !ruby/object:Gem::Requirement
|
296
|
+
requirements:
|
297
|
+
- - "~>"
|
298
|
+
- !ruby/object:Gem::Version
|
299
|
+
version: 4.7.3
|
300
|
+
type: :development
|
301
|
+
prerelease: false
|
302
|
+
version_requirements: !ruby/object:Gem::Requirement
|
303
|
+
requirements:
|
304
|
+
- - "~>"
|
305
|
+
- !ruby/object:Gem::Version
|
306
|
+
version: 4.7.3
|
307
|
+
- !ruby/object:Gem::Dependency
|
308
|
+
name: yard
|
309
|
+
requirement: !ruby/object:Gem::Requirement
|
310
|
+
requirements:
|
311
|
+
- - "~>"
|
312
|
+
- !ruby/object:Gem::Version
|
313
|
+
version: 0.9.7
|
314
|
+
type: :development
|
315
|
+
prerelease: false
|
316
|
+
version_requirements: !ruby/object:Gem::Requirement
|
317
|
+
requirements:
|
318
|
+
- - "~>"
|
319
|
+
- !ruby/object:Gem::Version
|
320
|
+
version: 0.9.7
|
321
|
+
- !ruby/object:Gem::Dependency
|
322
|
+
name: rubocop
|
323
|
+
requirement: !ruby/object:Gem::Requirement
|
324
|
+
requirements:
|
325
|
+
- - "~>"
|
326
|
+
- !ruby/object:Gem::Version
|
327
|
+
version: 1.25.1
|
328
|
+
type: :development
|
329
|
+
prerelease: false
|
330
|
+
version_requirements: !ruby/object:Gem::Requirement
|
331
|
+
requirements:
|
332
|
+
- - "~>"
|
333
|
+
- !ruby/object:Gem::Version
|
334
|
+
version: 1.25.1
|
237
335
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
238
336
|
transformer it, and load it.
|
239
337
|
email:
|
@@ -243,14 +341,15 @@ executables:
|
|
243
341
|
extensions: []
|
244
342
|
extra_rdoc_files: []
|
245
343
|
files:
|
344
|
+
- ".github/workflows/ruby.yml"
|
246
345
|
- ".gitignore"
|
247
346
|
- ".rspec"
|
248
347
|
- ".rubocop.yml"
|
249
|
-
- ".ruby-version"
|
250
348
|
- ".travis.yml"
|
251
349
|
- ".yardopts"
|
252
350
|
- CODE_OF_CONDUCT.md
|
253
351
|
- Gemfile
|
352
|
+
- Guardfile
|
254
353
|
- LICENSE.txt
|
255
354
|
- README.md
|
256
355
|
- Rakefile
|
@@ -259,17 +358,20 @@ files:
|
|
259
358
|
- chronicle-etl.gemspec
|
260
359
|
- exe/chronicle-etl
|
261
360
|
- lib/chronicle/etl.rb
|
361
|
+
- lib/chronicle/etl/cli.rb
|
262
362
|
- lib/chronicle/etl/cli/connectors.rb
|
263
363
|
- lib/chronicle/etl/cli/jobs.rb
|
264
364
|
- lib/chronicle/etl/cli/main.rb
|
365
|
+
- lib/chronicle/etl/cli/plugins.rb
|
265
366
|
- lib/chronicle/etl/cli/subcommand_base.rb
|
266
367
|
- lib/chronicle/etl/config.rb
|
368
|
+
- lib/chronicle/etl/configurable.rb
|
267
369
|
- lib/chronicle/etl/exceptions.rb
|
268
370
|
- lib/chronicle/etl/extraction.rb
|
269
371
|
- lib/chronicle/etl/extractors/csv_extractor.rb
|
270
372
|
- lib/chronicle/etl/extractors/extractor.rb
|
271
373
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
272
|
-
- lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
|
374
|
+
- lib/chronicle/etl/extractors/helpers/input_reader.rb
|
273
375
|
- lib/chronicle/etl/extractors/json_extractor.rb
|
274
376
|
- lib/chronicle/etl/extractors/stdin_extractor.rb
|
275
377
|
- lib/chronicle/etl/job.rb
|
@@ -277,21 +379,24 @@ files:
|
|
277
379
|
- lib/chronicle/etl/job_log.rb
|
278
380
|
- lib/chronicle/etl/job_logger.rb
|
279
381
|
- lib/chronicle/etl/loaders/csv_loader.rb
|
382
|
+
- lib/chronicle/etl/loaders/helpers/encoding_helper.rb
|
383
|
+
- lib/chronicle/etl/loaders/json_loader.rb
|
280
384
|
- lib/chronicle/etl/loaders/loader.rb
|
281
385
|
- lib/chronicle/etl/loaders/rest_loader.rb
|
282
|
-
- lib/chronicle/etl/loaders/stdout_loader.rb
|
283
386
|
- lib/chronicle/etl/loaders/table_loader.rb
|
284
387
|
- lib/chronicle/etl/logger.rb
|
285
388
|
- lib/chronicle/etl/models/activity.rb
|
286
389
|
- lib/chronicle/etl/models/attachment.rb
|
287
390
|
- lib/chronicle/etl/models/base.rb
|
288
391
|
- lib/chronicle/etl/models/entity.rb
|
289
|
-
- lib/chronicle/etl/models/generic.rb
|
392
|
+
- lib/chronicle/etl/models/raw.rb
|
290
393
|
- lib/chronicle/etl/registry/connector_registration.rb
|
394
|
+
- lib/chronicle/etl/registry/plugin_registry.rb
|
291
395
|
- lib/chronicle/etl/registry/registry.rb
|
292
396
|
- lib/chronicle/etl/registry/self_registering.rb
|
293
397
|
- lib/chronicle/etl/runner.rb
|
294
398
|
- lib/chronicle/etl/serializers/jsonapi_serializer.rb
|
399
|
+
- lib/chronicle/etl/serializers/raw_serializer.rb
|
295
400
|
- lib/chronicle/etl/serializers/serializer.rb
|
296
401
|
- lib/chronicle/etl/transformers/image_file_transformer.rb
|
297
402
|
- lib/chronicle/etl/transformers/null_transformer.rb
|
@@ -317,14 +422,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
317
422
|
requirements:
|
318
423
|
- - ">="
|
319
424
|
- !ruby/object:Gem::Version
|
320
|
-
version: '
|
425
|
+
version: '2.7'
|
321
426
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
322
427
|
requirements:
|
323
428
|
- - ">="
|
324
429
|
- !ruby/object:Gem::Version
|
325
430
|
version: '0'
|
326
431
|
requirements: []
|
327
|
-
rubygems_version: 3.
|
432
|
+
rubygems_version: 3.3.9
|
328
433
|
signing_key:
|
329
434
|
specification_version: 4
|
330
435
|
summary: ETL tool for personal data
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.7.1
|
data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
require 'pathname'
|
2
|
-
|
3
|
-
module Chronicle
|
4
|
-
module ETL
|
5
|
-
module Extractors
|
6
|
-
module Helpers
|
7
|
-
module FilesystemReader
|
8
|
-
|
9
|
-
def filenames_in_directory(...)
|
10
|
-
filenames = gather_files(...)
|
11
|
-
if block_given?
|
12
|
-
filenames.each do |filename|
|
13
|
-
yield filename
|
14
|
-
end
|
15
|
-
else
|
16
|
-
filenames
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
|
21
|
-
open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
|
22
|
-
if yield_each_line
|
23
|
-
file.each_line do |line|
|
24
|
-
yield line
|
25
|
-
end
|
26
|
-
else
|
27
|
-
yield file.read
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
|
33
|
-
open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
|
34
|
-
yield file
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def results_count
|
39
|
-
raise NotImplementedError
|
40
|
-
# if file?
|
41
|
-
# return 1
|
42
|
-
# else
|
43
|
-
# search_pattern = File.join(@options[:filename], '**/*')
|
44
|
-
# Dir.glob(search_pattern).count
|
45
|
-
# end
|
46
|
-
end
|
47
|
-
|
48
|
-
private
|
49
|
-
|
50
|
-
def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
|
51
|
-
search_pattern = File.join(path, '**', dir_glob_pattern)
|
52
|
-
files = Dir.glob(search_pattern)
|
53
|
-
|
54
|
-
files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
|
55
|
-
files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
|
56
|
-
|
57
|
-
# pass in file sizes in bytes
|
58
|
-
files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
|
59
|
-
files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
|
60
|
-
|
61
|
-
# TODO: incorporate sort argument
|
62
|
-
files.sort_by{ |f| File.mtime(f) }
|
63
|
-
end
|
64
|
-
|
65
|
-
def select_files_in_directory(path:, dir_glob_pattern: '**/*')
|
66
|
-
raise IOError.new("#{path} is not a directory.") unless directory?(path)
|
67
|
-
|
68
|
-
search_pattern = File.join(path, dir_glob_pattern)
|
69
|
-
Dir.glob(search_pattern).each do |filename|
|
70
|
-
yield(filename)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def open_files(filename:, dir_glob_pattern:)
|
75
|
-
if stdin?(filename)
|
76
|
-
yield $stdin
|
77
|
-
elsif directory?(filename)
|
78
|
-
search_pattern = File.join(filename, dir_glob_pattern)
|
79
|
-
filenames = Dir.glob(search_pattern)
|
80
|
-
filenames.each do |filename|
|
81
|
-
file = File.open(filename)
|
82
|
-
yield(file)
|
83
|
-
end
|
84
|
-
elsif file?(filename)
|
85
|
-
yield File.open(filename)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def stdin?(filename)
|
90
|
-
filename == $stdin
|
91
|
-
end
|
92
|
-
|
93
|
-
def directory?(filename)
|
94
|
-
Pathname.new(filename).directory?
|
95
|
-
end
|
96
|
-
|
97
|
-
def file?(filename)
|
98
|
-
Pathname.new(filename).file?
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
data/lib/chronicle/etl/loaders/stdout_loader.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
module Chronicle
|
2
|
-
module ETL
|
3
|
-
class StdoutLoader < Chronicle::ETL::Loader
|
4
|
-
register_connector do |r|
|
5
|
-
r.description = 'stdout'
|
6
|
-
end
|
7
|
-
|
8
|
-
def load(record)
|
9
|
-
serializer = Chronicle::ETL::JSONAPISerializer.new(record)
|
10
|
-
puts serializer.serializable_hash.to_json
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
data/lib/chronicle/etl/models/generic.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require 'chronicle/etl/models/base'
|
2
|
-
|
3
|
-
module Chronicle
|
4
|
-
module ETL
|
5
|
-
module Models
|
6
|
-
class Generic < Chronicle::ETL::Models::Base
|
7
|
-
TYPE = 'generic'
|
8
|
-
|
9
|
-
attr_accessor :properties
|
10
|
-
|
11
|
-
def initialize(properties = {})
|
12
|
-
@properties = properties
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
|
-
# Generic models have arbitrary attributes stored in @properties
|
17
|
-
def attributes
|
18
|
-
@properties.transform_keys(&:to_sym)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|