chronicle-etl 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/.rubocop.yml +28 -1
- data/Guardfile +7 -0
- data/README.md +149 -85
- data/Rakefile +4 -2
- data/chronicle-etl.gemspec +10 -5
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/cli/connectors.rb +34 -0
- data/lib/chronicle/etl/cli/jobs.rb +44 -12
- data/lib/chronicle/etl/cli/main.rb +13 -19
- data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
- data/lib/chronicle/etl/cli.rb +7 -0
- data/lib/chronicle/etl/configurable.rb +158 -0
- data/lib/chronicle/etl/exceptions.rb +7 -1
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
- data/lib/chronicle/etl/extractors/extractor.rb +23 -19
- data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
- data/lib/chronicle/etl/job.rb +1 -1
- data/lib/chronicle/etl/job_definition.rb +1 -1
- data/lib/chronicle/etl/loaders/csv_loader.rb +1 -1
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +5 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
- data/lib/chronicle/etl/loaders/table_loader.rb +21 -24
- data/lib/chronicle/etl/logger.rb +1 -0
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +1 -0
- data/lib/chronicle/etl/runner.rb +6 -4
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/transformers/transformer.rb +3 -2
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +12 -4
- metadata +80 -19
- data/.ruby-version +0 -1
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '7.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '7.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: chronic_duration
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,16 +98,16 @@ dependencies:
|
|
98
98
|
name: runcom
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - "
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '6.
|
103
|
+
version: '6.0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - "
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '6.
|
110
|
+
version: '6.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: sequel
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,14 +142,14 @@ dependencies:
|
|
142
142
|
requirements:
|
143
143
|
- - "~>"
|
144
144
|
- !ruby/object:Gem::Version
|
145
|
-
version: '
|
145
|
+
version: '1.2'
|
146
146
|
type: :runtime
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '
|
152
|
+
version: '1.2'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
154
|
name: tty-progressbar
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -234,6 +234,62 @@ dependencies:
|
|
234
234
|
- - "~>"
|
235
235
|
- !ruby/object:Gem::Version
|
236
236
|
version: '3.9'
|
237
|
+
- !ruby/object:Gem::Dependency
|
238
|
+
name: simplecov
|
239
|
+
requirement: !ruby/object:Gem::Requirement
|
240
|
+
requirements:
|
241
|
+
- - "~>"
|
242
|
+
- !ruby/object:Gem::Version
|
243
|
+
version: '0.21'
|
244
|
+
type: :development
|
245
|
+
prerelease: false
|
246
|
+
version_requirements: !ruby/object:Gem::Requirement
|
247
|
+
requirements:
|
248
|
+
- - "~>"
|
249
|
+
- !ruby/object:Gem::Version
|
250
|
+
version: '0.21'
|
251
|
+
- !ruby/object:Gem::Dependency
|
252
|
+
name: guard-rspec
|
253
|
+
requirement: !ruby/object:Gem::Requirement
|
254
|
+
requirements:
|
255
|
+
- - "~>"
|
256
|
+
- !ruby/object:Gem::Version
|
257
|
+
version: 4.7.3
|
258
|
+
type: :development
|
259
|
+
prerelease: false
|
260
|
+
version_requirements: !ruby/object:Gem::Requirement
|
261
|
+
requirements:
|
262
|
+
- - "~>"
|
263
|
+
- !ruby/object:Gem::Version
|
264
|
+
version: 4.7.3
|
265
|
+
- !ruby/object:Gem::Dependency
|
266
|
+
name: yard
|
267
|
+
requirement: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - "~>"
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: 0.9.7
|
272
|
+
type: :development
|
273
|
+
prerelease: false
|
274
|
+
version_requirements: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - "~>"
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: 0.9.7
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: rubocop
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - "~>"
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: 1.25.1
|
286
|
+
type: :development
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - "~>"
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: 1.25.1
|
237
293
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
238
294
|
transformer it, and load it.
|
239
295
|
email:
|
@@ -243,14 +299,15 @@ executables:
|
|
243
299
|
extensions: []
|
244
300
|
extra_rdoc_files: []
|
245
301
|
files:
|
302
|
+
- ".github/workflows/ruby.yml"
|
246
303
|
- ".gitignore"
|
247
304
|
- ".rspec"
|
248
305
|
- ".rubocop.yml"
|
249
|
-
- ".ruby-version"
|
250
306
|
- ".travis.yml"
|
251
307
|
- ".yardopts"
|
252
308
|
- CODE_OF_CONDUCT.md
|
253
309
|
- Gemfile
|
310
|
+
- Guardfile
|
254
311
|
- LICENSE.txt
|
255
312
|
- README.md
|
256
313
|
- Rakefile
|
@@ -259,17 +316,19 @@ files:
|
|
259
316
|
- chronicle-etl.gemspec
|
260
317
|
- exe/chronicle-etl
|
261
318
|
- lib/chronicle/etl.rb
|
319
|
+
- lib/chronicle/etl/cli.rb
|
262
320
|
- lib/chronicle/etl/cli/connectors.rb
|
263
321
|
- lib/chronicle/etl/cli/jobs.rb
|
264
322
|
- lib/chronicle/etl/cli/main.rb
|
265
323
|
- lib/chronicle/etl/cli/subcommand_base.rb
|
266
324
|
- lib/chronicle/etl/config.rb
|
325
|
+
- lib/chronicle/etl/configurable.rb
|
267
326
|
- lib/chronicle/etl/exceptions.rb
|
268
327
|
- lib/chronicle/etl/extraction.rb
|
269
328
|
- lib/chronicle/etl/extractors/csv_extractor.rb
|
270
329
|
- lib/chronicle/etl/extractors/extractor.rb
|
271
330
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
272
|
-
- lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
|
331
|
+
- lib/chronicle/etl/extractors/helpers/input_reader.rb
|
273
332
|
- lib/chronicle/etl/extractors/json_extractor.rb
|
274
333
|
- lib/chronicle/etl/extractors/stdin_extractor.rb
|
275
334
|
- lib/chronicle/etl/job.rb
|
@@ -277,21 +336,22 @@ files:
|
|
277
336
|
- lib/chronicle/etl/job_log.rb
|
278
337
|
- lib/chronicle/etl/job_logger.rb
|
279
338
|
- lib/chronicle/etl/loaders/csv_loader.rb
|
339
|
+
- lib/chronicle/etl/loaders/json_loader.rb
|
280
340
|
- lib/chronicle/etl/loaders/loader.rb
|
281
341
|
- lib/chronicle/etl/loaders/rest_loader.rb
|
282
|
-
- lib/chronicle/etl/loaders/stdout_loader.rb
|
283
342
|
- lib/chronicle/etl/loaders/table_loader.rb
|
284
343
|
- lib/chronicle/etl/logger.rb
|
285
344
|
- lib/chronicle/etl/models/activity.rb
|
286
345
|
- lib/chronicle/etl/models/attachment.rb
|
287
346
|
- lib/chronicle/etl/models/base.rb
|
288
347
|
- lib/chronicle/etl/models/entity.rb
|
289
|
-
- lib/chronicle/etl/models/generic.rb
|
348
|
+
- lib/chronicle/etl/models/raw.rb
|
290
349
|
- lib/chronicle/etl/registry/connector_registration.rb
|
291
350
|
- lib/chronicle/etl/registry/registry.rb
|
292
351
|
- lib/chronicle/etl/registry/self_registering.rb
|
293
352
|
- lib/chronicle/etl/runner.rb
|
294
353
|
- lib/chronicle/etl/serializers/jsonapi_serializer.rb
|
354
|
+
- lib/chronicle/etl/serializers/raw_serializer.rb
|
295
355
|
- lib/chronicle/etl/serializers/serializer.rb
|
296
356
|
- lib/chronicle/etl/transformers/image_file_transformer.rb
|
297
357
|
- lib/chronicle/etl/transformers/null_transformer.rb
|
@@ -305,9 +365,10 @@ homepage: https://github.com/chronicle-app
|
|
305
365
|
licenses:
|
306
366
|
- MIT
|
307
367
|
metadata:
|
368
|
+
allowed_push_host: https://rubygems.org
|
308
369
|
homepage_uri: https://github.com/chronicle-app
|
309
370
|
source_code_uri: https://github.com/chronicle-app/chronicle-etl
|
310
|
-
changelog_uri: https://github.com/chronicle-app/chronicle-etl/
|
371
|
+
changelog_uri: https://github.com/chronicle-app/chronicle-etl/releases
|
311
372
|
post_install_message:
|
312
373
|
rdoc_options: []
|
313
374
|
require_paths:
|
@@ -316,14 +377,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
316
377
|
requirements:
|
317
378
|
- - ">="
|
318
379
|
- !ruby/object:Gem::Version
|
319
|
-
version: '
|
380
|
+
version: '2.7'
|
320
381
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
321
382
|
requirements:
|
322
383
|
- - ">="
|
323
384
|
- !ruby/object:Gem::Version
|
324
385
|
version: '0'
|
325
386
|
requirements: []
|
326
|
-
rubygems_version: 3.1.
|
387
|
+
rubygems_version: 3.1.6
|
327
388
|
signing_key:
|
328
389
|
specification_version: 4
|
329
390
|
summary: ETL tool for personal data
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.7.1
|
data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
require 'pathname'
|
2
|
-
|
3
|
-
module Chronicle
|
4
|
-
module ETL
|
5
|
-
module Extractors
|
6
|
-
module Helpers
|
7
|
-
module FilesystemReader
|
8
|
-
|
9
|
-
def filenames_in_directory(...)
|
10
|
-
filenames = gather_files(...)
|
11
|
-
if block_given?
|
12
|
-
filenames.each do |filename|
|
13
|
-
yield filename
|
14
|
-
end
|
15
|
-
else
|
16
|
-
filenames
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def read_from_filesystem(filename:, yield_each_line: true, dir_glob_pattern: '**/*')
|
21
|
-
open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
|
22
|
-
if yield_each_line
|
23
|
-
file.each_line do |line|
|
24
|
-
yield line
|
25
|
-
end
|
26
|
-
else
|
27
|
-
yield file.read
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def open_from_filesystem(filename:, dir_glob_pattern: '**/*')
|
33
|
-
open_files(filename: filename, dir_glob_pattern: dir_glob_pattern) do |file|
|
34
|
-
yield file
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def results_count
|
39
|
-
raise NotImplementedError
|
40
|
-
# if file?
|
41
|
-
# return 1
|
42
|
-
# else
|
43
|
-
# search_pattern = File.join(@options[:filename], '**/*')
|
44
|
-
# Dir.glob(search_pattern).count
|
45
|
-
# end
|
46
|
-
end
|
47
|
-
|
48
|
-
private
|
49
|
-
|
50
|
-
def gather_files(path:, dir_glob_pattern: '**/*', load_since: nil, load_until: nil, smaller_than: nil, larger_than: nil, sort: :mtime)
|
51
|
-
search_pattern = File.join(path, '**', dir_glob_pattern)
|
52
|
-
files = Dir.glob(search_pattern)
|
53
|
-
|
54
|
-
files = files.keep_if {|f| (File.mtime(f) > load_since)} if load_since
|
55
|
-
files = files.keep_if {|f| (File.mtime(f) < load_until)} if load_until
|
56
|
-
|
57
|
-
# pass in file sizes in bytes
|
58
|
-
files = files.keep_if {|f| (File.size(f) < smaller_than)} if smaller_than
|
59
|
-
files = files.keep_if {|f| (File.size(f) > larger_than)} if larger_than
|
60
|
-
|
61
|
-
# TODO: incorporate sort argument
|
62
|
-
files.sort_by{ |f| File.mtime(f) }
|
63
|
-
end
|
64
|
-
|
65
|
-
def select_files_in_directory(path:, dir_glob_pattern: '**/*')
|
66
|
-
raise IOError.new("#{path} is not a directory.") unless directory?(path)
|
67
|
-
|
68
|
-
search_pattern = File.join(path, dir_glob_pattern)
|
69
|
-
Dir.glob(search_pattern).each do |filename|
|
70
|
-
yield(filename)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def open_files(filename:, dir_glob_pattern:)
|
75
|
-
if stdin?(filename)
|
76
|
-
yield $stdin
|
77
|
-
elsif directory?(filename)
|
78
|
-
search_pattern = File.join(filename, dir_glob_pattern)
|
79
|
-
filenames = Dir.glob(search_pattern)
|
80
|
-
filenames.each do |filename|
|
81
|
-
file = File.open(filename)
|
82
|
-
yield(file)
|
83
|
-
end
|
84
|
-
elsif file?(filename)
|
85
|
-
yield File.open(filename)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def stdin?(filename)
|
90
|
-
filename == $stdin
|
91
|
-
end
|
92
|
-
|
93
|
-
def directory?(filename)
|
94
|
-
Pathname.new(filename).directory?
|
95
|
-
end
|
96
|
-
|
97
|
-
def file?(filename)
|
98
|
-
Pathname.new(filename).file?
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
data/lib/chronicle/etl/loaders/stdout_loader.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
module Chronicle
|
2
|
-
module ETL
|
3
|
-
class StdoutLoader < Chronicle::ETL::Loader
|
4
|
-
register_connector do |r|
|
5
|
-
r.description = 'stdout'
|
6
|
-
end
|
7
|
-
|
8
|
-
def load(record)
|
9
|
-
serializer = Chronicle::ETL::JSONAPISerializer.new(record)
|
10
|
-
puts serializer.serializable_hash.to_json
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
data/lib/chronicle/etl/models/generic.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require 'chronicle/etl/models/base'
|
2
|
-
|
3
|
-
module Chronicle
|
4
|
-
module ETL
|
5
|
-
module Models
|
6
|
-
class Generic < Chronicle::ETL::Models::Base
|
7
|
-
TYPE = 'generic'
|
8
|
-
|
9
|
-
attr_accessor :properties
|
10
|
-
|
11
|
-
def initialize(properties = {})
|
12
|
-
@properties = properties
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
|
-
# Generic models have arbitrary attributes stored in @properties
|
17
|
-
def attributes
|
18
|
-
@properties.transform_keys(&:to_sym)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|