chronicle-etl 0.5.5 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +75 -68
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +51 -49
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +8 -6
  13. data/lib/chronicle/etl/cli/connectors.rb +7 -7
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +14 -15
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +18 -8
  20. data/lib/chronicle/etl/configurable.rb +20 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +38 -26
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +2 -2
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +93 -36
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
  45. data/lib/chronicle/etl/registry/plugins.rb +27 -19
  46. data/lib/chronicle/etl/runner.rb +158 -128
  47. data/lib/chronicle/etl/secrets.rb +4 -4
  48. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  49. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  50. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  51. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  52. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  53. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  54. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  55. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  56. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  57. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  58. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  60. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  61. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  62. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  63. data/lib/chronicle/etl/version.rb +1 -1
  64. data/lib/chronicle/etl.rb +6 -8
  65. metadata +49 -47
  66. data/lib/chronicle/etl/models/activity.rb +0 -15
  67. data/lib/chronicle/etl/models/attachment.rb +0 -14
  68. data/lib/chronicle/etl/models/base.rb +0 -122
  69. data/lib/chronicle/etl/models/entity.rb +0 -29
  70. data/lib/chronicle/etl/models/raw.rb +0 -26
  71. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  72. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  73. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  74. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  75. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  76. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-19 00:00:00.000000000 Z
11
+ date: 2024-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.10.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: chronicle-core
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.3'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: colorize
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -94,34 +108,6 @@ dependencies:
94
108
  - - "~>"
95
109
  - !ruby/object:Gem::Version
96
110
  version: 1.0.2
97
- - !ruby/object:Gem::Dependency
98
- name: mini_exiftool
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '2.10'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: '2.10'
111
- - !ruby/object:Gem::Dependency
112
- name: nokogiri
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '1.13'
118
- type: :runtime
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '1.13'
125
111
  - !ruby/object:Gem::Dependency
126
112
  name: omniauth
127
113
  requirement: !ruby/object:Gem::Requirement
@@ -254,14 +240,14 @@ dependencies:
254
240
  requirements:
255
241
  - - "~>"
256
242
  - !ruby/object:Gem::Version
257
- version: '0.11'
243
+ version: '0.12'
258
244
  type: :runtime
259
245
  prerelease: false
260
246
  version_requirements: !ruby/object:Gem::Requirement
261
247
  requirements:
262
248
  - - "~>"
263
249
  - !ruby/object:Gem::Version
264
- version: '0.11'
250
+ version: '0.12'
265
251
  - !ruby/object:Gem::Dependency
266
252
  name: xdg
267
253
  requirement: !ruby/object:Gem::Requirement
@@ -366,14 +352,14 @@ dependencies:
366
352
  requirements:
367
353
  - - "~>"
368
354
  - !ruby/object:Gem::Version
369
- version: 1.25.1
355
+ version: '1.57'
370
356
  type: :development
371
357
  prerelease: false
372
358
  version_requirements: !ruby/object:Gem::Requirement
373
359
  requirements:
374
360
  - - "~>"
375
361
  - !ruby/object:Gem::Version
376
- version: 1.25.1
362
+ version: '1.57'
377
363
  - !ruby/object:Gem::Dependency
378
364
  name: simplecov
379
365
  requirement: !ruby/object:Gem::Requirement
@@ -416,6 +402,20 @@ dependencies:
416
402
  - - "~>"
417
403
  - !ruby/object:Gem::Version
418
404
  version: '3'
405
+ - !ruby/object:Gem::Dependency
406
+ name: webrick
407
+ requirement: !ruby/object:Gem::Requirement
408
+ requirements:
409
+ - - "~>"
410
+ - !ruby/object:Gem::Version
411
+ version: '1.7'
412
+ type: :development
413
+ prerelease: false
414
+ version_requirements: !ruby/object:Gem::Requirement
415
+ requirements:
416
+ - - "~>"
417
+ - !ruby/object:Gem::Version
418
+ version: '1.7'
419
419
  - !ruby/object:Gem::Dependency
420
420
  name: yard
421
421
  requirement: !ruby/object:Gem::Requirement
@@ -489,12 +489,8 @@ files:
489
489
  - lib/chronicle/etl/loaders/rest_loader.rb
490
490
  - lib/chronicle/etl/loaders/table_loader.rb
491
491
  - lib/chronicle/etl/logger.rb
492
- - lib/chronicle/etl/models/activity.rb
493
- - lib/chronicle/etl/models/attachment.rb
494
- - lib/chronicle/etl/models/base.rb
495
- - lib/chronicle/etl/models/entity.rb
496
- - lib/chronicle/etl/models/raw.rb
497
492
  - lib/chronicle/etl/oauth_authorizer.rb
493
+ - lib/chronicle/etl/record.rb
498
494
  - lib/chronicle/etl/registry/connector_registration.rb
499
495
  - lib/chronicle/etl/registry/connectors.rb
500
496
  - lib/chronicle/etl/registry/plugin_registration.rb
@@ -503,16 +499,21 @@ files:
503
499
  - lib/chronicle/etl/registry/self_registering.rb
504
500
  - lib/chronicle/etl/runner.rb
505
501
  - lib/chronicle/etl/secrets.rb
506
- - lib/chronicle/etl/serializers/jsonapi_serializer.rb
507
- - lib/chronicle/etl/serializers/raw_serializer.rb
508
- - lib/chronicle/etl/serializers/serializer.rb
509
- - lib/chronicle/etl/transformers/image_file_transformer.rb
502
+ - lib/chronicle/etl/transformers/buffer_transformer.rb
503
+ - lib/chronicle/etl/transformers/chronicle_transformer.rb
504
+ - lib/chronicle/etl/transformers/chronobase_transformer.rb
505
+ - lib/chronicle/etl/transformers/fields_limit_transformer.rb
506
+ - lib/chronicle/etl/transformers/filter_fields_transformer.rb
507
+ - lib/chronicle/etl/transformers/filter_transformer.rb
508
+ - lib/chronicle/etl/transformers/format_transformer.rb
509
+ - lib/chronicle/etl/transformers/merge_meta_transformer.rb
510
+ - lib/chronicle/etl/transformers/multiply_transformer.rb
510
511
  - lib/chronicle/etl/transformers/null_transformer.rb
512
+ - lib/chronicle/etl/transformers/sampler_transformer.rb
513
+ - lib/chronicle/etl/transformers/sort_transformer.rb
511
514
  - lib/chronicle/etl/transformers/transformer.rb
512
515
  - lib/chronicle/etl/utils/binary_attachments.rb
513
- - lib/chronicle/etl/utils/hash_utilities.rb
514
516
  - lib/chronicle/etl/utils/progress_bar.rb
515
- - lib/chronicle/etl/utils/text_recognition.rb
516
517
  - lib/chronicle/etl/version.rb
517
518
  homepage: https://github.com/chronicle-app
518
519
  licenses:
@@ -522,6 +523,7 @@ metadata:
522
523
  homepage_uri: https://github.com/chronicle-app
523
524
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
524
525
  changelog_uri: https://github.com/chronicle-app/chronicle-etl/releases
526
+ rubygems_mfa_required: 'true'
525
527
  post_install_message:
526
528
  rdoc_options: []
527
529
  require_paths:
@@ -530,14 +532,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
530
532
  requirements:
531
533
  - - ">="
532
534
  - !ruby/object:Gem::Version
533
- version: '2.7'
535
+ version: '3.1'
534
536
  required_rubygems_version: !ruby/object:Gem::Requirement
535
537
  requirements:
536
538
  - - ">="
537
539
  - !ruby/object:Gem::Version
538
540
  version: '0'
539
541
  requirements: []
540
- rubygems_version: 3.3.3
542
+ rubygems_version: 3.4.10
541
543
  signing_key:
542
544
  specification_version: 4
543
545
  summary: ETL tool for personal data
@@ -1,15 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Activity < Chronicle::ETL::Models::Base
7
- TYPE = 'activities'.freeze
8
- ATTRIBUTES = [:verb, :start_at, :end_at].freeze
9
- ASSOCIATIONS = [:involved, :actor].freeze
10
-
11
- attr_accessor(*ATTRIBUTES, *ASSOCIATIONS)
12
- end
13
- end
14
- end
15
- end
@@ -1,14 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Attachment < Chronicle::ETL::Models::Base
7
- TYPE = 'attachments'.freeze
8
- ATTRIBUTES = [:url_original, :data].freeze
9
-
10
- attr_accessor(*ATTRIBUTES)
11
- end
12
- end
13
- end
14
- end
@@ -1,122 +0,0 @@
1
- require 'digest'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- # Represents a record that's been transformed by a Transformer and
7
- # ready to be loaded. Loosely based on ActiveModel.
8
- #
9
- # @todo Experiment with just mixing in ActiveModel instead of this
10
- # this reimplementation
11
- class Base
12
- ATTRIBUTES = [:provider, :provider_id, :provider_namespace, :lat, :lng, :metadata].freeze
13
- ASSOCIATIONS = [].freeze
14
-
15
- attr_accessor(:id, :dedupe_on, *ATTRIBUTES)
16
-
17
- def initialize(attributes = {})
18
- assign_attributes(attributes) if attributes
19
- @dedupe_on = []
20
- @metadata = {}
21
- end
22
-
23
- # A unique identifier for this model is formed from a type
24
- # and either an id or lids.
25
- def identifier_hash
26
- {
27
- type: self.class::TYPE,
28
- id: @id,
29
- lids: lids
30
- }.compact
31
- end
32
-
33
- # Array of local ids that uniquely identify this record
34
- def lids
35
- @dedupe_on.map do |fields|
36
- generate_lid(fields)
37
- end.compact.uniq
38
- end
39
-
40
- # For a given set of fields of this model, generate a
41
- # unique local id by hashing the field values
42
- def generate_lid fields
43
- raise ArgumentError.new("Must provide an array of symbolized fields") unless fields.is_a?(Array)
44
-
45
- values = fields.sort.map do |field|
46
- instance_variable = "@#{field.to_s}"
47
- self.instance_variable_get(instance_variable)
48
- end
49
-
50
- return if values.any? { |e| e.nil? }
51
-
52
- Digest::SHA256.hexdigest(values.join(","))
53
- end
54
-
55
- # Set of attribute names that this model has is Base's shared
56
- # attributes combined with the child class's
57
- def attribute_list
58
- (ATTRIBUTES + self.class::ATTRIBUTES).uniq
59
- end
60
-
61
- # All of this record's attributes
62
- def attributes
63
- attributes = {}
64
- attribute_list.each do |attribute|
65
- instance_variable = "@#{attribute.to_s}"
66
- attributes[attribute] = self.instance_variable_get(instance_variable)
67
- end
68
- attributes.compact
69
- end
70
-
71
- # All of this record's associations
72
- def associations
73
- association_list = ASSOCIATIONS + self.class::ASSOCIATIONS
74
- attributes = {}
75
- association_list.each do |attribute|
76
- instance_variable = "@#{attribute.to_s}"
77
- association = self.instance_variable_get(instance_variable)
78
- attributes[attribute] = association if association
79
- end
80
- attributes.compact
81
- end
82
-
83
- def associations_hash
84
- associations.map do |k, v|
85
- if v.is_a?(Array)
86
- [k, v.map(&:to_h)]
87
- else
88
- [k, v.to_h]
89
- end
90
- end.to_h
91
- end
92
-
93
- def meta_hash
94
- {
95
- meta: {
96
- dedupe_on: @dedupe_on.map{|d| d.map(&:to_s).join(",")}
97
- }
98
- }
99
- end
100
-
101
- # FIXME: move this to a Utils module
102
- def to_h_flattened
103
- Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h)
104
- end
105
-
106
- def to_h
107
- identifier_hash
108
- .merge(attributes)
109
- .merge(associations_hash)
110
- .merge(meta_hash)
111
- end
112
-
113
- def assign_attributes attributes
114
- attributes.each do |k, v|
115
- setter = :"#{k}="
116
- public_send(setter, v) if respond_to? setter
117
- end
118
- end
119
- end
120
- end
121
- end
122
- end
@@ -1,29 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Entity < Chronicle::ETL::Models::Base
7
- TYPE = 'entities'.freeze
8
- ATTRIBUTES = [:title, :body, :provider_url, :represents, :slug, :myself, :metadata].freeze
9
-
10
- # TODO: This desperately needs a validation system
11
- ASSOCIATIONS = [
12
- :involvements, # inverse of activity's `involved`
13
- :analogous,
14
- :attachments,
15
- :abouts,
16
- :aboutables, # inverse of above
17
- :depicts,
18
- :consumers,
19
- :creators,
20
- :creations,
21
- :contains,
22
- :containers # inverse of above
23
- ].freeze # TODO: add these to reflect Chronicle Schema
24
-
25
- attr_accessor(*ATTRIBUTES, *ASSOCIATIONS)
26
- end
27
- end
28
- end
29
- end
@@ -1,26 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- # A record from an extraction with no processing or normalization applied
7
- class Raw
8
- TYPE = 'raw'
9
-
10
- attr_accessor :raw_data
11
-
12
- def initialize(raw_data)
13
- @raw_data = raw_data
14
- end
15
-
16
- def to_h
17
- @raw_data.to_h
18
- end
19
-
20
- def to_h_flattened
21
- Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h)
22
- end
23
- end
24
- end
25
- end
26
- end
@@ -1,31 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class JSONAPISerializer < Chronicle::ETL::Serializer
4
- def initialize(*args)
5
- super
6
-
7
- raise(SerializationError, "Record must be a subclass of Chronicle::ETL::Model::Base") unless @record.is_a?(Chronicle::ETL::Models::Base)
8
- end
9
-
10
- def serializable_hash
11
- @record
12
- .identifier_hash
13
- .merge({ attributes: @record.attributes })
14
- .merge({ relationships: build_associations })
15
- .merge(@record.meta_hash)
16
- end
17
-
18
- def build_associations
19
- @record.associations.transform_values do |value|
20
- association_data =
21
- if value.is_a?(Array)
22
- value.map { |record| JSONAPISerializer.new(record).serializable_hash }
23
- else
24
- JSONAPISerializer.new(value).serializable_hash
25
- end
26
- { data: association_data }
27
- end
28
- end
29
- end
30
- end
31
- end
@@ -1,10 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- # Take a Raw model and output `raw_data` as a hash
4
- class RawSerializer < Chronicle::ETL::Serializer
5
- def serializable_hash
6
- @record.to_h
7
- end
8
- end
9
- end
10
- end
@@ -1,28 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- # Abstract class representing a Serializer for an ETL record
4
- class Serializer
5
- # Construct a new instance of this serializer.
6
- # == Parameters:
7
- # options::
8
- # Options for configuring this Serializers
9
- def initialize(record, options = {})
10
- @record = record
11
- @options = options
12
- end
13
-
14
- # Serialize a record as a hash
15
- def serializable_hash
16
- raise NotImplementedError
17
- end
18
-
19
- def self.serialize(record)
20
- serializer = self.new(record)
21
- serializer.serializable_hash
22
- end
23
- end
24
- end
25
- end
26
-
27
- require_relative 'jsonapi_serializer'
28
- require_relative 'raw_serializer'