chronicle-etl 0.5.5 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +75 -68
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +51 -49
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +8 -6
  13. data/lib/chronicle/etl/cli/connectors.rb +7 -7
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +14 -15
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +18 -8
  20. data/lib/chronicle/etl/configurable.rb +20 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +38 -26
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +2 -2
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +93 -36
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
  45. data/lib/chronicle/etl/registry/plugins.rb +27 -19
  46. data/lib/chronicle/etl/runner.rb +158 -128
  47. data/lib/chronicle/etl/secrets.rb +4 -4
  48. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  49. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  50. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  51. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  52. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  53. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  54. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  55. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  56. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  57. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  58. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  60. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  61. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  62. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  63. data/lib/chronicle/etl/version.rb +1 -1
  64. data/lib/chronicle/etl.rb +6 -8
  65. metadata +49 -47
  66. data/lib/chronicle/etl/models/activity.rb +0 -15
  67. data/lib/chronicle/etl/models/attachment.rb +0 -14
  68. data/lib/chronicle/etl/models/base.rb +0 -122
  69. data/lib/chronicle/etl/models/entity.rb +0 -29
  70. data/lib/chronicle/etl/models/raw.rb +0 -26
  71. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  72. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  73. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  74. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  75. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  76. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-19 00:00:00.000000000 Z
11
+ date: 2024-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.10.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: chronicle-core
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.3'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: colorize
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -94,34 +108,6 @@ dependencies:
94
108
  - - "~>"
95
109
  - !ruby/object:Gem::Version
96
110
  version: 1.0.2
97
- - !ruby/object:Gem::Dependency
98
- name: mini_exiftool
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '2.10'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: '2.10'
111
- - !ruby/object:Gem::Dependency
112
- name: nokogiri
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '1.13'
118
- type: :runtime
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '1.13'
125
111
  - !ruby/object:Gem::Dependency
126
112
  name: omniauth
127
113
  requirement: !ruby/object:Gem::Requirement
@@ -254,14 +240,14 @@ dependencies:
254
240
  requirements:
255
241
  - - "~>"
256
242
  - !ruby/object:Gem::Version
257
- version: '0.11'
243
+ version: '0.12'
258
244
  type: :runtime
259
245
  prerelease: false
260
246
  version_requirements: !ruby/object:Gem::Requirement
261
247
  requirements:
262
248
  - - "~>"
263
249
  - !ruby/object:Gem::Version
264
- version: '0.11'
250
+ version: '0.12'
265
251
  - !ruby/object:Gem::Dependency
266
252
  name: xdg
267
253
  requirement: !ruby/object:Gem::Requirement
@@ -366,14 +352,14 @@ dependencies:
366
352
  requirements:
367
353
  - - "~>"
368
354
  - !ruby/object:Gem::Version
369
- version: 1.25.1
355
+ version: '1.57'
370
356
  type: :development
371
357
  prerelease: false
372
358
  version_requirements: !ruby/object:Gem::Requirement
373
359
  requirements:
374
360
  - - "~>"
375
361
  - !ruby/object:Gem::Version
376
- version: 1.25.1
362
+ version: '1.57'
377
363
  - !ruby/object:Gem::Dependency
378
364
  name: simplecov
379
365
  requirement: !ruby/object:Gem::Requirement
@@ -416,6 +402,20 @@ dependencies:
416
402
  - - "~>"
417
403
  - !ruby/object:Gem::Version
418
404
  version: '3'
405
+ - !ruby/object:Gem::Dependency
406
+ name: webrick
407
+ requirement: !ruby/object:Gem::Requirement
408
+ requirements:
409
+ - - "~>"
410
+ - !ruby/object:Gem::Version
411
+ version: '1.7'
412
+ type: :development
413
+ prerelease: false
414
+ version_requirements: !ruby/object:Gem::Requirement
415
+ requirements:
416
+ - - "~>"
417
+ - !ruby/object:Gem::Version
418
+ version: '1.7'
419
419
  - !ruby/object:Gem::Dependency
420
420
  name: yard
421
421
  requirement: !ruby/object:Gem::Requirement
@@ -489,12 +489,8 @@ files:
489
489
  - lib/chronicle/etl/loaders/rest_loader.rb
490
490
  - lib/chronicle/etl/loaders/table_loader.rb
491
491
  - lib/chronicle/etl/logger.rb
492
- - lib/chronicle/etl/models/activity.rb
493
- - lib/chronicle/etl/models/attachment.rb
494
- - lib/chronicle/etl/models/base.rb
495
- - lib/chronicle/etl/models/entity.rb
496
- - lib/chronicle/etl/models/raw.rb
497
492
  - lib/chronicle/etl/oauth_authorizer.rb
493
+ - lib/chronicle/etl/record.rb
498
494
  - lib/chronicle/etl/registry/connector_registration.rb
499
495
  - lib/chronicle/etl/registry/connectors.rb
500
496
  - lib/chronicle/etl/registry/plugin_registration.rb
@@ -503,16 +499,21 @@ files:
503
499
  - lib/chronicle/etl/registry/self_registering.rb
504
500
  - lib/chronicle/etl/runner.rb
505
501
  - lib/chronicle/etl/secrets.rb
506
- - lib/chronicle/etl/serializers/jsonapi_serializer.rb
507
- - lib/chronicle/etl/serializers/raw_serializer.rb
508
- - lib/chronicle/etl/serializers/serializer.rb
509
- - lib/chronicle/etl/transformers/image_file_transformer.rb
502
+ - lib/chronicle/etl/transformers/buffer_transformer.rb
503
+ - lib/chronicle/etl/transformers/chronicle_transformer.rb
504
+ - lib/chronicle/etl/transformers/chronobase_transformer.rb
505
+ - lib/chronicle/etl/transformers/fields_limit_transformer.rb
506
+ - lib/chronicle/etl/transformers/filter_fields_transformer.rb
507
+ - lib/chronicle/etl/transformers/filter_transformer.rb
508
+ - lib/chronicle/etl/transformers/format_transformer.rb
509
+ - lib/chronicle/etl/transformers/merge_meta_transformer.rb
510
+ - lib/chronicle/etl/transformers/multiply_transformer.rb
510
511
  - lib/chronicle/etl/transformers/null_transformer.rb
512
+ - lib/chronicle/etl/transformers/sampler_transformer.rb
513
+ - lib/chronicle/etl/transformers/sort_transformer.rb
511
514
  - lib/chronicle/etl/transformers/transformer.rb
512
515
  - lib/chronicle/etl/utils/binary_attachments.rb
513
- - lib/chronicle/etl/utils/hash_utilities.rb
514
516
  - lib/chronicle/etl/utils/progress_bar.rb
515
- - lib/chronicle/etl/utils/text_recognition.rb
516
517
  - lib/chronicle/etl/version.rb
517
518
  homepage: https://github.com/chronicle-app
518
519
  licenses:
@@ -522,6 +523,7 @@ metadata:
522
523
  homepage_uri: https://github.com/chronicle-app
523
524
  source_code_uri: https://github.com/chronicle-app/chronicle-etl
524
525
  changelog_uri: https://github.com/chronicle-app/chronicle-etl/releases
526
+ rubygems_mfa_required: 'true'
525
527
  post_install_message:
526
528
  rdoc_options: []
527
529
  require_paths:
@@ -530,14 +532,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
530
532
  requirements:
531
533
  - - ">="
532
534
  - !ruby/object:Gem::Version
533
- version: '2.7'
535
+ version: '3.1'
534
536
  required_rubygems_version: !ruby/object:Gem::Requirement
535
537
  requirements:
536
538
  - - ">="
537
539
  - !ruby/object:Gem::Version
538
540
  version: '0'
539
541
  requirements: []
540
- rubygems_version: 3.3.3
542
+ rubygems_version: 3.4.10
541
543
  signing_key:
542
544
  specification_version: 4
543
545
  summary: ETL tool for personal data
@@ -1,15 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Activity < Chronicle::ETL::Models::Base
7
- TYPE = 'activities'.freeze
8
- ATTRIBUTES = [:verb, :start_at, :end_at].freeze
9
- ASSOCIATIONS = [:involved, :actor].freeze
10
-
11
- attr_accessor(*ATTRIBUTES, *ASSOCIATIONS)
12
- end
13
- end
14
- end
15
- end
@@ -1,14 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Attachment < Chronicle::ETL::Models::Base
7
- TYPE = 'attachments'.freeze
8
- ATTRIBUTES = [:url_original, :data].freeze
9
-
10
- attr_accessor(*ATTRIBUTES)
11
- end
12
- end
13
- end
14
- end
@@ -1,122 +0,0 @@
1
- require 'digest'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- # Represents a record that's been transformed by a Transformer and
7
- # ready to be loaded. Loosely based on ActiveModel.
8
- #
9
- # @todo Experiment with just mixing in ActiveModel instead of this
10
- # this reimplementation
11
- class Base
12
- ATTRIBUTES = [:provider, :provider_id, :provider_namespace, :lat, :lng, :metadata].freeze
13
- ASSOCIATIONS = [].freeze
14
-
15
- attr_accessor(:id, :dedupe_on, *ATTRIBUTES)
16
-
17
- def initialize(attributes = {})
18
- assign_attributes(attributes) if attributes
19
- @dedupe_on = []
20
- @metadata = {}
21
- end
22
-
23
- # A unique identifier for this model is formed from a type
24
- # and either an id or lids.
25
- def identifier_hash
26
- {
27
- type: self.class::TYPE,
28
- id: @id,
29
- lids: lids
30
- }.compact
31
- end
32
-
33
- # Array of local ids that uniquely identify this record
34
- def lids
35
- @dedupe_on.map do |fields|
36
- generate_lid(fields)
37
- end.compact.uniq
38
- end
39
-
40
- # For a given set of fields of this model, generate a
41
- # unique local id by hashing the field values
42
- def generate_lid fields
43
- raise ArgumentError.new("Must provide an array of symbolized fields") unless fields.is_a?(Array)
44
-
45
- values = fields.sort.map do |field|
46
- instance_variable = "@#{field.to_s}"
47
- self.instance_variable_get(instance_variable)
48
- end
49
-
50
- return if values.any? { |e| e.nil? }
51
-
52
- Digest::SHA256.hexdigest(values.join(","))
53
- end
54
-
55
- # Set of attribute names that this model has is Base's shared
56
- # attributes combined with the child class's
57
- def attribute_list
58
- (ATTRIBUTES + self.class::ATTRIBUTES).uniq
59
- end
60
-
61
- # All of this record's attributes
62
- def attributes
63
- attributes = {}
64
- attribute_list.each do |attribute|
65
- instance_variable = "@#{attribute.to_s}"
66
- attributes[attribute] = self.instance_variable_get(instance_variable)
67
- end
68
- attributes.compact
69
- end
70
-
71
- # All of this record's associations
72
- def associations
73
- association_list = ASSOCIATIONS + self.class::ASSOCIATIONS
74
- attributes = {}
75
- association_list.each do |attribute|
76
- instance_variable = "@#{attribute.to_s}"
77
- association = self.instance_variable_get(instance_variable)
78
- attributes[attribute] = association if association
79
- end
80
- attributes.compact
81
- end
82
-
83
- def associations_hash
84
- associations.map do |k, v|
85
- if v.is_a?(Array)
86
- [k, v.map(&:to_h)]
87
- else
88
- [k, v.to_h]
89
- end
90
- end.to_h
91
- end
92
-
93
- def meta_hash
94
- {
95
- meta: {
96
- dedupe_on: @dedupe_on.map{|d| d.map(&:to_s).join(",")}
97
- }
98
- }
99
- end
100
-
101
- # FIXME: move this to a Utils module
102
- def to_h_flattened
103
- Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h)
104
- end
105
-
106
- def to_h
107
- identifier_hash
108
- .merge(attributes)
109
- .merge(associations_hash)
110
- .merge(meta_hash)
111
- end
112
-
113
- def assign_attributes attributes
114
- attributes.each do |k, v|
115
- setter = :"#{k}="
116
- public_send(setter, v) if respond_to? setter
117
- end
118
- end
119
- end
120
- end
121
- end
122
- end
@@ -1,29 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- class Entity < Chronicle::ETL::Models::Base
7
- TYPE = 'entities'.freeze
8
- ATTRIBUTES = [:title, :body, :provider_url, :represents, :slug, :myself, :metadata].freeze
9
-
10
- # TODO: This desperately needs a validation system
11
- ASSOCIATIONS = [
12
- :involvements, # inverse of activity's `involved`
13
- :analogous,
14
- :attachments,
15
- :abouts,
16
- :aboutables, # inverse of above
17
- :depicts,
18
- :consumers,
19
- :creators,
20
- :creations,
21
- :contains,
22
- :containers # inverse of above
23
- ].freeze # TODO: add these to reflect Chronicle Schema
24
-
25
- attr_accessor(*ATTRIBUTES, *ASSOCIATIONS)
26
- end
27
- end
28
- end
29
- end
@@ -1,26 +0,0 @@
1
- require 'chronicle/etl/models/base'
2
-
3
- module Chronicle
4
- module ETL
5
- module Models
6
- # A record from an extraction with no processing or normalization applied
7
- class Raw
8
- TYPE = 'raw'
9
-
10
- attr_accessor :raw_data
11
-
12
- def initialize(raw_data)
13
- @raw_data = raw_data
14
- end
15
-
16
- def to_h
17
- @raw_data.to_h
18
- end
19
-
20
- def to_h_flattened
21
- Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h)
22
- end
23
- end
24
- end
25
- end
26
- end
@@ -1,31 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- class JSONAPISerializer < Chronicle::ETL::Serializer
4
- def initialize(*args)
5
- super
6
-
7
- raise(SerializationError, "Record must be a subclass of Chronicle::ETL::Model::Base") unless @record.is_a?(Chronicle::ETL::Models::Base)
8
- end
9
-
10
- def serializable_hash
11
- @record
12
- .identifier_hash
13
- .merge({ attributes: @record.attributes })
14
- .merge({ relationships: build_associations })
15
- .merge(@record.meta_hash)
16
- end
17
-
18
- def build_associations
19
- @record.associations.transform_values do |value|
20
- association_data =
21
- if value.is_a?(Array)
22
- value.map { |record| JSONAPISerializer.new(record).serializable_hash }
23
- else
24
- JSONAPISerializer.new(value).serializable_hash
25
- end
26
- { data: association_data }
27
- end
28
- end
29
- end
30
- end
31
- end
@@ -1,10 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- # Take a Raw model and output `raw_data` as a hash
4
- class RawSerializer < Chronicle::ETL::Serializer
5
- def serializable_hash
6
- @record.to_h
7
- end
8
- end
9
- end
10
- end
@@ -1,28 +0,0 @@
1
- module Chronicle
2
- module ETL
3
- # Abstract class representing a Serializer for an ETL record
4
- class Serializer
5
- # Construct a new instance of this serializer.
6
- # == Parameters:
7
- # options::
8
- # Options for configuring this Serializers
9
- def initialize(record, options = {})
10
- @record = record
11
- @options = options
12
- end
13
-
14
- # Serialize a record as a hash
15
- def serializable_hash
16
- raise NotImplementedError
17
- end
18
-
19
- def self.serialize(record)
20
- serializer = self.new(record)
21
- serializer.serializable_hash
22
- end
23
- end
24
- end
25
- end
26
-
27
- require_relative 'jsonapi_serializer'
28
- require_relative 'raw_serializer'