fluent-plugin-openlineage 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +6 -0
  3. data/.github/workflows/linux.yml +30 -0
  4. data/.gitignore +16 -0
  5. data/.idea/.gitignore +8 -0
  6. data/.idea/fluentd.iml +204 -0
  7. data/.idea/misc.xml +4 -0
  8. data/.idea/modules/benchmark-memory-0.2.iml +12 -0
  9. data/.idea/modules/bigdecimal-3.1.iml +11 -0
  10. data/.idea/modules/certstore_c-0.1.iml +15 -0
  11. data/.idea/modules/concurrent-ruby-1.3.iml +18 -0
  12. data/.idea/modules/concurrent-ruby-1.31.iml +15 -0
  13. data/.idea/modules/connection_pool-2.4.iml +11 -0
  14. data/.idea/modules/cool.io-1.8.iml +16 -0
  15. data/.idea/modules/drb-2.2.iml +14 -0
  16. data/.idea/modules/drb-2.21.iml +11 -0
  17. data/.idea/modules/ffi-1.17.iml +20 -0
  18. data/.idea/modules/ffi-win32-extensions-1.0.iml +19 -0
  19. data/.idea/modules/fluentd-1.17.iml +43 -0
  20. data/.idea/modules/http_parser.rb-0.8.iml +17 -0
  21. data/.idea/modules/json-2.7.iml +14 -0
  22. data/.idea/modules/json-2.71.iml +11 -0
  23. data/.idea/modules/msgpack-1.7.iml +15 -0
  24. data/.idea/modules/mutex_m-0.2.iml +15 -0
  25. data/.idea/modules/new_gem.iml +15 -0
  26. data/.idea/modules/power_assert-2.0.iml +19 -0
  27. data/.idea/modules/rake-13.2.iml +18 -0
  28. data/.idea/modules/rake-13.21.iml +15 -0
  29. data/.idea/modules/rake-compiler-1.2.iml +13 -0
  30. data/.idea/modules/rusty_json_schema-0.15.iml +15 -0
  31. data/.idea/modules/serverengine-2.3.iml +17 -0
  32. data/.idea/modules/sigdump-0.2.iml +16 -0
  33. data/.idea/modules/specifications.iml +14 -0
  34. data/.idea/modules/specifications1.iml +11 -0
  35. data/.idea/modules/strptime-0.2.iml +16 -0
  36. data/.idea/modules/thermite-0.13.iml +17 -0
  37. data/.idea/modules/webrick-1.8.iml +18 -0
  38. data/.idea/modules/win32-event-0.6.iml +21 -0
  39. data/.idea/modules/win32-ipc-0.7.iml +20 -0
  40. data/.idea/modules/yajl-ruby-1.4.iml +779 -0
  41. data/.idea/modules.xml +41 -0
  42. data/.rspec +2 -0
  43. data/ChangeLog +3 -0
  44. data/Gemfile +3 -0
  45. data/LICENSE +202 -0
  46. data/README.md +250 -0
  47. data/Rakefile +13 -0
  48. data/fluent-plugin-openlineage.gemspec +28 -0
  49. data/lib/fluent/plugin/parser_openlineage.rb +182 -0
  50. data/misc/fluent.conf +101 -0
  51. data/misc/test-complete.json +73 -0
  52. data/misc/test-start.json +73 -0
  53. data/spec/Naming.md +500 -0
  54. data/spec/OpenLineage.json +304 -0
  55. data/spec/Versioning.md +49 -0
  56. data/spec/events/event_full.json +206 -0
  57. data/spec/events/event_invalid_dataset_facet.json +31 -0
  58. data/spec/events/event_invalid_input_dataset_facet.json +29 -0
  59. data/spec/events/event_invalid_job_facet.json +26 -0
  60. data/spec/events/event_invalid_output_dataset_facet.json +29 -0
  61. data/spec/events/event_invalid_run_facet.json +28 -0
  62. data/spec/events/event_no_run_id.json +28 -0
  63. data/spec/events/event_simple.json +29 -0
  64. data/spec/facets/ColumnLineageDatasetFacet.json +96 -0
  65. data/spec/facets/ColumnLineageDatasetFacet.md +106 -0
  66. data/spec/facets/DataQualityAssertionsDatasetFacet.json +49 -0
  67. data/spec/facets/DataQualityMetricsInputDatasetFacet.json +76 -0
  68. data/spec/facets/DatasetVersionDatasetFacet.json +31 -0
  69. data/spec/facets/DatasourceDatasetFacet.json +32 -0
  70. data/spec/facets/DocumentationDatasetFacet.json +31 -0
  71. data/spec/facets/DocumentationJobFacet.json +30 -0
  72. data/spec/facets/ErrorMessageRunFacet.json +41 -0
  73. data/spec/facets/ExternalQueryRunFacet.json +36 -0
  74. data/spec/facets/ExternalQueryRunFacet.md +49 -0
  75. data/spec/facets/ExtractionErrorRunFacet.json +58 -0
  76. data/spec/facets/JobTypeJobFacet.json +41 -0
  77. data/spec/facets/LifecycleStateChangeDatasetFacet.json +46 -0
  78. data/spec/facets/NominalTimeRunFacet.json +38 -0
  79. data/spec/facets/OutputStatisticsOutputDatasetFacet.json +36 -0
  80. data/spec/facets/OwnershipDatasetFacet.json +45 -0
  81. data/spec/facets/OwnershipJobFacet.json +45 -0
  82. data/spec/facets/ParentRunFacet.json +54 -0
  83. data/spec/facets/ProcessingEngineRunFacet.json +41 -0
  84. data/spec/facets/SQLJobFacet.json +30 -0
  85. data/spec/facets/SchemaDatasetFacet.json +59 -0
  86. data/spec/facets/SourceCodeJobFacet.json +34 -0
  87. data/spec/facets/SourceCodeLocationJobFacet.json +60 -0
  88. data/spec/facets/StorageDatasetFacet.json +35 -0
  89. data/spec/facets/SymlinksDatasetFacet.json +47 -0
  90. data/spec/fluent/plugin/test_parser_openlineage.rb +141 -0
  91. data/spec/registry/core/registry.json +31 -0
  92. data/spec/registry/gcp/facets/GcpCommonJobFacet.json +43 -0
  93. data/spec/registry/gcp/registry.json +6 -0
  94. data/spec/spec_helper.rb +8 -0
  95. data/spec/tests/ColumnLineageDatasetFacet/1.json +172 -0
  96. data/spec/tests/DataQualityAssertionsDatasetFacet/1.json +58 -0
  97. data/spec/tests/DataQualityMetricsInputDatasetFacet/1.json +23 -0
  98. data/spec/tests/DatasetVersionDatasetFacet/1.json +7 -0
  99. data/spec/tests/DatasourceDatasetFacet/1.json +7 -0
  100. data/spec/tests/DocumentationDatasetFacet/1.json +7 -0
  101. data/spec/tests/DocumentationJobFacet/1.json +7 -0
  102. data/spec/tests/ErrorMessageRunFacet/1.json +9 -0
  103. data/spec/tests/ExternalQueryRunFacet/1.json +8 -0
  104. data/spec/tests/ExtractionErrorRunFacet/1.json +15 -0
  105. data/spec/tests/JobTypeJobFacet/1.json +9 -0
  106. data/spec/tests/LifecycleStateChangeDatasetFacet/1.json +11 -0
  107. data/spec/tests/NominalTimeRunFacet/1.json +8 -0
  108. data/spec/tests/OutputStatisticsOutputDatasetFacet/1.json +9 -0
  109. data/spec/tests/OwnershipDatasetFacet/1.json +11 -0
  110. data/spec/tests/OwnershipJobFacet/1.json +11 -0
  111. data/spec/tests/ParentRunFacet/1.json +13 -0
  112. data/spec/tests/ProcessingEngineRunFacet/1.json +9 -0
  113. data/spec/tests/SQLJobFacet/1.json +7 -0
  114. data/spec/tests/SchemaDatasetFacet/1.json +92 -0
  115. data/spec/tests/SourceCodeJobFacet/1.json +8 -0
  116. data/spec/tests/SourceCodeLocationJobFacet/1.json +8 -0
  117. data/spec/tests/StorageDatasetFacet/1.json +8 -0
  118. data/spec/tests/SymlinksDatasetFacet/1.json +13 -0
  119. data/spec/tests/example_full_event.json +24 -0
  120. metadata +188 -3
@@ -0,0 +1,35 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/StorageDatasetFacet.json",
4
+ "$defs": {
5
+ "StorageDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "storageLayer": {
14
+ "description": "Storage layer provider with allowed values: iceberg, delta.",
15
+ "type": "string"
16
+ },
17
+ "fileFormat": {
18
+ "description": "File format with allowed values: parquet, orc, avro, json, csv, text, xml.",
19
+ "type": "string"
20
+ }
21
+ },
22
+ "additionalProperties": true,
23
+ "required": ["storageLayer"]
24
+ }
25
+ ],
26
+ "type": "object"
27
+ }
28
+ },
29
+ "type": "object",
30
+ "properties": {
31
+ "storage": {
32
+ "$ref": "#/$defs/StorageDatasetFacet"
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,47 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SymlinksDatasetFacet.json",
4
+ "$defs": {
5
+ "SymlinksDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "identifiers": {
14
+ "type": "array",
15
+ "items": {
16
+ "type": "object",
17
+ "properties": {
18
+ "namespace": {
19
+ "type": "string",
20
+ "description": "The dataset namespace"
21
+ },
22
+ "name": {
23
+ "type": "string",
24
+ "description": "The dataset name"
25
+ },
26
+ "type": {
27
+ "type": "string",
28
+ "description": "Identifier type",
29
+ "example": "table"
30
+ }
31
+ },
32
+ "required": ["namespace", "name", "type"]
33
+ }
34
+ }
35
+ }
36
+ }
37
+ ],
38
+ "type": "object"
39
+ }
40
+ },
41
+ "type": "object",
42
+ "properties": {
43
+ "symlinks": {
44
+ "$ref": "#/$defs/SymlinksDatasetFacet"
45
+ }
46
+ }
47
+ }
@@ -0,0 +1,141 @@
1
+ require "helper"
2
+ require "lib/fluent/plugin/parser_openlineage.rb"
3
+
4
+ class OpenlineageParserTest < Test::Unit::TestCase  # Test::Unit cases for Fluent::Plugin::OpenlineageParser, exercised through Fluent::Test::Driver::Parser
5
+ setup do  # builds a fresh parser driver in @parser, configured with only spec_directory
6
+ Fluent::Test.setup
7
+ @parser = Fluent::Test::Driver::Parser.new(Fluent::Plugin::OpenlineageParser)
8
+ @parser.configure(
9
+ 'spec_directory' => '../../spec/'  # NOTE(review): relative path; what it is resolved against is decided by the parser, not visible here - confirm
10
+ )
11
+ end
12
+
13
+ test "test event with no runId" do  # an event without run.runId must raise ParserError whose message names the missing property
14
+ ol_event = File.read("spec/events/event_no_run_id.json")  # fixture path is relative to the process working directory
15
+
16
+ err = assert_raise Fluent::ParserError do
17
+ @parser.instance.parse(ol_event)
18
+ end
19
+ assert_match(/Openlineage validation failed: (.+) path "\/run": "runId" is a required property/, err.message)
20
+ end
21
+
22
+ test "test invalid json" do  # input that is not valid JSON must raise ParserError
23
+ assert_raise Fluent::ParserError do
24
+ @parser.instance.parse('{"run": Not a JSON}')
25
+ end
26
+ end
27
+
28
+ test "event full test" do  # a complete event parses and exposes runId and the output rowCount
29
+ ol_event = File.read("spec/events/event_full.json")
30
+ @parser.instance.parse(ol_event) { | time, json |  # NOTE(review): the assertions run only if parse yields to this block
31
+ assert_equal("ea041791-68bc-4ae1-bd89-4c8106a157e4", json['run']['runId'])
32
+ assert_equal(2000, json['outputs'][0]['outputFacets']['outputStatistics']['rowCount'])
33
+ }
34
+ end
35
+
36
+ test "event simple test" do  # a minimal event parses and exposes runId
37
+ ol_event = File.read("spec/events/event_simple.json")
38
+ @parser.instance.parse(ol_event) { | time, json |  # NOTE(review): the assertion runs only if parse yields to this block
39
+ assert_equal("41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7", json['run']['runId'])
40
+ }
41
+ end
42
+
43
+ test "invalid spec_directory test" do  # configure itself (not parse) is expected to raise for a non-existent directory
44
+ assert_raise Fluent::ParserError do
45
+ @parser.configure(
46
+ 'spec_directory' => './non-existent-spec/'
47
+ )
48
+ end
49
+ end
50
+
51
+ test "valid spec_directory without slash" do  # same directory as in setup, given without the trailing slash
52
+ @parser.configure(
53
+ 'spec_directory' => '../../spec'
54
+ )
55
+ ol_event = File.read("spec/events/event_simple.json")
56
+ @parser.instance.parse(ol_event) { | time, json |
57
+ assert_equal("41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7", json['run']['runId'])
58
+ }
59
+ end
60
+
61
+ test "run facet validation" do  # uses the setup configuration only (no validate_* option set) and expects a /run/facets error
62
+ ol_event = File.read("spec/events/event_invalid_run_facet.json")
63
+ err = assert_raise Fluent::ParserError do
64
+ @parser.instance.parse(ol_event)
65
+ end
66
+ assert_match(/Openlineage validation failed: (.+) path "\/run\/facets/, err.message)
67
+ end
68
+
69
+ test "run facet validation turned off" do  # same invalid fixture as above, expected to parse once validate_run_facets is false
70
+ ol_event = File.read("spec/events/event_invalid_run_facet.json")
71
+ @parser.configure(
72
+ 'spec_directory' => '../../spec/',
73
+ 'validate_run_facets' => false,
74
+ )
75
+ @parser.instance.parse(ol_event) { | time, json |
76
+ assert_equal("41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7", json['run']['runId'])
77
+ }
78
+ end
79
+
80
+ test "job facet validation" do  # uses the setup configuration only and expects an error under /job/facets/ownership
81
+ ol_event = File.read("spec/events/event_invalid_job_facet.json")
82
+ err = assert_raise Fluent::ParserError do
83
+ @parser.instance.parse(ol_event)
84
+ end
85
+ assert_match(/Openlineage validation failed: (.+) path "\/job\/facets\/ownership/, err.message)
86
+ end
87
+
88
+ test "job facet validation turned off" do  # same invalid fixture as above, expected to parse once validate_job_facets is false
89
+ ol_event = File.read("spec/events/event_invalid_job_facet.json")
90
+ @parser.configure(
91
+ 'spec_directory' => '../../spec/',
92
+ 'validate_job_facets' => false,
93
+ )
94
+ @parser.instance.parse(ol_event) { | time, json |
95
+ assert_equal("41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7", json['run']['runId'])
96
+ }
97
+ end
98
+
99
+ test "dataset facet validation" do  # validate_dataset_facets is switched on explicitly for this case
100
+ ol_event = File.read("spec/events/event_invalid_dataset_facet.json")
101
+ @parser.configure(
102
+ 'spec_directory' => '../../spec/',
103
+ 'validate_dataset_facets' => true,
104
+ )
105
+ err = assert_raise Fluent::ParserError do
106
+ @parser.instance.parse(ol_event)
107
+ end
108
+ assert_match(/Openlineage validation failed: (.+) path "\/outputs\/0\/facets\/ownership\/owners/, err.message)
109
+ end
110
+
111
+ test "input dataset facet validation" do  # validate_input_dataset_facets is switched on explicitly; the only case that also sets json_parser
112
+ ol_event = File.read("spec/events/event_invalid_input_dataset_facet.json")
113
+ @parser.configure(
114
+ 'json_parser' => 'yajl',
115
+ 'spec_directory' => '../../spec/',
116
+ 'validate_input_dataset_facets' => true,
117
+ )
118
+ err = assert_raise Fluent::ParserError do
119
+ @parser.instance.parse(ol_event)
120
+ end
121
+ assert_match(/Openlineage validation failed: (.+) "columnMetrics" is a required property/, err.message)
122
+ end
123
+
124
+ test "output dataset facet validation" do  # validate_output_dataset_facets is switched on explicitly for this case
125
+ ol_event = File.read("spec/events/event_invalid_output_dataset_facet.json")
126
+ @parser.configure(
127
+ 'spec_directory' => '../../spec/',
128
+ 'validate_output_dataset_facets' => true,
129
+ )
130
+ err = assert_raise Fluent::ParserError do
131
+ @parser.instance.parse(ol_event)
132
+ end
133
+ assert_match(/Openlineage validation failed: (.+) path \"\/outputs\/0\/outputFacets\/outputStatistics\/rowCount/, err.message)
134
+ end
135
+
136
+ private
137
+
138
+ def create_driver(conf)  # builds and configures a new parser driver; NOTE(review): no test in this class calls it
139
+ Fluent::Test::Driver::Parser.new(Fluent::Plugin::OpenlineageParser).configure(conf)
140
+ end
141
+ end
@@ -0,0 +1,31 @@
1
+ {
2
+ "producer": {
3
+ "root_doc_URL": "https://github.com/OpenLineage/OpenLineage/tree/main/spec",
4
+ "produced_facets": [
5
+ "ol:core:ColumnLineageDatasetFacet.json",
6
+ "ol:core:DataQualityAssertionsDatasetFacet.json",
7
+ "ol:core:DataQualityMetricsInputDatasetFacet.json",
8
+ "ol:core:DatasetVersionDatasetFacet.json",
9
+ "ol:core:DatasourceDatasetFacet.json",
10
+ "ol:core:DocumentationDatasetFacet.json",
11
+ "ol:core:DocumentationJobFacet.json",
12
+ "ol:core:ErrorMessageRunFacet.json",
13
+ "ol:core:ExternalQueryRunFacet.json",
14
+ "ol:core:ExtractionErrorRunFacet.json",
15
+ "ol:core:JobTypeJobFacet.json",
16
+ "ol:core:LifecycleStateChangeDatasetFacet.json",
17
+ "ol:core:NominalTimeRunFacet.json",
18
+ "ol:core:OutputStatisticsOutputDatasetFacet.json",
19
+ "ol:core:OwnershipDatasetFacet.json",
20
+ "ol:core:OwnershipJobFacet.json",
21
+ "ol:core:ParentRunFacet.json",
22
+ "ol:core:ProcessingEngineRunFacet.json",
23
+ "ol:core:SQLJobFacet.json",
24
+ "ol:core:SchemaDatasetFacet.json",
25
+ "ol:core:SourceCodeJobFacet.json",
26
+ "ol:core:SourceCodeLocationJobFacet.json",
27
+ "ol:core:StorageDatasetFacet.json",
28
+ "ol:core:SymlinksDatasetFacet.json"
29
+ ]
30
+ }
31
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-0/GcpCommonJobFacet.json",
4
+ "$defs": {
5
+ "GcpCommonJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "displayName": {
14
+ "description": "The name of the job to be used on UI",
15
+ "type": "string"
16
+ },
17
+ "origin": {
18
+ "type": "object",
19
+ "properties": {
20
+ "sourceType": {
21
+ "description": "Type of the source. Possible values can be found in GCP documentation (https://cloud.google.com/data-catalog/docs/reference/data-lineage/rest/v1/projects.locations.processes#SourceType) ",
22
+ "type": "string"
23
+ },
24
+ "name": {
25
+ "description": "If the sourceType isn't CUSTOM, the value of this field should be a GCP resource name of the system, which reports lineage. The project and location parts of the resource name must match the project and location of the lineage resource being created. More details in GCP documentation https://cloud.google.com/data-catalog/docs/reference/data-lineage/rest/v1/projects.locations.processes#origin",
26
+ "type": "string"
27
+ }
28
+ }
29
+ }
30
+ },
31
+ "additionalProperties": true
32
+ }
33
+ ],
34
+ "type": "object"
35
+ }
36
+ },
37
+ "type": "object",
38
+ "properties": {
39
+ "gcp_common": {
40
+ "$ref": "#/$defs/GcpCommonJobFacet"
41
+ }
42
+ }
43
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "consumer": {
3
+ "root_doc_URL": "https://cloud.google.com/data-catalog/docs/reference/data-lineage/rpc/google.cloud.datacatalog.lineage.v1#google.cloud.datacatalog.lineage.v1.Lineage.ProcessOpenLineageRunEvent",
4
+ "produced_facets": ["ol:gcp:GcpCommonJobFacet.json"]
5
+ }
6
+ }
@@ -0,0 +1,8 @@
1
+ $LOAD_PATH.unshift(File.expand_path("../../", __FILE__))  # prepend the path obtained by resolving "../../" against this file's path, so requires can be written relative to it
2
+ require "test-unit"
3
+ require "fluent/test"
4
+ require "fluent/test/driver/parser"
5
+ require "fluent/test/helpers"
6
+
7
+ Test::Unit::TestCase.include(Fluent::Test::Helpers)  # helpers become instance methods of every test case
8
+ Test::Unit::TestCase.extend(Fluent::Test::Helpers)  # the same helpers also become class-level methods
@@ -0,0 +1,172 @@
1
+ {
2
+ "columnLineage": {
3
+ "fields": {
4
+ "NAME": {
5
+ "inputFields": [
6
+ {
7
+ "namespace": "SnowflakeOpenLineage",
8
+ "name": "CUSTOMERS",
9
+ "field": "NAME",
10
+ "transformations": [
11
+ {
12
+ "type": "DIRECT",
13
+ "subtype": "IDENTITY",
14
+ "masking": false
15
+ }
16
+ ]
17
+ },
18
+ {
19
+ "namespace": "SnowflakeOpenLineage",
20
+ "name": "CUSTOMERS",
21
+ "field": "ID",
22
+ "transformations": [
23
+ {
24
+ "type": "INDIRECT",
25
+ "subtype": "JOIN",
26
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
27
+ }
28
+ ]
29
+ },
30
+ {
31
+ "namespace": "SnowflakeOpenLineage",
32
+ "name": "DISCOUNTS",
33
+ "field": "CUSTOMERS_ID",
34
+ "transformations": [
35
+ {
36
+ "type": "INDIRECT",
37
+ "subtype": "JOIN",
38
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
39
+ }
40
+ ]
41
+ }
42
+ ],
43
+ "transformationDescription": "SELECT NAME",
44
+ "transformationType": "IDENTITY"
45
+ },
46
+ "AMOUNT_OFF": {
47
+ "inputFields": [
48
+ {
49
+ "namespace": "SnowflakeOpenLineage",
50
+ "name": "DISCOUNTS",
51
+ "field": "AMOUNT_OFF",
52
+ "transformations": [
53
+ {
54
+ "type": "DIRECT",
55
+ "subtype": "IDENTITY",
56
+ "masking": false
57
+ }
58
+ ]
59
+ },
60
+ {
61
+ "namespace": "SnowflakeOpenLineage",
62
+ "name": "CUSTOMERS",
63
+ "field": "ID",
64
+ "transformations": [
65
+ {
66
+ "type": "INDIRECT",
67
+ "subtype": "JOIN",
68
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
69
+ }
70
+ ]
71
+ },
72
+ {
73
+ "namespace": "SnowflakeOpenLineage",
74
+ "name": "DISCOUNTS",
75
+ "field": "CUSTOMERS_ID",
76
+ "transformations": [
77
+ {
78
+ "type": "INDIRECT",
79
+ "subtype": "JOIN",
80
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
81
+ }
82
+ ]
83
+ }
84
+ ],
85
+ "transformationType": "IDENTITY"
86
+ },
87
+ "STARTS_AT": {
88
+ "inputFields": [
89
+ {
90
+ "namespace": "SnowflakeOpenLineage",
91
+ "name": "DISCOUNTS",
92
+ "field": "STARTS_AT",
93
+ "transformations": [
94
+ {
95
+ "type": "DIRECT",
96
+ "subtype": "IDENTITY",
97
+ "masking": false
98
+ }
99
+ ]
100
+ },
101
+ {
102
+ "namespace": "SnowflakeOpenLineage",
103
+ "name": "CUSTOMERS",
104
+ "field": "ID",
105
+ "transformations": [
106
+ {
107
+ "type": "INDIRECT",
108
+ "subtype": "JOIN",
109
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
110
+ }
111
+ ]
112
+ },
113
+ {
114
+ "namespace": "SnowflakeOpenLineage",
115
+ "name": "DISCOUNTS",
116
+ "field": "CUSTOMERS_ID",
117
+ "transformations": [
118
+ {
119
+ "type": "INDIRECT",
120
+ "subtype": "JOIN",
121
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
122
+ }
123
+ ]
124
+ }
125
+ ],
126
+ "transformationType": "IDENTITY"
127
+ },
128
+ "ENDS_AT": {
129
+ "inputFields": [
130
+ {
131
+ "namespace": "SnowflakeOpenLineage",
132
+ "name": "DISCOUNTS",
133
+ "field": "ENDS_AT",
134
+ "transformations": [
135
+ {
136
+ "type": "DIRECT",
137
+ "subtype": "IDENTITY",
138
+ "masking": false
139
+ }
140
+ ]
141
+ },
142
+ {
143
+ "namespace": "SnowflakeOpenLineage",
144
+ "name": "CUSTOMERS",
145
+ "field": "ID",
146
+ "transformations": [
147
+ {
148
+ "type": "INDIRECT",
149
+ "subtype": "JOIN",
150
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
151
+ }
152
+ ]
153
+ },
154
+ {
155
+ "namespace": "SnowflakeOpenLineage",
156
+ "name": "DISCOUNTS",
157
+ "field": "CUSTOMERS_ID",
158
+ "transformations": [
159
+ {
160
+ "type": "INDIRECT",
161
+ "subtype": "JOIN",
162
+ "description": "ON (DISCOUNTS.CUSTOMERS_ID=CUSTOMERS.ID)"
163
+ }
164
+ ]
165
+ }
166
+ ]
167
+ }
168
+ },
169
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
170
+ "_schemaURL": "https://openlineage.io/spec/facets/1-1-0/ColumnLineageDatasetFacet.json"
171
+ }
172
+ }
@@ -0,0 +1,58 @@
1
+ {
2
+ "dataQualityAssertions": {
3
+ "assertions": [
4
+ {
5
+ "assertion": "accepted_values",
6
+ "column": "status",
7
+ "success": true
8
+ },
9
+ {
10
+ "assertion": "not_null",
11
+ "column": "amount",
12
+ "success": true
13
+ },
14
+ {
15
+ "assertion": "not_null",
16
+ "column": "bank_transfer_amount",
17
+ "success": true
18
+ },
19
+ {
20
+ "assertion": "not_null",
21
+ "column": "coupon_amount",
22
+ "success": true
23
+ },
24
+ {
25
+ "assertion": "not_null",
26
+ "column": "credit_card_amount",
27
+ "success": true
28
+ },
29
+ {
30
+ "assertion": "not_null",
31
+ "column": "customer_id",
32
+ "success": true
33
+ },
34
+ {
35
+ "assertion": "not_null",
36
+ "column": "gift_card_amount",
37
+ "success": true
38
+ },
39
+ {
40
+ "assertion": "not_null",
41
+ "column": "order_id",
42
+ "success": true
43
+ },
44
+ {
45
+ "assertion": "relationships",
46
+ "column": "customer_id",
47
+ "success": true
48
+ },
49
+ {
50
+ "assertion": "unique",
51
+ "column": "order_id",
52
+ "success": true
53
+ }
54
+ ],
55
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
56
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DataQualityAssertionsDatasetFacet.json"
57
+ }
58
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "dataQualityMetrics": {
3
+ "columnMetrics": {
4
+ "amount": {
5
+ "count": 10,
6
+ "max": 1500,
7
+ "min": 100,
8
+ "sum": 6600
9
+ },
10
+ "counterparty_id": {
11
+ "nullCount": 0
12
+ },
13
+ "user_id": {
14
+ "distinctCount": 3,
15
+ "nullCount": 0
16
+ }
17
+ },
18
+ "rowCount": 10,
19
+ "fileCount": 5,
20
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
21
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DataQualityMetricsInputDatasetFacet.json"
22
+ }
23
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": {
3
+ "datasetVersion": "2",
4
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
5
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json"
6
+ }
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": {
3
+ "datasetVersion": "2",
4
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
5
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json"
6
+ }
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "documentation": {
3
+ "description": "Some dataset description",
4
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
5
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json"
6
+ }
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "documentation": {
3
+ "description": "Determines the popular day of week orders are placed.",
4
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
5
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationJobFacet.json"
6
+ }
7
+ }
@@ -0,0 +1,9 @@
1
+ {
2
+ "errorMessage": {
3
+ "message": "failed",
4
+ "programmingLanguage": "JAVA",
5
+ "stackTrace": "<stack_trace>",
6
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
7
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ErrorMessageRunFacet.json"
8
+ }
9
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "externalQuery": {
3
+ "_producer": "https://github.com/OpenLineage/OpenLineage/tree/0.0.1/integration/airflow",
4
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ExternalQueryRunFacet.json",
5
+ "externalQueryId": "bigquery_1690280798993890_f4ade238a8121576708e1892f69e67fe",
6
+ "source": "bigquery"
7
+ }
8
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "extractionError": {
3
+ "totalTasks": 1,
4
+ "failedTasks": 1,
5
+ "errors": [
6
+ {
7
+ "errorMessage": "Expected TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, STAGE or SEQUENCE after DROP, found: POLICY at Line: 1, Column 6",
8
+ "task": "DROP POLICY IF EXISTS name ON table_name",
9
+ "taskNumber": 0
10
+ }
11
+ ],
12
+ "_producer": "https://github.com/OpenLineage/OpenLineage/tree/0.0.1/integration/dbt",
13
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ExtractionErrorRunFacet.json"
14
+ }
15
+ }
@@ -0,0 +1,9 @@
1
+ {
2
+ "jobType": {
3
+ "processingType": "BATCH",
4
+ "integration": "SPARK",
5
+ "jobType": "QUERY",
6
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
7
+ "_schemaURL": "https://openlineage.io/spec/facets/2-0-2/JobTypeJobFacet.json"
8
+ }
9
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "lifecycleStateChange": {
3
+ "lifecycleStateChange": "RENAME",
4
+ "previousIdentifier": {
5
+ "name": "/tmp/alter_test/alter_table_test",
6
+ "namespace": "file"
7
+ },
8
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
9
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/LifecycleStateChangeDatasetFacet.json"
10
+ }
11
+ }