fluent-plugin-openlineage 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +6 -0
  3. data/.github/workflows/linux.yml +30 -0
  4. data/.gitignore +16 -0
  5. data/.idea/.gitignore +8 -0
  6. data/.idea/fluentd.iml +204 -0
  7. data/.idea/misc.xml +4 -0
  8. data/.idea/modules/benchmark-memory-0.2.iml +12 -0
  9. data/.idea/modules/bigdecimal-3.1.iml +11 -0
  10. data/.idea/modules/certstore_c-0.1.iml +15 -0
  11. data/.idea/modules/concurrent-ruby-1.3.iml +18 -0
  12. data/.idea/modules/concurrent-ruby-1.31.iml +15 -0
  13. data/.idea/modules/connection_pool-2.4.iml +11 -0
  14. data/.idea/modules/cool.io-1.8.iml +16 -0
  15. data/.idea/modules/drb-2.2.iml +14 -0
  16. data/.idea/modules/drb-2.21.iml +11 -0
  17. data/.idea/modules/ffi-1.17.iml +20 -0
  18. data/.idea/modules/ffi-win32-extensions-1.0.iml +19 -0
  19. data/.idea/modules/fluentd-1.17.iml +43 -0
  20. data/.idea/modules/http_parser.rb-0.8.iml +17 -0
  21. data/.idea/modules/json-2.7.iml +14 -0
  22. data/.idea/modules/json-2.71.iml +11 -0
  23. data/.idea/modules/msgpack-1.7.iml +15 -0
  24. data/.idea/modules/mutex_m-0.2.iml +15 -0
  25. data/.idea/modules/new_gem.iml +15 -0
  26. data/.idea/modules/power_assert-2.0.iml +19 -0
  27. data/.idea/modules/rake-13.2.iml +18 -0
  28. data/.idea/modules/rake-13.21.iml +15 -0
  29. data/.idea/modules/rake-compiler-1.2.iml +13 -0
  30. data/.idea/modules/rusty_json_schema-0.15.iml +15 -0
  31. data/.idea/modules/serverengine-2.3.iml +17 -0
  32. data/.idea/modules/sigdump-0.2.iml +16 -0
  33. data/.idea/modules/specifications.iml +14 -0
  34. data/.idea/modules/specifications1.iml +11 -0
  35. data/.idea/modules/strptime-0.2.iml +16 -0
  36. data/.idea/modules/thermite-0.13.iml +17 -0
  37. data/.idea/modules/webrick-1.8.iml +18 -0
  38. data/.idea/modules/win32-event-0.6.iml +21 -0
  39. data/.idea/modules/win32-ipc-0.7.iml +20 -0
  40. data/.idea/modules/yajl-ruby-1.4.iml +779 -0
  41. data/.idea/modules.xml +41 -0
  42. data/.rspec +2 -0
  43. data/ChangeLog +3 -0
  44. data/Gemfile +3 -0
  45. data/LICENSE +202 -0
  46. data/README.md +250 -0
  47. data/Rakefile +13 -0
  48. data/fluent-plugin-openlineage.gemspec +28 -0
  49. data/lib/fluent/plugin/parser_openlineage.rb +182 -0
  50. data/misc/fluent.conf +101 -0
  51. data/misc/test-complete.json +73 -0
  52. data/misc/test-start.json +73 -0
  53. data/spec/Naming.md +500 -0
  54. data/spec/OpenLineage.json +304 -0
  55. data/spec/Versioning.md +49 -0
  56. data/spec/events/event_full.json +206 -0
  57. data/spec/events/event_invalid_dataset_facet.json +31 -0
  58. data/spec/events/event_invalid_input_dataset_facet.json +29 -0
  59. data/spec/events/event_invalid_job_facet.json +26 -0
  60. data/spec/events/event_invalid_output_dataset_facet.json +29 -0
  61. data/spec/events/event_invalid_run_facet.json +28 -0
  62. data/spec/events/event_no_run_id.json +28 -0
  63. data/spec/events/event_simple.json +29 -0
  64. data/spec/facets/ColumnLineageDatasetFacet.json +96 -0
  65. data/spec/facets/ColumnLineageDatasetFacet.md +106 -0
  66. data/spec/facets/DataQualityAssertionsDatasetFacet.json +49 -0
  67. data/spec/facets/DataQualityMetricsInputDatasetFacet.json +76 -0
  68. data/spec/facets/DatasetVersionDatasetFacet.json +31 -0
  69. data/spec/facets/DatasourceDatasetFacet.json +32 -0
  70. data/spec/facets/DocumentationDatasetFacet.json +31 -0
  71. data/spec/facets/DocumentationJobFacet.json +30 -0
  72. data/spec/facets/ErrorMessageRunFacet.json +41 -0
  73. data/spec/facets/ExternalQueryRunFacet.json +36 -0
  74. data/spec/facets/ExternalQueryRunFacet.md +49 -0
  75. data/spec/facets/ExtractionErrorRunFacet.json +58 -0
  76. data/spec/facets/JobTypeJobFacet.json +41 -0
  77. data/spec/facets/LifecycleStateChangeDatasetFacet.json +46 -0
  78. data/spec/facets/NominalTimeRunFacet.json +38 -0
  79. data/spec/facets/OutputStatisticsOutputDatasetFacet.json +36 -0
  80. data/spec/facets/OwnershipDatasetFacet.json +45 -0
  81. data/spec/facets/OwnershipJobFacet.json +45 -0
  82. data/spec/facets/ParentRunFacet.json +54 -0
  83. data/spec/facets/ProcessingEngineRunFacet.json +41 -0
  84. data/spec/facets/SQLJobFacet.json +30 -0
  85. data/spec/facets/SchemaDatasetFacet.json +59 -0
  86. data/spec/facets/SourceCodeJobFacet.json +34 -0
  87. data/spec/facets/SourceCodeLocationJobFacet.json +60 -0
  88. data/spec/facets/StorageDatasetFacet.json +35 -0
  89. data/spec/facets/SymlinksDatasetFacet.json +47 -0
  90. data/spec/fluent/plugin/test_parser_openlineage.rb +141 -0
  91. data/spec/registry/core/registry.json +31 -0
  92. data/spec/registry/gcp/facets/GcpCommonJobFacet.json +43 -0
  93. data/spec/registry/gcp/registry.json +6 -0
  94. data/spec/spec_helper.rb +8 -0
  95. data/spec/tests/ColumnLineageDatasetFacet/1.json +172 -0
  96. data/spec/tests/DataQualityAssertionsDatasetFacet/1.json +58 -0
  97. data/spec/tests/DataQualityMetricsInputDatasetFacet/1.json +23 -0
  98. data/spec/tests/DatasetVersionDatasetFacet/1.json +7 -0
  99. data/spec/tests/DatasourceDatasetFacet/1.json +7 -0
  100. data/spec/tests/DocumentationDatasetFacet/1.json +7 -0
  101. data/spec/tests/DocumentationJobFacet/1.json +7 -0
  102. data/spec/tests/ErrorMessageRunFacet/1.json +9 -0
  103. data/spec/tests/ExternalQueryRunFacet/1.json +8 -0
  104. data/spec/tests/ExtractionErrorRunFacet/1.json +15 -0
  105. data/spec/tests/JobTypeJobFacet/1.json +9 -0
  106. data/spec/tests/LifecycleStateChangeDatasetFacet/1.json +11 -0
  107. data/spec/tests/NominalTimeRunFacet/1.json +8 -0
  108. data/spec/tests/OutputStatisticsOutputDatasetFacet/1.json +9 -0
  109. data/spec/tests/OwnershipDatasetFacet/1.json +11 -0
  110. data/spec/tests/OwnershipJobFacet/1.json +11 -0
  111. data/spec/tests/ParentRunFacet/1.json +13 -0
  112. data/spec/tests/ProcessingEngineRunFacet/1.json +9 -0
  113. data/spec/tests/SQLJobFacet/1.json +7 -0
  114. data/spec/tests/SchemaDatasetFacet/1.json +92 -0
  115. data/spec/tests/SourceCodeJobFacet/1.json +8 -0
  116. data/spec/tests/SourceCodeLocationJobFacet/1.json +8 -0
  117. data/spec/tests/StorageDatasetFacet/1.json +8 -0
  118. data/spec/tests/SymlinksDatasetFacet/1.json +13 -0
  119. data/spec/tests/example_full_event.json +24 -0
  120. metadata +188 -3
@@ -0,0 +1,29 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:52:00.001+10:00",
4
+ "run": {
5
+ "runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
6
+ },
7
+ "job": {
8
+ "namespace": "my-scheduler-namespace",
9
+ "name": "myjob"
10
+ },
11
+ "inputs": [],
12
+ "outputs": [
13
+ {
14
+ "namespace": "my-datasource-namespace",
15
+ "name": "instance.schema.output-1",
16
+ "outputFacets": {
17
+ "outputStatistics": {
18
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
19
+ "_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/OutputStatisticsOutputDatasetFacet",
20
+ "rowCount": "wrong",
21
+ "size": 2097152,
22
+ "fileCount": 5
23
+ }
24
+ }
25
+ }
26
+ ],
27
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
28
+ "schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
29
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:52:00.001+10:00",
4
+ "run": {
5
+ "runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7",
6
+ "facets": {
7
+ "parent": {
8
+ "run": {
9
+ "noRunId": "invalid run id"
10
+ },
11
+ "job": {
12
+ "namespace": "parent_namespace",
13
+ "name": "parent_name"
14
+ },
15
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
16
+ "_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunFacet"
17
+ }
18
+ }
19
+ },
20
+ "job": {
21
+ "namespace": "my-scheduler-namespace",
22
+ "name": "myjob"
23
+ },
24
+ "inputs": [ ],
25
+ "outputs": [ ],
26
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
27
+ "schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:52:00.001+10:00",
4
+ "run": {
5
+ },
6
+ "job": {
7
+ "namespace": "my-scheduler-namespace",
8
+ "name": "myjob"
9
+ },
10
+ "inputs": [
11
+ {
12
+ "namespace": "my-datasource-namespace",
13
+ "name": "instance.schema.input-1"
14
+ },
15
+ {
16
+ "namespace": "my-datasource-namespace",
17
+ "name": "instance.schema.input-2"
18
+ }
19
+ ],
20
+ "outputs": [
21
+ {
22
+ "namespace": "my-datasource-namespace",
23
+ "name": "instance.schema.output-1"
24
+ }
25
+ ],
26
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
27
+ "schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
28
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:52:00.001+10:00",
4
+ "run": {
5
+ "runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
6
+ },
7
+ "job": {
8
+ "namespace": "my-scheduler-namespace",
9
+ "name": "myjob"
10
+ },
11
+ "inputs": [
12
+ {
13
+ "namespace": "my-datasource-namespace",
14
+ "name": "instance.schema.input-1"
15
+ },
16
+ {
17
+ "namespace": "my-datasource-namespace",
18
+ "name": "instance.schema.input-2"
19
+ }
20
+ ],
21
+ "outputs": [
22
+ {
23
+ "namespace": "my-datasource-namespace",
24
+ "name": "instance.schema.output-1"
25
+ }
26
+ ],
27
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
28
+ "schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
29
+ }
@@ -0,0 +1,96 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-1-0/ColumnLineageDatasetFacet.json",
4
+ "$defs": {
5
+ "ColumnLineageDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "fields": {
14
+ "description": "Column level lineage that maps output fields into input fields used to evaluate them.",
15
+ "type": "object",
16
+ "additionalProperties": {
17
+ "type": "object",
18
+ "properties": {
19
+ "inputFields": {
20
+ "type": "array",
21
+ "items": {
22
+ "type": "object",
23
+ "properties": {
24
+ "namespace": {
25
+ "type": "string",
26
+ "description": "The input dataset namespace"
27
+ },
28
+ "name": {
29
+ "type": "string",
30
+ "description": "The input dataset name"
31
+ },
32
+ "field": {
33
+ "type": "string",
34
+ "description": "The input field"
35
+ },
36
+ "transformations": {
37
+ "type": "array",
38
+ "items": {
39
+ "type": "object",
40
+ "properties": {
41
+ "type": {
42
+ "description": "The type of the transformation. Allowed values are: DIRECT, INDIRECT",
43
+ "type": "string"
44
+ },
45
+ "subtype": {
46
+ "type": "string",
47
+ "description": "The subtype of the transformation"
48
+ },
49
+ "description": {
50
+ "type": "string",
51
+ "description": "a string representation of the transformation applied"
52
+ },
53
+ "masking": {
54
+ "type": "boolean",
55
+ "description": "Whether the transformation masks the data"
56
+ }
57
+ },
58
+ "required": ["type"],
59
+ "additionalProperties": true
60
+ }
61
+ }
62
+ },
63
+ "additionalProperties": true,
64
+ "required": ["namespace", "name", "field"]
65
+ }
66
+ },
67
+ "transformationDescription": {
68
+ "type": "string",
69
+ "description": "a string representation of the transformation applied",
70
+ "deprecated": true
71
+ },
72
+ "transformationType": {
73
+ "type": "string",
74
+ "description": "IDENTITY|MASKED reflects a clearly defined behavior. IDENTITY: exact same as input; MASKED: no original data available (like a hash of PII for example)",
75
+ "deprecated": true
76
+ }
77
+ },
78
+ "additionalProperties": true,
79
+ "required": ["inputFields"]
80
+ }
81
+ }
82
+ },
83
+ "additionalProperties": true,
84
+ "required": ["fields"]
85
+ }
86
+ ],
87
+ "type": "object"
88
+ }
89
+ },
90
+ "type": "object",
91
+ "properties": {
92
+ "columnLineage": {
93
+ "$ref": "#/$defs/ColumnLineageDatasetFacet"
94
+ }
95
+ }
96
+ }
@@ -0,0 +1,106 @@
1
+ # Column level lineage
2
+
3
+ The [column level lineage facet](ColumnLineageDatasetFacet.json) captures the lineage of columns of an output dataset
4
+ from the columns in input datasets. It must refer to existing columns as defined in the
5
+ [`schema` facet](SchemaDatasetFacet.json). Additional information on the transformation from the input columns to the
6
+ output column is stored in the optional _transformationDescription_ and _transformationType_ fields.
7
+
8
+ - _transformationDescription_: a human-readable description of the transformation, e.g. "(a+b)" or "identical"
9
+ - _transformationType_: type of the transformation. possible values:
10
+ - "IDENTITY" exactly the same as the input.
11
+ - "MASKED" for example a hash of the input value that doesn't expose the original value.
12
+
13
+ Output Dataset example of adding a columnLineage facet:
14
+
15
+ ```diff
16
+ {
17
+ "namespace": "{namespace of the output dataset}",
18
+ "name": "{name of the output dataset}",
19
+ "facets": {
20
+ "schema": {
21
+ "fields": [
22
+ { "name": "{first column of the output dataset}", "type": "{its type}"},
23
+ { "name": "{second column of the output dataset}", "type": "{its type}"},
24
+ ...
25
+ ]
26
+ },
27
+ > "columnLineage": {
28
+ > "fields": {
29
+ > "{first column of the output dataset}": {
30
+ > "inputFields": [
31
+ > { "namespace": "{input dataset namespace}", "name": "{input dataset name}", "field": "{input dataset column name}"},
32
+ > ... other inputs
33
+ > ],
34
+ > "transformationDescription": "identical",
35
+ > "transformationType": "IDENTITY"
36
+ > },
37
+ > "{second column of the output dataset}": ...,
38
+ > ...
39
+ > }
40
+ > }
41
+ }
42
+ }
43
+ ```
44
+
45
+ Full lineage event example:
46
+
47
+ ```
48
+ {
49
+ "eventType": "START",
50
+ "eventTime": "2020-12-09T23:37:31.081Z",
51
+ "run": {
52
+ "runId": "3b452093-782c-4ef2-9c0c-aafe2aa6f34d"
53
+ },
54
+ "job": {
55
+ "namespace": "my-scheduler-namespace",
56
+ "name": "myjob.mytask"
57
+ },
58
+ "inputs": [
59
+ {
60
+ "namespace": "my-datasource-namespace",
61
+ "name": "instance.schema.table",
62
+ "facets": {
63
+ "schema": {
64
+ "fields": [
65
+ { "name": "ia", "type": "INT"},
66
+ { "name": "ib", "type": "INT"}
67
+ ]
68
+ }
69
+ }
70
+ }
71
+ ],
72
+ "outputs": [
73
+ {
74
+ "namespace": "my-datasource-namespace",
75
+ "name": "instance.schema.output_table",
76
+ "facets": {
77
+ "schema": {
78
+ "fields": [
79
+ { "name": "a", "type": "INT"},
80
+ { "name": "b", "type": "INT"}
81
+ ]
82
+ },
83
+ "columnLineage": {
84
+ "fields": {
85
+ "a": {
86
+ "inputFields": [
87
+ {"namespace": "my-datasource-namespace", "name": "instance.schema.table", "field": "ia"},
88
+ ... other inputs
89
+ ],
90
+ "transformationDescription": "identical",
91
+ "transformationType": "IDENTITY"
92
+ },
93
+ "b": ... other output fields
94
+ }
95
+ }
96
+ }
97
+ }
98
+ ],
99
+ "schemaURL": "https://openlineage.io/spec/1-0-0/OpenLineage.json#/definitions/RunEvent"
100
+ }
101
+ ```
102
+
103
+ ---
104
+
105
+ SPDX-License-Identifier: Apache-2.0\
106
+ Copyright 2018-2024 contributors to the OpenLineage project
@@ -0,0 +1,49 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/DataQualityAssertionsDatasetFacet.json",
4
+ "$defs": {
5
+ "DataQualityAssertionsDatasetFacet": {
6
+ "description": "list of tests performed on dataset or dataset columns, and their results",
7
+ "allOf": [
8
+ {
9
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/InputDatasetFacet"
10
+ },
11
+ {
12
+ "type": "object",
13
+ "required": ["assertions"],
14
+ "properties": {
15
+ "assertions": {
16
+ "type": "array",
17
+ "items": {
18
+ "type": "object",
19
+ "properties": {
20
+ "assertion": {
21
+ "type": "string",
22
+ "description": "Type of expectation test that dataset is subjected to",
23
+ "example": "not_null"
24
+ },
25
+ "success": {
26
+ "type": "boolean"
27
+ },
28
+ "column": {
29
+ "type": "string",
30
+ "description": "Column that the expectation is testing. It should match the name provided in SchemaDatasetFacet. If the column field is empty, then the expectation refers to the whole dataset.",
31
+ "example": "id"
32
+ }
33
+ },
34
+ "required": ["assertion", "success"]
35
+ }
36
+ }
37
+ }
38
+ }
39
+ ],
40
+ "type": "object"
41
+ }
42
+ },
43
+ "type": "object",
44
+ "properties": {
45
+ "dataQualityAssertions": {
46
+ "$ref": "#/$defs/DataQualityAssertionsDatasetFacet"
47
+ }
48
+ }
49
+ }
@@ -0,0 +1,76 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-2/DataQualityMetricsInputDatasetFacet.json",
4
+ "$defs": {
5
+ "DataQualityMetricsInputDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/InputDatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "required": ["columnMetrics"],
13
+ "properties": {
14
+ "rowCount": {
15
+ "description": "The number of rows evaluated",
16
+ "type": "integer"
17
+ },
18
+ "bytes": {
19
+ "description": "The size in bytes",
20
+ "type": "integer"
21
+ },
22
+ "fileCount": {
23
+ "description": "The number of files evaluated",
24
+ "type": "integer"
25
+ },
26
+ "columnMetrics": {
27
+ "description": "The property key is the column name",
28
+ "type": "object",
29
+ "additionalProperties": {
30
+ "type": "object",
31
+ "properties": {
32
+ "nullCount": {
33
+ "description": "The number of null values in this column for the rows evaluated",
34
+ "type": "integer"
35
+ },
36
+ "distinctCount": {
37
+ "description": "The number of distinct values in this column for the rows evaluated",
38
+ "type": "integer"
39
+ },
40
+ "sum": {
41
+ "description": "The total sum of values in this column for the rows evaluated",
42
+ "type": "number"
43
+ },
44
+ "count": {
45
+ "description": "The number of values in this column",
46
+ "type": "number"
47
+ },
48
+ "min": {
49
+ "type": "number"
50
+ },
51
+ "max": {
52
+ "type": "number"
53
+ },
54
+ "quantiles": {
55
+ "description": "The property key is the quantile. Examples: 0.1 0.25 0.5 0.75 1",
56
+ "type": "object",
57
+ "additionalProperties": {
58
+ "type": "number"
59
+ }
60
+ }
61
+ }
62
+ }
63
+ }
64
+ }
65
+ }
66
+ ],
67
+ "type": "object"
68
+ }
69
+ },
70
+ "type": "object",
71
+ "properties": {
72
+ "dataQualityMetrics": {
73
+ "$ref": "#/$defs/DataQualityMetricsInputDatasetFacet"
74
+ }
75
+ }
76
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/DatasetVersionDatasetFacet.json",
4
+ "$defs": {
5
+ "DatasetVersionDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "datasetVersion": {
14
+ "description": "The version of the dataset.",
15
+ "type": "string"
16
+ }
17
+ },
18
+ "additionalProperties": true,
19
+ "required": ["datasetVersion"]
20
+ }
21
+ ],
22
+ "type": "object"
23
+ }
24
+ },
25
+ "type": "object",
26
+ "properties": {
27
+ "version": {
28
+ "$ref": "#/$defs/DatasetVersionDatasetFacet"
29
+ }
30
+ }
31
+ }
@@ -0,0 +1,32 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/DatasourceDatasetFacet.json",
4
+ "$defs": {
5
+ "DatasourceDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "name": {
14
+ "type": "string"
15
+ },
16
+ "uri": {
17
+ "type": "string",
18
+ "format": "uri"
19
+ }
20
+ }
21
+ }
22
+ ],
23
+ "type": "object"
24
+ }
25
+ },
26
+ "type": "object",
27
+ "properties": {
28
+ "dataSource": {
29
+ "$ref": "#/$defs/DatasourceDatasetFacet"
30
+ }
31
+ }
32
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/DocumentationDatasetFacet.json",
4
+ "$defs": {
5
+ "DocumentationDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "description": {
14
+ "description": "The description of the dataset.",
15
+ "type": "string",
16
+ "example": "canonical representation of entity Foo"
17
+ }
18
+ },
19
+ "required": ["description"]
20
+ }
21
+ ],
22
+ "type": "object"
23
+ }
24
+ },
25
+ "type": "object",
26
+ "properties": {
27
+ "documentation": {
28
+ "$ref": "#/$defs/DocumentationDatasetFacet"
29
+ }
30
+ }
31
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/DocumentationJobFacet.json",
4
+ "$defs": {
5
+ "DocumentationJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "description": {
14
+ "description": "The description of the job.",
15
+ "type": "string"
16
+ }
17
+ },
18
+ "required": ["description"]
19
+ }
20
+ ],
21
+ "type": "object"
22
+ }
23
+ },
24
+ "type": "object",
25
+ "properties": {
26
+ "documentation": {
27
+ "$ref": "#/$defs/DocumentationJobFacet"
28
+ }
29
+ }
30
+ }
@@ -0,0 +1,41 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/ErrorMessageRunFacet.json",
4
+ "$defs": {
5
+ "ErrorMessageRunFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "message": {
14
+ "description": "A human-readable string representing the error message generated by the observed system",
15
+ "type": "string",
16
+ "example": "org.apache.spark.sql.AnalysisException: Table or view not found: wrong_table_name; line 1 pos 14"
17
+ },
18
+ "programmingLanguage": {
19
+ "description": "Programming language the observed system uses.",
20
+ "type": "string",
21
+ "example": "JAVA"
22
+ },
23
+ "stackTrace": {
24
+ "description": "A language-specific stack trace generated by the observed system",
25
+ "type": "string",
26
+ "example": "Exception in thread \"main\" java.lang.RuntimeException: A test exception\nat io.openlineage.SomeClass.method(SomeClass.java:13)\nat io.openlineage.SomeClass.anotherMethod(SomeClass.java:9)"
27
+ }
28
+ },
29
+ "required": ["message", "programmingLanguage"]
30
+ }
31
+ ],
32
+ "type": "object"
33
+ }
34
+ },
35
+ "type": "object",
36
+ "properties": {
37
+ "errorMessage": {
38
+ "$ref": "#/$defs/ErrorMessageRunFacet"
39
+ }
40
+ }
41
+ }
@@ -0,0 +1,36 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-2/ExternalQueryRunFacet.json",
4
+ "$defs": {
5
+ "ExternalQueryRunFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "externalQueryId": {
14
+ "description": "Identifier for the external system",
15
+ "type": "string",
16
+ "example": "my-project-1234:US.bquijob_123x456_123y123z123c"
17
+ },
18
+ "source": {
19
+ "description": "source of the external query",
20
+ "type": "string",
21
+ "example": "bigquery"
22
+ }
23
+ },
24
+ "required": ["externalQueryId", "source"]
25
+ }
26
+ ],
27
+ "type": "object"
28
+ }
29
+ },
30
+ "type": "object",
31
+ "properties": {
32
+ "externalQuery": {
33
+ "$ref": "#/$defs/ExternalQueryRunFacet"
34
+ }
35
+ }
36
+ }