fluentd-openlineage-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.idea/.gitignore +8 -0
- data/.idea/fluentd.iml +204 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules/benchmark-memory-0.2.iml +12 -0
- data/.idea/modules/bigdecimal-3.1.iml +11 -0
- data/.idea/modules/certstore_c-0.1.iml +15 -0
- data/.idea/modules/concurrent-ruby-1.3.iml +18 -0
- data/.idea/modules/concurrent-ruby-1.31.iml +15 -0
- data/.idea/modules/connection_pool-2.4.iml +11 -0
- data/.idea/modules/cool.io-1.8.iml +16 -0
- data/.idea/modules/drb-2.2.iml +14 -0
- data/.idea/modules/drb-2.21.iml +11 -0
- data/.idea/modules/ffi-1.17.iml +20 -0
- data/.idea/modules/ffi-win32-extensions-1.0.iml +19 -0
- data/.idea/modules/fluentd-1.17.iml +43 -0
- data/.idea/modules/http_parser.rb-0.8.iml +17 -0
- data/.idea/modules/json-2.7.iml +14 -0
- data/.idea/modules/json-2.71.iml +11 -0
- data/.idea/modules/msgpack-1.7.iml +15 -0
- data/.idea/modules/mutex_m-0.2.iml +15 -0
- data/.idea/modules/new_gem.iml +15 -0
- data/.idea/modules/power_assert-2.0.iml +19 -0
- data/.idea/modules/rake-13.2.iml +18 -0
- data/.idea/modules/rake-13.21.iml +15 -0
- data/.idea/modules/rake-compiler-1.2.iml +13 -0
- data/.idea/modules/rusty_json_schema-0.15.iml +15 -0
- data/.idea/modules/serverengine-2.3.iml +17 -0
- data/.idea/modules/sigdump-0.2.iml +16 -0
- data/.idea/modules/specifications.iml +14 -0
- data/.idea/modules/specifications1.iml +11 -0
- data/.idea/modules/strptime-0.2.iml +16 -0
- data/.idea/modules/thermite-0.13.iml +17 -0
- data/.idea/modules/webrick-1.8.iml +18 -0
- data/.idea/modules/win32-event-0.6.iml +21 -0
- data/.idea/modules/win32-ipc-0.7.iml +20 -0
- data/.idea/modules/yajl-ruby-1.4.iml +779 -0
- data/.idea/modules.xml +41 -0
- data/Gemfile +3 -0
- data/README.md +223 -0
- data/Rakefile +13 -0
- data/config/conf/fluent.conf +101 -0
- data/config/test-complete.json +73 -0
- data/config/test-start.json +73 -0
- data/events/event_full.json +206 -0
- data/events/event_invalid_dataset_facet.json +31 -0
- data/events/event_invalid_input_dataset_facet.json +29 -0
- data/events/event_invalid_job_facet.json +26 -0
- data/events/event_invalid_output_dataset_facet.json +29 -0
- data/events/event_invalid_run_facet.json +28 -0
- data/events/event_no_run_id.json +28 -0
- data/events/event_simple.json +29 -0
- data/fluentd-openlineage-parser.gemspec +28 -0
- data/lib/fluent/plugin/fluentd-openlineage-parser.rb +182 -0
- data/spec/Naming.md +500 -0
- data/spec/OpenLineage.json +304 -0
- data/spec/OpenLineage.md +179 -0
- data/spec/OpenLineage.yml +27 -0
- data/spec/OpenLineageModel.svg +1 -0
- data/spec/Versioning.md +49 -0
- data/spec/facets/ColumnLineageDatasetFacet.json +96 -0
- data/spec/facets/ColumnLineageDatasetFacet.md +106 -0
- data/spec/facets/DataQualityAssertionsDatasetFacet.json +49 -0
- data/spec/facets/DataQualityMetricsInputDatasetFacet.json +76 -0
- data/spec/facets/DatasetVersionDatasetFacet.json +31 -0
- data/spec/facets/DatasourceDatasetFacet.json +32 -0
- data/spec/facets/DocumentationDatasetFacet.json +31 -0
- data/spec/facets/DocumentationJobFacet.json +30 -0
- data/spec/facets/ErrorMessageRunFacet.json +41 -0
- data/spec/facets/ExternalQueryRunFacet.json +36 -0
- data/spec/facets/ExternalQueryRunFacet.md +49 -0
- data/spec/facets/ExtractionErrorRunFacet.json +58 -0
- data/spec/facets/JobTypeJobFacet.json +41 -0
- data/spec/facets/LifecycleStateChangeDatasetFacet.json +46 -0
- data/spec/facets/NominalTimeRunFacet.json +38 -0
- data/spec/facets/OutputStatisticsOutputDatasetFacet.json +36 -0
- data/spec/facets/OwnershipDatasetFacet.json +45 -0
- data/spec/facets/OwnershipJobFacet.json +45 -0
- data/spec/facets/ParentRunFacet.json +54 -0
- data/spec/facets/ProcessingEngineRunFacet.json +41 -0
- data/spec/facets/SQLJobFacet.json +30 -0
- data/spec/facets/SchemaDatasetFacet.json +59 -0
- data/spec/facets/SourceCodeJobFacet.json +34 -0
- data/spec/facets/SourceCodeLocationJobFacet.json +60 -0
- data/spec/facets/StorageDatasetFacet.json +35 -0
- data/spec/facets/SymlinksDatasetFacet.json +47 -0
- data/spec/registry/core/registry.json +31 -0
- data/spec/registry/gcp/facets/GcpCommonJobFacet.json +43 -0
- data/spec/registry/gcp/registry.json +6 -0
- data/spec/release.sh +80 -0
- data/spec/tests/ColumnLineageDatasetFacet/1.json +172 -0
- data/spec/tests/DataQualityAssertionsDatasetFacet/1.json +58 -0
- data/spec/tests/DataQualityMetricsInputDatasetFacet/1.json +23 -0
- data/spec/tests/DatasetVersionDatasetFacet/1.json +7 -0
- data/spec/tests/DatasourceDatasetFacet/1.json +7 -0
- data/spec/tests/DocumentationDatasetFacet/1.json +7 -0
- data/spec/tests/DocumentationJobFacet/1.json +7 -0
- data/spec/tests/ErrorMessageRunFacet/1.json +9 -0
- data/spec/tests/ExternalQueryRunFacet/1.json +8 -0
- data/spec/tests/ExtractionErrorRunFacet/1.json +15 -0
- data/spec/tests/JobTypeJobFacet/1.json +9 -0
- data/spec/tests/LifecycleStateChangeDatasetFacet/1.json +11 -0
- data/spec/tests/NominalTimeRunFacet/1.json +8 -0
- data/spec/tests/OutputStatisticsOutputDatasetFacet/1.json +9 -0
- data/spec/tests/OwnershipDatasetFacet/1.json +11 -0
- data/spec/tests/OwnershipJobFacet/1.json +11 -0
- data/spec/tests/ParentRunFacet/1.json +13 -0
- data/spec/tests/ProcessingEngineRunFacet/1.json +9 -0
- data/spec/tests/SQLJobFacet/1.json +7 -0
- data/spec/tests/SchemaDatasetFacet/1.json +92 -0
- data/spec/tests/SourceCodeJobFacet/1.json +8 -0
- data/spec/tests/SourceCodeLocationJobFacet/1.json +8 -0
- data/spec/tests/StorageDatasetFacet/1.json +8 -0
- data/spec/tests/SymlinksDatasetFacet/1.json +13 -0
- data/spec/tests/example_full_event.json +24 -0
- data/test/helper.rb +8 -0
- data/test/plugin/test_parser_openlineage.rb +141 -0
- metadata +298 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/2-0-3/JobTypeJobFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"JobTypeJobFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"processingType": {
|
14
|
+
"description": "Job processing type like: BATCH or STREAMING",
|
15
|
+
"type": "string",
|
16
|
+
"example": "BATCH"
|
17
|
+
},
|
18
|
+
"integration": {
|
19
|
+
"description": "OpenLineage integration type of this job: for example SPARK|DBT|AIRFLOW|FLINK",
|
20
|
+
"type": "string",
|
21
|
+
"example": "SPARK"
|
22
|
+
},
|
23
|
+
"jobType": {
|
24
|
+
"description": "Job type, for example: QUERY|COMMAND|DAG|TASK|JOB|MODEL. This is an integration-specific field.",
|
25
|
+
"type": "string",
|
26
|
+
"example": "QUERY"
|
27
|
+
}
|
28
|
+
},
|
29
|
+
"required": ["processingType", "integration"]
|
30
|
+
}
|
31
|
+
],
|
32
|
+
"type": "object"
|
33
|
+
}
|
34
|
+
},
|
35
|
+
"type": "object",
|
36
|
+
"properties": {
|
37
|
+
"jobType": {
|
38
|
+
"$ref": "#/$defs/JobTypeJobFacet"
|
39
|
+
}
|
40
|
+
}
|
41
|
+
}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/LifecycleStateChangeDatasetFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"LifecycleStateChangeDatasetFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"lifecycleStateChange": {
|
14
|
+
"description": "The lifecycle state change.",
|
15
|
+
"type": "string",
|
16
|
+
"enum": ["ALTER", "CREATE", "DROP", "OVERWRITE", "RENAME", "TRUNCATE"]
|
17
|
+
},
|
18
|
+
"previousIdentifier": {
|
19
|
+
"description": "Previous name of the dataset in case of renaming it.",
|
20
|
+
"type": "object",
|
21
|
+
"properties": {
|
22
|
+
"name": {
|
23
|
+
"description": "Previous dataset name.",
|
24
|
+
"type": "string"
|
25
|
+
},
|
26
|
+
"namespace": {
|
27
|
+
"description": "Previous dataset namespace.",
|
28
|
+
"type": "string"
|
29
|
+
}
|
30
|
+
},
|
31
|
+
"required": ["name", "namespace"]
|
32
|
+
}
|
33
|
+
},
|
34
|
+
"required": ["lifecycleStateChange"]
|
35
|
+
}
|
36
|
+
],
|
37
|
+
"type": "object"
|
38
|
+
}
|
39
|
+
},
|
40
|
+
"type": "object",
|
41
|
+
"properties": {
|
42
|
+
"lifecycleStateChange": {
|
43
|
+
"$ref": "#/$defs/LifecycleStateChangeDatasetFacet"
|
44
|
+
}
|
45
|
+
}
|
46
|
+
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/NominalTimeRunFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"NominalTimeRunFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"nominalStartTime": {
|
14
|
+
"description": "An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal start time (included) of the run. AKA the schedule time",
|
15
|
+
"type": "string",
|
16
|
+
"format": "date-time",
|
17
|
+
"example": "2020-12-17T03:00:00.000Z"
|
18
|
+
},
|
19
|
+
"nominalEndTime": {
|
20
|
+
"description": "An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal end time (excluded) of the run. (Should be the nominal start time of the next run)",
|
21
|
+
"type": "string",
|
22
|
+
"format": "date-time",
|
23
|
+
"example": "2020-12-17T04:00:00.000Z"
|
24
|
+
}
|
25
|
+
},
|
26
|
+
"required": ["nominalStartTime"]
|
27
|
+
}
|
28
|
+
],
|
29
|
+
"type": "object"
|
30
|
+
}
|
31
|
+
},
|
32
|
+
"type": "object",
|
33
|
+
"properties": {
|
34
|
+
"nominalTime": {
|
35
|
+
"$ref": "#/$defs/NominalTimeRunFacet"
|
36
|
+
}
|
37
|
+
}
|
38
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-2/OutputStatisticsOutputDatasetFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"OutputStatisticsOutputDatasetFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/OutputDatasetFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"rowCount": {
|
14
|
+
"description": "The number of rows written to the dataset",
|
15
|
+
"type": "integer"
|
16
|
+
},
|
17
|
+
"size": {
|
18
|
+
"description": "The size in bytes written to the dataset",
|
19
|
+
"type": "integer"
|
20
|
+
},
|
21
|
+
"fileCount": {
|
22
|
+
"description": "The number of files written to the dataset",
|
23
|
+
"type": "integer"
|
24
|
+
}
|
25
|
+
}
|
26
|
+
}
|
27
|
+
]
|
28
|
+
}
|
29
|
+
},
|
30
|
+
"type": "object",
|
31
|
+
"properties": {
|
32
|
+
"outputStatistics": {
|
33
|
+
"$ref": "#/$defs/OutputStatisticsOutputDatasetFacet"
|
34
|
+
}
|
35
|
+
}
|
36
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/OwnershipDatasetFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"OwnershipDatasetFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"owners": {
|
14
|
+
"description": "The owners of the dataset.",
|
15
|
+
"type": "array",
|
16
|
+
"items": {
|
17
|
+
"type": "object",
|
18
|
+
"properties": {
|
19
|
+
"name": {
|
20
|
+
"description": "the identifier of the owner of the Dataset. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data",
|
21
|
+
"type": "string",
|
22
|
+
"example": "application:app_name"
|
23
|
+
},
|
24
|
+
"type": {
|
25
|
+
"description": "The type of ownership (optional)",
|
26
|
+
"type": "string",
|
27
|
+
"example": "MAINTAINER"
|
28
|
+
}
|
29
|
+
},
|
30
|
+
"required": ["name"]
|
31
|
+
}
|
32
|
+
}
|
33
|
+
}
|
34
|
+
}
|
35
|
+
],
|
36
|
+
"type": "object"
|
37
|
+
}
|
38
|
+
},
|
39
|
+
"type": "object",
|
40
|
+
"properties": {
|
41
|
+
"ownership": {
|
42
|
+
"$ref": "#/$defs/OwnershipDatasetFacet"
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/OwnershipJobFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"OwnershipJobFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"owners": {
|
14
|
+
"description": "The owners of the job.",
|
15
|
+
"type": "array",
|
16
|
+
"items": {
|
17
|
+
"type": "object",
|
18
|
+
"properties": {
|
19
|
+
"name": {
|
20
|
+
"description": "the identifier of the owner of the Job. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data",
|
21
|
+
"type": "string",
|
22
|
+
"example": "application:app_name"
|
23
|
+
},
|
24
|
+
"type": {
|
25
|
+
"description": "The type of ownership (optional)",
|
26
|
+
"type": "string",
|
27
|
+
"example": "MAINTAINER"
|
28
|
+
}
|
29
|
+
},
|
30
|
+
"required": ["name"]
|
31
|
+
}
|
32
|
+
}
|
33
|
+
}
|
34
|
+
}
|
35
|
+
],
|
36
|
+
"type": "object"
|
37
|
+
}
|
38
|
+
},
|
39
|
+
"type": "object",
|
40
|
+
"properties": {
|
41
|
+
"ownership": {
|
42
|
+
"$ref": "#/$defs/OwnershipJobFacet"
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/ParentRunFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"ParentRunFacet": {
|
6
|
+
"description": "the id of the parent run and job, iff this run was spawned from another run (for example, the Dag run scheduling its tasks)",
|
7
|
+
"allOf": [
|
8
|
+
{
|
9
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
10
|
+
},
|
11
|
+
{
|
12
|
+
"type": "object",
|
13
|
+
"properties": {
|
14
|
+
"run": {
|
15
|
+
"type": "object",
|
16
|
+
"properties": {
|
17
|
+
"runId": {
|
18
|
+
"description": "The globally unique ID of the run associated with the job.",
|
19
|
+
"type": "string",
|
20
|
+
"format": "uuid"
|
21
|
+
}
|
22
|
+
},
|
23
|
+
"required": ["runId"]
|
24
|
+
},
|
25
|
+
"job": {
|
26
|
+
"type": "object",
|
27
|
+
"properties": {
|
28
|
+
"namespace": {
|
29
|
+
"description": "The namespace containing that job",
|
30
|
+
"type": "string",
|
31
|
+
"example": "my-scheduler-namespace"
|
32
|
+
},
|
33
|
+
"name": {
|
34
|
+
"description": "The unique name for that job within that namespace",
|
35
|
+
"type": "string",
|
36
|
+
"example": "myjob.mytask"
|
37
|
+
}
|
38
|
+
},
|
39
|
+
"required": ["namespace", "name"]
|
40
|
+
}
|
41
|
+
},
|
42
|
+
"required": ["run", "job"]
|
43
|
+
}
|
44
|
+
],
|
45
|
+
"type": "object"
|
46
|
+
}
|
47
|
+
},
|
48
|
+
"type": "object",
|
49
|
+
"properties": {
|
50
|
+
"parent": {
|
51
|
+
"$ref": "#/$defs/ParentRunFacet"
|
52
|
+
}
|
53
|
+
}
|
54
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-1-1/ProcessingEngineRunFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"ProcessingEngineRunFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"version": {
|
14
|
+
"description": "Processing engine version. Might be Airflow or Spark version.",
|
15
|
+
"type": "string",
|
16
|
+
"example": "2.5.0"
|
17
|
+
},
|
18
|
+
"name": {
|
19
|
+
"description": "Processing engine name, e.g. Airflow or Spark",
|
20
|
+
"type": "string",
|
21
|
+
"example": "Airflow"
|
22
|
+
},
|
23
|
+
"openlineageAdapterVersion": {
|
24
|
+
"description": "OpenLineage adapter package version. Might be e.g. OpenLineage Airflow integration package version",
|
25
|
+
"type": "string",
|
26
|
+
"example": "0.19.0"
|
27
|
+
}
|
28
|
+
},
|
29
|
+
"required": ["version"]
|
30
|
+
}
|
31
|
+
],
|
32
|
+
"type": "object"
|
33
|
+
}
|
34
|
+
},
|
35
|
+
"type": "object",
|
36
|
+
"properties": {
|
37
|
+
"processing_engine": {
|
38
|
+
"$ref": "#/$defs/ProcessingEngineRunFacet"
|
39
|
+
}
|
40
|
+
}
|
41
|
+
}
|
@@ -0,0 +1,30 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/SQLJobFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"SQLJobFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"query": {
|
14
|
+
"type": "string",
|
15
|
+
"example": "SELECT * FROM foo"
|
16
|
+
}
|
17
|
+
},
|
18
|
+
"required": ["query"]
|
19
|
+
}
|
20
|
+
],
|
21
|
+
"type": "object"
|
22
|
+
}
|
23
|
+
},
|
24
|
+
"type": "object",
|
25
|
+
"properties": {
|
26
|
+
"sql": {
|
27
|
+
"$ref": "#/$defs/SQLJobFacet"
|
28
|
+
}
|
29
|
+
}
|
30
|
+
}
|
@@ -0,0 +1,59 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-1-1/SchemaDatasetFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"SchemaDatasetFacetFields": {
|
6
|
+
"type": "object",
|
7
|
+
"properties": {
|
8
|
+
"name": {
|
9
|
+
"description": "The name of the field.",
|
10
|
+
"type": "string",
|
11
|
+
"example": "column1"
|
12
|
+
},
|
13
|
+
"type": {
|
14
|
+
"description": "The type of the field.",
|
15
|
+
"type": "string",
|
16
|
+
"example": "VARCHAR|INT|..."
|
17
|
+
},
|
18
|
+
"description": {
|
19
|
+
"description": "The description of the field.",
|
20
|
+
"type": "string"
|
21
|
+
},
|
22
|
+
"fields": {
|
23
|
+
"description": "Nested struct fields.",
|
24
|
+
"type": "array",
|
25
|
+
"items": {
|
26
|
+
"$ref": "#/$defs/SchemaDatasetFacetFields"
|
27
|
+
}
|
28
|
+
}
|
29
|
+
},
|
30
|
+
"required": ["name"]
|
31
|
+
},
|
32
|
+
"SchemaDatasetFacet": {
|
33
|
+
"allOf": [
|
34
|
+
{
|
35
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
|
36
|
+
},
|
37
|
+
{
|
38
|
+
"type": "object",
|
39
|
+
"properties": {
|
40
|
+
"fields": {
|
41
|
+
"description": "The fields of the data source.",
|
42
|
+
"type": "array",
|
43
|
+
"items": {
|
44
|
+
"$ref": "#/$defs/SchemaDatasetFacetFields"
|
45
|
+
}
|
46
|
+
}
|
47
|
+
}
|
48
|
+
}
|
49
|
+
],
|
50
|
+
"type": "object"
|
51
|
+
}
|
52
|
+
},
|
53
|
+
"type": "object",
|
54
|
+
"properties": {
|
55
|
+
"schema": {
|
56
|
+
"$ref": "#/$defs/SchemaDatasetFacet"
|
57
|
+
}
|
58
|
+
}
|
59
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/SourceCodeJobFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"SourceCodeJobFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"language": {
|
14
|
+
"description": "Language in which source code of this job was written.",
|
15
|
+
"type": "string"
|
16
|
+
},
|
17
|
+
"sourceCode": {
|
18
|
+
"description": "Source code of this job.",
|
19
|
+
"type": "string"
|
20
|
+
}
|
21
|
+
},
|
22
|
+
"required": ["language", "sourceCode"]
|
23
|
+
}
|
24
|
+
],
|
25
|
+
"type": "object"
|
26
|
+
}
|
27
|
+
},
|
28
|
+
"type": "object",
|
29
|
+
"properties": {
|
30
|
+
"sourceCode": {
|
31
|
+
"$ref": "#/$defs/SourceCodeJobFacet"
|
32
|
+
}
|
33
|
+
}
|
34
|
+
}
|
@@ -0,0 +1,60 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/SourceCodeLocationJobFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"SourceCodeLocationJobFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"type": {
|
14
|
+
"type": "string",
|
15
|
+
"description": "the source control system",
|
16
|
+
"example": "git|svn"
|
17
|
+
},
|
18
|
+
"url": {
|
19
|
+
"type": "string",
|
20
|
+
"description": "the full http URL to locate the file",
|
21
|
+
"format": "uri",
|
22
|
+
"example": "https://github.com/MarquezProject/marquez-airflow-quickstart/blob/693e35482bc2e526ced2b5f9f76ef83dec6ec691/dags/dummy_example.py"
|
23
|
+
},
|
24
|
+
"repoUrl": {
|
25
|
+
"type": "string",
|
26
|
+
"description": "the URL to the repository",
|
27
|
+
"example": "git@github.com:{org}/{repo}.git or https://github.com/{org}/{repo}.git|svn://<your_ip>/<repository_name>"
|
28
|
+
},
|
29
|
+
"path": {
|
30
|
+
"type": "string",
|
31
|
+
"description": "the path in the repo containing the source files",
|
32
|
+
"example": "path/to/my/dags"
|
33
|
+
},
|
34
|
+
"version": {
|
35
|
+
"type": "string",
|
36
|
+
"description": "the current version deployed (not a branch name, the actual unique version)",
|
37
|
+
"example": "git: the git sha | Svn: the revision number"
|
38
|
+
},
|
39
|
+
"tag": {
|
40
|
+
"type": "string",
|
41
|
+
"description": "optional tag name"
|
42
|
+
},
|
43
|
+
"branch": {
|
44
|
+
"type": "string",
|
45
|
+
"description": "optional branch name"
|
46
|
+
}
|
47
|
+
},
|
48
|
+
"required": ["type", "url"]
|
49
|
+
}
|
50
|
+
],
|
51
|
+
"type": "object"
|
52
|
+
}
|
53
|
+
},
|
54
|
+
"type": "object",
|
55
|
+
"properties": {
|
56
|
+
"sourceCodeLocation": {
|
57
|
+
"$ref": "#/$defs/SourceCodeLocationJobFacet"
|
58
|
+
}
|
59
|
+
}
|
60
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/StorageDatasetFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"StorageDatasetFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"storageLayer": {
|
14
|
+
"description": "Storage layer provider with allowed values: iceberg, delta.",
|
15
|
+
"type": "string"
|
16
|
+
},
|
17
|
+
"fileFormat": {
|
18
|
+
"description": "File format with allowed values: parquet, orc, avro, json, csv, text, xml.",
|
19
|
+
"type": "string"
|
20
|
+
}
|
21
|
+
},
|
22
|
+
"additionalProperties": true,
|
23
|
+
"required": ["storageLayer"]
|
24
|
+
}
|
25
|
+
],
|
26
|
+
"type": "object"
|
27
|
+
}
|
28
|
+
},
|
29
|
+
"type": "object",
|
30
|
+
"properties": {
|
31
|
+
"storage": {
|
32
|
+
"$ref": "#/$defs/StorageDatasetFacet"
|
33
|
+
}
|
34
|
+
}
|
35
|
+
}
|
@@ -0,0 +1,47 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
3
|
+
"$id": "https://openlineage.io/spec/facets/1-0-1/SymlinksDatasetFacet.json",
|
4
|
+
"$defs": {
|
5
|
+
"SymlinksDatasetFacet": {
|
6
|
+
"allOf": [
|
7
|
+
{
|
8
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
|
9
|
+
},
|
10
|
+
{
|
11
|
+
"type": "object",
|
12
|
+
"properties": {
|
13
|
+
"identifiers": {
|
14
|
+
"type": "array",
|
15
|
+
"items": {
|
16
|
+
"type": "object",
|
17
|
+
"properties": {
|
18
|
+
"namespace": {
|
19
|
+
"type": "string",
|
20
|
+
"description": "The dataset namespace"
|
21
|
+
},
|
22
|
+
"name": {
|
23
|
+
"type": "string",
|
24
|
+
"description": "The dataset name"
|
25
|
+
},
|
26
|
+
"type": {
|
27
|
+
"type": "string",
|
28
|
+
"description": "Identifier type",
|
29
|
+
"example": "table"
|
30
|
+
}
|
31
|
+
},
|
32
|
+
"required": ["namespace", "name", "type"]
|
33
|
+
}
|
34
|
+
}
|
35
|
+
}
|
36
|
+
}
|
37
|
+
],
|
38
|
+
"type": "object"
|
39
|
+
}
|
40
|
+
},
|
41
|
+
"type": "object",
|
42
|
+
"properties": {
|
43
|
+
"symlinks": {
|
44
|
+
"$ref": "#/$defs/SymlinksDatasetFacet"
|
45
|
+
}
|
46
|
+
}
|
47
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
{
|
2
|
+
"producer": {
|
3
|
+
"root_doc_url": "https://github.com/OpenLineage/OpenLineage/tree/main/spec",
|
4
|
+
"produced_facets": [
|
5
|
+
"ol:core:ColumnLineageDatasetFacet.json",
|
6
|
+
"ol:core:DataQualityAssertionsDatasetFacet.json",
|
7
|
+
"ol:core:DataQualityMetricsInputDatasetFacet.json",
|
8
|
+
"ol:core:DatasetVersionDatasetFacet.json",
|
9
|
+
"ol:core:DatasourceDatasetFacet.json",
|
10
|
+
"ol:core:DocumentationDatasetFacet.json",
|
11
|
+
"ol:core:DocumentationJobFacet.json",
|
12
|
+
"ol:core:ErrorMessageRunFacet.json",
|
13
|
+
"ol:core:ExternalQueryRunFacet.json",
|
14
|
+
"ol:core:ExtractionErrorRunFacet.json",
|
15
|
+
"ol:core:JobTypeJobFacet.json",
|
16
|
+
"ol:core:LifecycleStateChangeDatasetFacet.json",
|
17
|
+
"ol:core:NominalTimeRunFacet.json",
|
18
|
+
"ol:core:OutputStatisticsOutputDatasetFacet.json",
|
19
|
+
"ol:core:OwnershipDatasetFacet.json",
|
20
|
+
"ol:core:OwnershipJobFacet.json",
|
21
|
+
"ol:core:ParentRunFacet.json",
|
22
|
+
"ol:core:ProcessingEngineRunFacet.json",
|
23
|
+
"ol:core:SQLJobFacet.json",
|
24
|
+
"ol:core:SchemaDatasetFacet.json",
|
25
|
+
"ol:core:SourceCodeJobFacet.json",
|
26
|
+
"ol:core:SourceCodeLocationJobFacet.json",
|
27
|
+
"ol:core:StorageDatasetFacet.json",
|
28
|
+
"ol:core:SymlinksDatasetFacet.json"
|
29
|
+
]
|
30
|
+
}
|
31
|
+
}
|