fluentd-openlineage-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. checksums.yaml +7 -0
  2. data/.idea/.gitignore +8 -0
  3. data/.idea/fluentd.iml +204 -0
  4. data/.idea/misc.xml +4 -0
  5. data/.idea/modules/benchmark-memory-0.2.iml +12 -0
  6. data/.idea/modules/bigdecimal-3.1.iml +11 -0
  7. data/.idea/modules/certstore_c-0.1.iml +15 -0
  8. data/.idea/modules/concurrent-ruby-1.3.iml +18 -0
  9. data/.idea/modules/concurrent-ruby-1.31.iml +15 -0
  10. data/.idea/modules/connection_pool-2.4.iml +11 -0
  11. data/.idea/modules/cool.io-1.8.iml +16 -0
  12. data/.idea/modules/drb-2.2.iml +14 -0
  13. data/.idea/modules/drb-2.21.iml +11 -0
  14. data/.idea/modules/ffi-1.17.iml +20 -0
  15. data/.idea/modules/ffi-win32-extensions-1.0.iml +19 -0
  16. data/.idea/modules/fluentd-1.17.iml +43 -0
  17. data/.idea/modules/http_parser.rb-0.8.iml +17 -0
  18. data/.idea/modules/json-2.7.iml +14 -0
  19. data/.idea/modules/json-2.71.iml +11 -0
  20. data/.idea/modules/msgpack-1.7.iml +15 -0
  21. data/.idea/modules/mutex_m-0.2.iml +15 -0
  22. data/.idea/modules/new_gem.iml +15 -0
  23. data/.idea/modules/power_assert-2.0.iml +19 -0
  24. data/.idea/modules/rake-13.2.iml +18 -0
  25. data/.idea/modules/rake-13.21.iml +15 -0
  26. data/.idea/modules/rake-compiler-1.2.iml +13 -0
  27. data/.idea/modules/rusty_json_schema-0.15.iml +15 -0
  28. data/.idea/modules/serverengine-2.3.iml +17 -0
  29. data/.idea/modules/sigdump-0.2.iml +16 -0
  30. data/.idea/modules/specifications.iml +14 -0
  31. data/.idea/modules/specifications1.iml +11 -0
  32. data/.idea/modules/strptime-0.2.iml +16 -0
  33. data/.idea/modules/thermite-0.13.iml +17 -0
  34. data/.idea/modules/webrick-1.8.iml +18 -0
  35. data/.idea/modules/win32-event-0.6.iml +21 -0
  36. data/.idea/modules/win32-ipc-0.7.iml +20 -0
  37. data/.idea/modules/yajl-ruby-1.4.iml +779 -0
  38. data/.idea/modules.xml +41 -0
  39. data/Gemfile +3 -0
  40. data/README.md +223 -0
  41. data/Rakefile +13 -0
  42. data/config/conf/fluent.conf +101 -0
  43. data/config/test-complete.json +73 -0
  44. data/config/test-start.json +73 -0
  45. data/events/event_full.json +206 -0
  46. data/events/event_invalid_dataset_facet.json +31 -0
  47. data/events/event_invalid_input_dataset_facet.json +29 -0
  48. data/events/event_invalid_job_facet.json +26 -0
  49. data/events/event_invalid_output_dataset_facet.json +29 -0
  50. data/events/event_invalid_run_facet.json +28 -0
  51. data/events/event_no_run_id.json +28 -0
  52. data/events/event_simple.json +29 -0
  53. data/fluentd-openlineage-parser.gemspec +28 -0
  54. data/lib/fluent/plugin/fluentd-openlineage-parser.rb +182 -0
  55. data/spec/Naming.md +500 -0
  56. data/spec/OpenLineage.json +304 -0
  57. data/spec/OpenLineage.md +179 -0
  58. data/spec/OpenLineage.yml +27 -0
  59. data/spec/OpenLineageModel.svg +1 -0
  60. data/spec/Versioning.md +49 -0
  61. data/spec/facets/ColumnLineageDatasetFacet.json +96 -0
  62. data/spec/facets/ColumnLineageDatasetFacet.md +106 -0
  63. data/spec/facets/DataQualityAssertionsDatasetFacet.json +49 -0
  64. data/spec/facets/DataQualityMetricsInputDatasetFacet.json +76 -0
  65. data/spec/facets/DatasetVersionDatasetFacet.json +31 -0
  66. data/spec/facets/DatasourceDatasetFacet.json +32 -0
  67. data/spec/facets/DocumentationDatasetFacet.json +31 -0
  68. data/spec/facets/DocumentationJobFacet.json +30 -0
  69. data/spec/facets/ErrorMessageRunFacet.json +41 -0
  70. data/spec/facets/ExternalQueryRunFacet.json +36 -0
  71. data/spec/facets/ExternalQueryRunFacet.md +49 -0
  72. data/spec/facets/ExtractionErrorRunFacet.json +58 -0
  73. data/spec/facets/JobTypeJobFacet.json +41 -0
  74. data/spec/facets/LifecycleStateChangeDatasetFacet.json +46 -0
  75. data/spec/facets/NominalTimeRunFacet.json +38 -0
  76. data/spec/facets/OutputStatisticsOutputDatasetFacet.json +36 -0
  77. data/spec/facets/OwnershipDatasetFacet.json +45 -0
  78. data/spec/facets/OwnershipJobFacet.json +45 -0
  79. data/spec/facets/ParentRunFacet.json +54 -0
  80. data/spec/facets/ProcessingEngineRunFacet.json +41 -0
  81. data/spec/facets/SQLJobFacet.json +30 -0
  82. data/spec/facets/SchemaDatasetFacet.json +59 -0
  83. data/spec/facets/SourceCodeJobFacet.json +34 -0
  84. data/spec/facets/SourceCodeLocationJobFacet.json +60 -0
  85. data/spec/facets/StorageDatasetFacet.json +35 -0
  86. data/spec/facets/SymlinksDatasetFacet.json +47 -0
  87. data/spec/registry/core/registry.json +31 -0
  88. data/spec/registry/gcp/facets/GcpCommonJobFacet.json +43 -0
  89. data/spec/registry/gcp/registry.json +6 -0
  90. data/spec/release.sh +80 -0
  91. data/spec/tests/ColumnLineageDatasetFacet/1.json +172 -0
  92. data/spec/tests/DataQualityAssertionsDatasetFacet/1.json +58 -0
  93. data/spec/tests/DataQualityMetricsInputDatasetFacet/1.json +23 -0
  94. data/spec/tests/DatasetVersionDatasetFacet/1.json +7 -0
  95. data/spec/tests/DatasourceDatasetFacet/1.json +7 -0
  96. data/spec/tests/DocumentationDatasetFacet/1.json +7 -0
  97. data/spec/tests/DocumentationJobFacet/1.json +7 -0
  98. data/spec/tests/ErrorMessageRunFacet/1.json +9 -0
  99. data/spec/tests/ExternalQueryRunFacet/1.json +8 -0
  100. data/spec/tests/ExtractionErrorRunFacet/1.json +15 -0
  101. data/spec/tests/JobTypeJobFacet/1.json +9 -0
  102. data/spec/tests/LifecycleStateChangeDatasetFacet/1.json +11 -0
  103. data/spec/tests/NominalTimeRunFacet/1.json +8 -0
  104. data/spec/tests/OutputStatisticsOutputDatasetFacet/1.json +9 -0
  105. data/spec/tests/OwnershipDatasetFacet/1.json +11 -0
  106. data/spec/tests/OwnershipJobFacet/1.json +11 -0
  107. data/spec/tests/ParentRunFacet/1.json +13 -0
  108. data/spec/tests/ProcessingEngineRunFacet/1.json +9 -0
  109. data/spec/tests/SQLJobFacet/1.json +7 -0
  110. data/spec/tests/SchemaDatasetFacet/1.json +92 -0
  111. data/spec/tests/SourceCodeJobFacet/1.json +8 -0
  112. data/spec/tests/SourceCodeLocationJobFacet/1.json +8 -0
  113. data/spec/tests/StorageDatasetFacet/1.json +8 -0
  114. data/spec/tests/SymlinksDatasetFacet/1.json +13 -0
  115. data/spec/tests/example_full_event.json +24 -0
  116. data/test/helper.rb +8 -0
  117. data/test/plugin/test_parser_openlineage.rb +141 -0
  118. metadata +298 -0
@@ -0,0 +1,41 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/2-0-3/JobTypeJobFacet.json",
4
+ "$defs": {
5
+ "JobTypeJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "processingType": {
14
+ "description": "Job processing type like: BATCH or STREAMING",
15
+ "type": "string",
16
+ "example": "BATCH"
17
+ },
18
+ "integration": {
19
+ "description": "OpenLineage integration type of this job: for example SPARK|DBT|AIRFLOW|FLINK",
20
+ "type": "string",
21
+ "example": "SPARK"
22
+ },
23
+ "jobType": {
24
+ "description": "Run type, for example: QUERY|COMMAND|DAG|TASK|JOB|MODEL. This is an integration-specific field.",
25
+ "type": "string",
26
+ "example": "QUERY"
27
+ }
28
+ },
29
+ "required": ["processingType", "integration"]
30
+ }
31
+ ],
32
+ "type": "object"
33
+ }
34
+ },
35
+ "type": "object",
36
+ "properties": {
37
+ "jobType": {
38
+ "$ref": "#/$defs/JobTypeJobFacet"
39
+ }
40
+ }
41
+ }
@@ -0,0 +1,46 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/LifecycleStateChangeDatasetFacet.json",
4
+ "$defs": {
5
+ "LifecycleStateChangeDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "lifecycleStateChange": {
14
+ "description": "The lifecycle state change.",
15
+ "type": "string",
16
+ "enum": ["ALTER", "CREATE", "DROP", "OVERWRITE", "RENAME", "TRUNCATE"]
17
+ },
18
+ "previousIdentifier": {
19
+ "description": "Previous name of the dataset in case of renaming it.",
20
+ "type": "object",
21
+ "properties": {
22
+ "name": {
23
+ "documentation": "Previous dataset name.",
24
+ "type": "string"
25
+ },
26
+ "namespace": {
27
+ "documentation": "Previous dataset namespace.",
28
+ "type": "string"
29
+ }
30
+ },
31
+ "required": ["name", "namespace"]
32
+ }
33
+ },
34
+ "required": ["lifecycleStateChange"]
35
+ }
36
+ ],
37
+ "type": "object"
38
+ }
39
+ },
40
+ "type": "object",
41
+ "properties": {
42
+ "lifecycleStateChange": {
43
+ "$ref": "#/$defs/LifecycleStateChangeDatasetFacet"
44
+ }
45
+ }
46
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/NominalTimeRunFacet.json",
4
+ "$defs": {
5
+ "NominalTimeRunFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "nominalStartTime": {
14
+ "description": "An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal start time (included) of the run. AKA the schedule time",
15
+ "type": "string",
16
+ "format": "date-time",
17
+ "example": "2020-12-17T03:00:00.000Z"
18
+ },
19
+ "nominalEndTime": {
20
+ "description": "An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal end time (excluded) of the run. (Should be the nominal start time of the next run)",
21
+ "type": "string",
22
+ "format": "date-time",
23
+ "example": "2020-12-17T04:00:00.000Z"
24
+ }
25
+ },
26
+ "required": ["nominalStartTime"]
27
+ }
28
+ ],
29
+ "type": "object"
30
+ }
31
+ },
32
+ "type": "object",
33
+ "properties": {
34
+ "nominalTime": {
35
+ "$ref": "#/$defs/NominalTimeRunFacet"
36
+ }
37
+ }
38
+ }
@@ -0,0 +1,36 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-2/OutputStatisticsOutputDatasetFacet.json",
4
+ "$defs": {
5
+ "OutputStatisticsOutputDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/OutputDatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "rowCount": {
14
+ "description": "The number of rows written to the dataset",
15
+ "type": "integer"
16
+ },
17
+ "size": {
18
+ "description": "The size in bytes written to the dataset",
19
+ "type": "integer"
20
+ },
21
+ "fileCount": {
22
+ "description": "The number of files written to the dataset",
23
+ "type": "integer"
24
+ }
25
+ }
26
+ }
27
+ ]
28
+ }
29
+ },
30
+ "type": "object",
31
+ "properties": {
32
+ "outputStatistics": {
33
+ "$ref": "#/$defs/OutputStatisticsOutputDatasetFacet"
34
+ }
35
+ }
36
+ }
@@ -0,0 +1,45 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/OwnershipDatasetFacet.json",
4
+ "$defs": {
5
+ "OwnershipDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "owners": {
14
+ "description": "The owners of the dataset.",
15
+ "type": "array",
16
+ "items": {
17
+ "type": "object",
18
+ "properties": {
19
+ "name": {
20
+ "description": "the identifier of the owner of the Dataset. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data",
21
+ "type": "string",
22
+ "example": "application:app_name"
23
+ },
24
+ "type": {
25
+ "description": "The type of ownership (optional)",
26
+ "type": "string",
27
+ "example": "MAINTAINER"
28
+ }
29
+ },
30
+ "required": ["name"]
31
+ }
32
+ }
33
+ }
34
+ }
35
+ ],
36
+ "type": "object"
37
+ }
38
+ },
39
+ "type": "object",
40
+ "properties": {
41
+ "ownership": {
42
+ "$ref": "#/$defs/OwnershipDatasetFacet"
43
+ }
44
+ }
45
+ }
@@ -0,0 +1,45 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/OwnershipJobFacet.json",
4
+ "$defs": {
5
+ "OwnershipJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "owners": {
14
+ "description": "The owners of the job.",
15
+ "type": "array",
16
+ "items": {
17
+ "type": "object",
18
+ "properties": {
19
+ "name": {
20
+ "description": "the identifier of the owner of the Job. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data",
21
+ "type": "string",
22
+ "example": "application:app_name"
23
+ },
24
+ "type": {
25
+ "description": "The type of ownership (optional)",
26
+ "type": "string",
27
+ "example": "MAINTAINER"
28
+ }
29
+ },
30
+ "required": ["name"]
31
+ }
32
+ }
33
+ }
34
+ }
35
+ ],
36
+ "type": "object"
37
+ }
38
+ },
39
+ "type": "object",
40
+ "properties": {
41
+ "ownership": {
42
+ "$ref": "#/$defs/OwnershipJobFacet"
43
+ }
44
+ }
45
+ }
@@ -0,0 +1,54 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/ParentRunFacet.json",
4
+ "$defs": {
5
+ "ParentRunFacet": {
6
+ "description": "the id of the parent run and job, iff this run was spawned from another run (for example, the Dag run scheduling its tasks)",
7
+ "allOf": [
8
+ {
9
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
10
+ },
11
+ {
12
+ "type": "object",
13
+ "properties": {
14
+ "run": {
15
+ "type": "object",
16
+ "properties": {
17
+ "runId": {
18
+ "description": "The globally unique ID of the run associated with the job.",
19
+ "type": "string",
20
+ "format": "uuid"
21
+ }
22
+ },
23
+ "required": ["runId"]
24
+ },
25
+ "job": {
26
+ "type": "object",
27
+ "properties": {
28
+ "namespace": {
29
+ "description": "The namespace containing that job",
30
+ "type": "string",
31
+ "example": "my-scheduler-namespace"
32
+ },
33
+ "name": {
34
+ "description": "The unique name for that job within that namespace",
35
+ "type": "string",
36
+ "example": "myjob.mytask"
37
+ }
38
+ },
39
+ "required": ["namespace", "name"]
40
+ }
41
+ },
42
+ "required": ["run", "job"]
43
+ }
44
+ ],
45
+ "type": "object"
46
+ }
47
+ },
48
+ "type": "object",
49
+ "properties": {
50
+ "parent": {
51
+ "$ref": "#/$defs/ParentRunFacet"
52
+ }
53
+ }
54
+ }
@@ -0,0 +1,41 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-1-1/ProcessingEngineRunFacet.json",
4
+ "$defs": {
5
+ "ProcessingEngineRunFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "version": {
14
+ "description": "Processing engine version. Might be Airflow or Spark version.",
15
+ "type": "string",
16
+ "example": "2.5.0"
17
+ },
18
+ "name": {
19
+ "description": "Processing engine name, e.g. Airflow or Spark",
20
+ "type": "string",
21
+ "example": "Airflow"
22
+ },
23
+ "openlineageAdapterVersion": {
24
+ "description": "OpenLineage adapter package version. Might be e.g. OpenLineage Airflow integration package version",
25
+ "type": "string",
26
+ "example": "0.19.0"
27
+ }
28
+ },
29
+ "required": ["version"]
30
+ }
31
+ ],
32
+ "type": "object"
33
+ }
34
+ },
35
+ "type": "object",
36
+ "properties": {
37
+ "processing_engine": {
38
+ "$ref": "#/$defs/ProcessingEngineRunFacet"
39
+ }
40
+ }
41
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SQLJobFacet.json",
4
+ "$defs": {
5
+ "SQLJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "query": {
14
+ "type": "string",
15
+ "example": "SELECT * FROM foo"
16
+ }
17
+ },
18
+ "required": ["query"]
19
+ }
20
+ ],
21
+ "type": "object"
22
+ }
23
+ },
24
+ "type": "object",
25
+ "properties": {
26
+ "sql": {
27
+ "$ref": "#/$defs/SQLJobFacet"
28
+ }
29
+ }
30
+ }
@@ -0,0 +1,59 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-1-1/SchemaDatasetFacet.json",
4
+ "$defs": {
5
+ "SchemaDatasetFacetFields": {
6
+ "type": "object",
7
+ "properties": {
8
+ "name": {
9
+ "description": "The name of the field.",
10
+ "type": "string",
11
+ "example": "column1"
12
+ },
13
+ "type": {
14
+ "description": "The type of the field.",
15
+ "type": "string",
16
+ "example": "VARCHAR|INT|..."
17
+ },
18
+ "description": {
19
+ "description": "The description of the field.",
20
+ "type": "string"
21
+ },
22
+ "fields": {
23
+ "description": "Nested struct fields.",
24
+ "type": "array",
25
+ "items": {
26
+ "$ref": "#/$defs/SchemaDatasetFacetFields"
27
+ }
28
+ }
29
+ },
30
+ "required": ["name"]
31
+ },
32
+ "SchemaDatasetFacet": {
33
+ "allOf": [
34
+ {
35
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
36
+ },
37
+ {
38
+ "type": "object",
39
+ "properties": {
40
+ "fields": {
41
+ "description": "The fields of the data source.",
42
+ "type": "array",
43
+ "items": {
44
+ "$ref": "#/$defs/SchemaDatasetFacetFields"
45
+ }
46
+ }
47
+ }
48
+ }
49
+ ],
50
+ "type": "object"
51
+ }
52
+ },
53
+ "type": "object",
54
+ "properties": {
55
+ "schema": {
56
+ "$ref": "#/$defs/SchemaDatasetFacet"
57
+ }
58
+ }
59
+ }
@@ -0,0 +1,34 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SourceCodeJobFacet.json",
4
+ "$defs": {
5
+ "SourceCodeJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "language": {
14
+ "description": "Language in which source code of this job was written.",
15
+ "type": "string"
16
+ },
17
+ "sourceCode": {
18
+ "description": "Source code of this job.",
19
+ "type": "string"
20
+ }
21
+ },
22
+ "required": ["language", "sourceCode"]
23
+ }
24
+ ],
25
+ "type": "object"
26
+ }
27
+ },
28
+ "type": "object",
29
+ "properties": {
30
+ "sourceCode": {
31
+ "$ref": "#/$defs/SourceCodeJobFacet"
32
+ }
33
+ }
34
+ }
@@ -0,0 +1,60 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SourceCodeLocationJobFacet.json",
4
+ "$defs": {
5
+ "SourceCodeLocationJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "type": {
14
+ "type": "string",
15
+ "description": "the source control system",
16
+ "example": "git|svn"
17
+ },
18
+ "url": {
19
+ "type": "string",
20
+ "description": "the full http URL to locate the file",
21
+ "format": "uri",
22
+ "example": "https://github.com/MarquezProject/marquez-airflow-quickstart/blob/693e35482bc2e526ced2b5f9f76ef83dec6ec691/dags/dummy_example.py"
23
+ },
24
+ "repoUrl": {
25
+ "type": "string",
26
+ "description": "the URL to the repository",
27
+ "example": "git@github.com:{org}/{repo}.git or https://github.com/{org}/{repo}.git|svn://<your_ip>/<repository_name>"
28
+ },
29
+ "path": {
30
+ "type": "string",
31
+ "description": "the path in the repo containing the source files",
32
+ "example": "path/to/my/dags"
33
+ },
34
+ "version": {
35
+ "type": "string",
36
+ "description": "the current version deployed (not a branch name, the actual unique version)",
37
+ "example": "git: the git sha | Svn: the revision number"
38
+ },
39
+ "tag": {
40
+ "type": "string",
41
+ "description": "optional tag name"
42
+ },
43
+ "branch": {
44
+ "type": "string",
45
+ "description": "optional branch name"
46
+ }
47
+ },
48
+ "required": ["type", "url"]
49
+ }
50
+ ],
51
+ "type": "object"
52
+ }
53
+ },
54
+ "type": "object",
55
+ "properties": {
56
+ "sourceCodeLocation": {
57
+ "$ref": "#/$defs/SourceCodeLocationJobFacet"
58
+ }
59
+ }
60
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/StorageDatasetFacet.json",
4
+ "$defs": {
5
+ "StorageDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "storageLayer": {
14
+ "description": "Storage layer provider with allowed values: iceberg, delta.",
15
+ "type": "string"
16
+ },
17
+ "fileFormat": {
18
+ "description": "File format with allowed values: parquet, orc, avro, json, csv, text, xml.",
19
+ "type": "string"
20
+ }
21
+ },
22
+ "additionalProperties": true,
23
+ "required": ["storageLayer"]
24
+ }
25
+ ],
26
+ "type": "object"
27
+ }
28
+ },
29
+ "type": "object",
30
+ "properties": {
31
+ "storage": {
32
+ "$ref": "#/$defs/StorageDatasetFacet"
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,47 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SymlinksDatasetFacet.json",
4
+ "$defs": {
5
+ "SymlinksDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "identifiers": {
14
+ "type": "array",
15
+ "items": {
16
+ "type": "object",
17
+ "properties": {
18
+ "namespace": {
19
+ "type": "string",
20
+ "description": "The dataset namespace"
21
+ },
22
+ "name": {
23
+ "type": "string",
24
+ "description": "The dataset name"
25
+ },
26
+ "type": {
27
+ "type": "string",
28
+ "description": "Identifier type",
29
+ "example": "table"
30
+ }
31
+ },
32
+ "required": ["namespace", "name", "type"]
33
+ }
34
+ }
35
+ }
36
+ }
37
+ ],
38
+ "type": "object"
39
+ }
40
+ },
41
+ "type": "object",
42
+ "properties": {
43
+ "symlinks": {
44
+ "$ref": "#/$defs/SymlinksDatasetFacet"
45
+ }
46
+ }
47
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "producer": {
3
+ "root_doc_url": "https://github.com/OpenLineage/OpenLineage/tree/main/spec",
4
+ "produced_facets": [
5
+ "ol:core:ColumnLineageDatasetFacet.json",
6
+ "ol:core:DataQualityAssertionsDatasetFacet.json",
7
+ "ol:core:DataQualityMetricsInputDatasetFacet.json",
8
+ "ol:core:DatasetVersionDatasetFacet.json",
9
+ "ol:core:DatasourceDatasetFacet.json",
10
+ "ol:core:DocumentationDatasetFacet.json",
11
+ "ol:core:DocumentationJobFacet.json",
12
+ "ol:core:ErrorMessageRunFacet.json",
13
+ "ol:core:ExternalQueryRunFacet.json",
14
+ "ol:core:ExtractionErrorRunFacet.json",
15
+ "ol:core:JobTypeJobFacet.json",
16
+ "ol:core:LifecycleStateChangeDatasetFacet.json",
17
+ "ol:core:NominalTimeRunFacet.json",
18
+ "ol:core:OutputStatisticsOutputDatasetFacet.json",
19
+ "ol:core:OwnershipDatasetFacet.json",
20
+ "ol:core:OwnershipJobFacet.json",
21
+ "ol:core:ParentRunFacet.json",
22
+ "ol:core:ProcessingEngineRunFacet.json",
23
+ "ol:core:SQLJobFacet.json",
24
+ "ol:core:SchemaDatasetFacet.json",
25
+ "ol:core:SourceCodeJobFacet.json",
26
+ "ol:core:SourceCodeLocationJobFacet.json",
27
+ "ol:core:StorageDatasetFacet.json",
28
+ "ol:core:SymlinksDatasetFacet.json"
29
+ ]
30
+ }
31
+ }