fluent-plugin-openlineage 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +6 -0
  3. data/.github/workflows/linux.yml +30 -0
  4. data/.gitignore +16 -0
  5. data/.idea/.gitignore +8 -0
  6. data/.idea/fluentd.iml +204 -0
  7. data/.idea/misc.xml +4 -0
  8. data/.idea/modules/benchmark-memory-0.2.iml +12 -0
  9. data/.idea/modules/bigdecimal-3.1.iml +11 -0
  10. data/.idea/modules/certstore_c-0.1.iml +15 -0
  11. data/.idea/modules/concurrent-ruby-1.3.iml +18 -0
  12. data/.idea/modules/concurrent-ruby-1.31.iml +15 -0
  13. data/.idea/modules/connection_pool-2.4.iml +11 -0
  14. data/.idea/modules/cool.io-1.8.iml +16 -0
  15. data/.idea/modules/drb-2.2.iml +14 -0
  16. data/.idea/modules/drb-2.21.iml +11 -0
  17. data/.idea/modules/ffi-1.17.iml +20 -0
  18. data/.idea/modules/ffi-win32-extensions-1.0.iml +19 -0
  19. data/.idea/modules/fluentd-1.17.iml +43 -0
  20. data/.idea/modules/http_parser.rb-0.8.iml +17 -0
  21. data/.idea/modules/json-2.7.iml +14 -0
  22. data/.idea/modules/json-2.71.iml +11 -0
  23. data/.idea/modules/msgpack-1.7.iml +15 -0
  24. data/.idea/modules/mutex_m-0.2.iml +15 -0
  25. data/.idea/modules/new_gem.iml +15 -0
  26. data/.idea/modules/power_assert-2.0.iml +19 -0
  27. data/.idea/modules/rake-13.2.iml +18 -0
  28. data/.idea/modules/rake-13.21.iml +15 -0
  29. data/.idea/modules/rake-compiler-1.2.iml +13 -0
  30. data/.idea/modules/rusty_json_schema-0.15.iml +15 -0
  31. data/.idea/modules/serverengine-2.3.iml +17 -0
  32. data/.idea/modules/sigdump-0.2.iml +16 -0
  33. data/.idea/modules/specifications.iml +14 -0
  34. data/.idea/modules/specifications1.iml +11 -0
  35. data/.idea/modules/strptime-0.2.iml +16 -0
  36. data/.idea/modules/thermite-0.13.iml +17 -0
  37. data/.idea/modules/webrick-1.8.iml +18 -0
  38. data/.idea/modules/win32-event-0.6.iml +21 -0
  39. data/.idea/modules/win32-ipc-0.7.iml +20 -0
  40. data/.idea/modules/yajl-ruby-1.4.iml +779 -0
  41. data/.idea/modules.xml +41 -0
  42. data/.rspec +2 -0
  43. data/ChangeLog +3 -0
  44. data/Gemfile +3 -0
  45. data/LICENSE +202 -0
  46. data/README.md +250 -0
  47. data/Rakefile +13 -0
  48. data/fluent-plugin-openlineage.gemspec +28 -0
  49. data/lib/fluent/plugin/parser_openlineage.rb +182 -0
  50. data/misc/fluent.conf +101 -0
  51. data/misc/test-complete.json +73 -0
  52. data/misc/test-start.json +73 -0
  53. data/spec/Naming.md +500 -0
  54. data/spec/OpenLineage.json +304 -0
  55. data/spec/Versioning.md +49 -0
  56. data/spec/events/event_full.json +206 -0
  57. data/spec/events/event_invalid_dataset_facet.json +31 -0
  58. data/spec/events/event_invalid_input_dataset_facet.json +29 -0
  59. data/spec/events/event_invalid_job_facet.json +26 -0
  60. data/spec/events/event_invalid_output_dataset_facet.json +29 -0
  61. data/spec/events/event_invalid_run_facet.json +28 -0
  62. data/spec/events/event_no_run_id.json +28 -0
  63. data/spec/events/event_simple.json +29 -0
  64. data/spec/facets/ColumnLineageDatasetFacet.json +96 -0
  65. data/spec/facets/ColumnLineageDatasetFacet.md +106 -0
  66. data/spec/facets/DataQualityAssertionsDatasetFacet.json +49 -0
  67. data/spec/facets/DataQualityMetricsInputDatasetFacet.json +76 -0
  68. data/spec/facets/DatasetVersionDatasetFacet.json +31 -0
  69. data/spec/facets/DatasourceDatasetFacet.json +32 -0
  70. data/spec/facets/DocumentationDatasetFacet.json +31 -0
  71. data/spec/facets/DocumentationJobFacet.json +30 -0
  72. data/spec/facets/ErrorMessageRunFacet.json +41 -0
  73. data/spec/facets/ExternalQueryRunFacet.json +36 -0
  74. data/spec/facets/ExternalQueryRunFacet.md +49 -0
  75. data/spec/facets/ExtractionErrorRunFacet.json +58 -0
  76. data/spec/facets/JobTypeJobFacet.json +41 -0
  77. data/spec/facets/LifecycleStateChangeDatasetFacet.json +46 -0
  78. data/spec/facets/NominalTimeRunFacet.json +38 -0
  79. data/spec/facets/OutputStatisticsOutputDatasetFacet.json +36 -0
  80. data/spec/facets/OwnershipDatasetFacet.json +45 -0
  81. data/spec/facets/OwnershipJobFacet.json +45 -0
  82. data/spec/facets/ParentRunFacet.json +54 -0
  83. data/spec/facets/ProcessingEngineRunFacet.json +41 -0
  84. data/spec/facets/SQLJobFacet.json +30 -0
  85. data/spec/facets/SchemaDatasetFacet.json +59 -0
  86. data/spec/facets/SourceCodeJobFacet.json +34 -0
  87. data/spec/facets/SourceCodeLocationJobFacet.json +60 -0
  88. data/spec/facets/StorageDatasetFacet.json +35 -0
  89. data/spec/facets/SymlinksDatasetFacet.json +47 -0
  90. data/spec/fluent/plugin/test_parser_openlineage.rb +141 -0
  91. data/spec/registry/core/registry.json +31 -0
  92. data/spec/registry/gcp/facets/GcpCommonJobFacet.json +43 -0
  93. data/spec/registry/gcp/registry.json +6 -0
  94. data/spec/spec_helper.rb +8 -0
  95. data/spec/tests/ColumnLineageDatasetFacet/1.json +172 -0
  96. data/spec/tests/DataQualityAssertionsDatasetFacet/1.json +58 -0
  97. data/spec/tests/DataQualityMetricsInputDatasetFacet/1.json +23 -0
  98. data/spec/tests/DatasetVersionDatasetFacet/1.json +7 -0
  99. data/spec/tests/DatasourceDatasetFacet/1.json +7 -0
  100. data/spec/tests/DocumentationDatasetFacet/1.json +7 -0
  101. data/spec/tests/DocumentationJobFacet/1.json +7 -0
  102. data/spec/tests/ErrorMessageRunFacet/1.json +9 -0
  103. data/spec/tests/ExternalQueryRunFacet/1.json +8 -0
  104. data/spec/tests/ExtractionErrorRunFacet/1.json +15 -0
  105. data/spec/tests/JobTypeJobFacet/1.json +9 -0
  106. data/spec/tests/LifecycleStateChangeDatasetFacet/1.json +11 -0
  107. data/spec/tests/NominalTimeRunFacet/1.json +8 -0
  108. data/spec/tests/OutputStatisticsOutputDatasetFacet/1.json +9 -0
  109. data/spec/tests/OwnershipDatasetFacet/1.json +11 -0
  110. data/spec/tests/OwnershipJobFacet/1.json +11 -0
  111. data/spec/tests/ParentRunFacet/1.json +13 -0
  112. data/spec/tests/ProcessingEngineRunFacet/1.json +9 -0
  113. data/spec/tests/SQLJobFacet/1.json +7 -0
  114. data/spec/tests/SchemaDatasetFacet/1.json +92 -0
  115. data/spec/tests/SourceCodeJobFacet/1.json +8 -0
  116. data/spec/tests/SourceCodeLocationJobFacet/1.json +8 -0
  117. data/spec/tests/StorageDatasetFacet/1.json +8 -0
  118. data/spec/tests/SymlinksDatasetFacet/1.json +13 -0
  119. data/spec/tests/example_full_event.json +24 -0
  120. metadata +188 -3
@@ -0,0 +1,49 @@
1
+ # External Query Run
2
+
3
+ The [external query run facet](ExternalQueryRunFacet.json) captures the identifier of the query that ran on an external
4
+ source system such as [bigquery](https://cloud.google.com/bigquery).
5
+
6
+ The facet consists of the _externalQueryId_ and _source_ fields.
7
+
8
+ - _externalQueryId_: unique identifier of the query from the source system.
9
+ - _source_: type of the source (e.g. bigquery)
10
+
11
+ Full lineage event example:
12
+
13
+ ```
14
+ {
15
+ "eventType": "START",
16
+ "eventTime": "2020-12-09T23:37:31.081Z",
17
+ "run": {
18
+ "runId": "3b452093-782c-4ef2-9c0c-aafe2aa6f34d",
19
+ "facets": {
20
+ > "externalQuery": {
21
+ > "externalQueryId": "my-project-1234:US.bquijob_123x456_123y123z123c",
22
+ > "source": "bigquery"
23
+ > }
24
+ }
25
+ },
26
+ "job": {
27
+ "namespace": "my-namespace",
28
+ "name": "myjob.mytask",
29
+ },
30
+ "inputs": [
31
+ {
32
+ "namespace": "my-datasource-namespace",
33
+ "name": "instance.schema.table",
34
+ }
35
+ ],
36
+ "outputs": [
37
+ {
38
+ "namespace": "my-datasource-namespace",
39
+ "name": "instance.schema.output_table",
40
+ }
41
+ ],
42
+ "schemaURL": "https://openlineage.io/spec/1-0-0/OpenLineage.json#/definitions/RunEvent"
43
+ }
44
+ ```
45
+
46
+ ---
47
+
48
+ SPDX-License-Identifier: Apache-2.0\
49
+ Copyright 2018-2024 contributors to the OpenLineage project
@@ -0,0 +1,58 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-1-2/ExtractionErrorRunFacet.json",
4
+ "$defs": {
5
+ "ExtractionErrorRunFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "totalTasks": {
14
+ "description": "The number of distinguishable tasks in a run that were processed by OpenLineage, whether successfully or not. Those could be, for example, distinct SQL statements.",
15
+ "type": "integer"
16
+ },
17
+ "failedTasks": {
18
+ "description": "The number of distinguishable tasks in a run that were not processed successfully by OpenLineage. Those could be, for example, distinct SQL statements.",
19
+ "type": "integer"
20
+ },
21
+ "errors": {
22
+ "type": "array",
23
+ "items": {
24
+ "type": "object",
25
+ "properties": {
26
+ "errorMessage": {
27
+ "description": "Text representation of extraction error message.",
28
+ "type": "string"
29
+ },
30
+ "stackTrace": {
31
+ "description": "Stack trace of extraction error message",
32
+ "type": "string"
33
+ },
34
+ "task": {
35
+ "description": "Text representation of the task that failed. This can be, for example, a SQL statement that the parser could not interpret.",
36
+ "type": "string"
37
+ },
38
+ "taskNumber": {
39
+ "description": "Order of task (counted from 0).",
40
+ "type": "integer"
41
+ }
42
+ },
43
+ "required": ["errorMessage"]
44
+ }
45
+ }
46
+ },
47
+ "required": ["totalTasks", "failedTasks", "errors"]
48
+ }
49
+ ]
50
+ }
51
+ },
52
+ "type": "object",
53
+ "properties": {
54
+ "extractionError": {
55
+ "$ref": "#/$defs/ExtractionErrorRunFacet"
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,41 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/2-0-3/JobTypeJobFacet.json",
4
+ "$defs": {
5
+ "JobTypeJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "processingType": {
14
+ "description": "Job processing type like: BATCH or STREAMING",
15
+ "type": "string",
16
+ "example": "BATCH"
17
+ },
18
+ "integration": {
19
+ "description": "OpenLineage integration type of this job: for example SPARK|DBT|AIRFLOW|FLINK",
20
+ "type": "string",
21
+ "example": "SPARK"
22
+ },
23
+ "jobType": {
24
+ "description": "Run type, for example: QUERY|COMMAND|DAG|TASK|JOB|MODEL. This is an integration-specific field.",
25
+ "type": "string",
26
+ "example": "QUERY"
27
+ }
28
+ },
29
+ "required": ["processingType", "integration"]
30
+ }
31
+ ],
32
+ "type": "object"
33
+ }
34
+ },
35
+ "type": "object",
36
+ "properties": {
37
+ "jobType": {
38
+ "$ref": "#/$defs/JobTypeJobFacet"
39
+ }
40
+ }
41
+ }
@@ -0,0 +1,46 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/LifecycleStateChangeDatasetFacet.json",
4
+ "$defs": {
5
+ "LifecycleStateChangeDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "lifecycleStateChange": {
14
+ "description": "The lifecycle state change.",
15
+ "type": "string",
16
+ "enum": ["ALTER", "CREATE", "DROP", "OVERWRITE", "RENAME", "TRUNCATE"]
17
+ },
18
+ "previousIdentifier": {
19
+ "description": "Previous name of the dataset in case of renaming it.",
20
+ "type": "object",
21
+ "properties": {
22
+ "name": {
23
+ "documentation": "Previous dataset name.",
24
+ "type": "string"
25
+ },
26
+ "namespace": {
27
+ "documentation": "Previous dataset namespace.",
28
+ "type": "string"
29
+ }
30
+ },
31
+ "required": ["name", "namespace"]
32
+ }
33
+ },
34
+ "required": ["lifecycleStateChange"]
35
+ }
36
+ ],
37
+ "type": "object"
38
+ }
39
+ },
40
+ "type": "object",
41
+ "properties": {
42
+ "lifecycleStateChange": {
43
+ "$ref": "#/$defs/LifecycleStateChangeDatasetFacet"
44
+ }
45
+ }
46
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/NominalTimeRunFacet.json",
4
+ "$defs": {
5
+ "NominalTimeRunFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "nominalStartTime": {
14
+ "description": "An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal start time (included) of the run. AKA the schedule time",
15
+ "type": "string",
16
+ "format": "date-time",
17
+ "example": "2020-12-17T03:00:00.000Z"
18
+ },
19
+ "nominalEndTime": {
20
+ "description": "An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal end time (excluded) of the run. (Should be the nominal start time of the next run)",
21
+ "type": "string",
22
+ "format": "date-time",
23
+ "example": "2020-12-17T04:00:00.000Z"
24
+ }
25
+ },
26
+ "required": ["nominalStartTime"]
27
+ }
28
+ ],
29
+ "type": "object"
30
+ }
31
+ },
32
+ "type": "object",
33
+ "properties": {
34
+ "nominalTime": {
35
+ "$ref": "#/$defs/NominalTimeRunFacet"
36
+ }
37
+ }
38
+ }
@@ -0,0 +1,36 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-2/OutputStatisticsOutputDatasetFacet.json",
4
+ "$defs": {
5
+ "OutputStatisticsOutputDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/OutputDatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "rowCount": {
14
+ "description": "The number of rows written to the dataset",
15
+ "type": "integer"
16
+ },
17
+ "size": {
18
+ "description": "The size in bytes written to the dataset",
19
+ "type": "integer"
20
+ },
21
+ "fileCount": {
22
+ "description": "The number of files written to the dataset",
23
+ "type": "integer"
24
+ }
25
+ }
26
+ }
27
+ ]
28
+ }
29
+ },
30
+ "type": "object",
31
+ "properties": {
32
+ "outputStatistics": {
33
+ "$ref": "#/$defs/OutputStatisticsOutputDatasetFacet"
34
+ }
35
+ }
36
+ }
@@ -0,0 +1,45 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/OwnershipDatasetFacet.json",
4
+ "$defs": {
5
+ "OwnershipDatasetFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "owners": {
14
+ "description": "The owners of the dataset.",
15
+ "type": "array",
16
+ "items": {
17
+ "type": "object",
18
+ "properties": {
19
+ "name": {
20
+ "description": "the identifier of the owner of the Dataset. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data",
21
+ "type": "string",
22
+ "example": "application:app_name"
23
+ },
24
+ "type": {
25
+ "description": "The type of ownership (optional)",
26
+ "type": "string",
27
+ "example": "MAINTAINER"
28
+ }
29
+ },
30
+ "required": ["name"]
31
+ }
32
+ }
33
+ }
34
+ }
35
+ ],
36
+ "type": "object"
37
+ }
38
+ },
39
+ "type": "object",
40
+ "properties": {
41
+ "ownership": {
42
+ "$ref": "#/$defs/OwnershipDatasetFacet"
43
+ }
44
+ }
45
+ }
@@ -0,0 +1,45 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/OwnershipJobFacet.json",
4
+ "$defs": {
5
+ "OwnershipJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "owners": {
14
+ "description": "The owners of the job.",
15
+ "type": "array",
16
+ "items": {
17
+ "type": "object",
18
+ "properties": {
19
+ "name": {
20
+ "description": "the identifier of the owner of the Job. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data",
21
+ "type": "string",
22
+ "example": "application:app_name"
23
+ },
24
+ "type": {
25
+ "description": "The type of ownership (optional)",
26
+ "type": "string",
27
+ "example": "MAINTAINER"
28
+ }
29
+ },
30
+ "required": ["name"]
31
+ }
32
+ }
33
+ }
34
+ }
35
+ ],
36
+ "type": "object"
37
+ }
38
+ },
39
+ "type": "object",
40
+ "properties": {
41
+ "ownership": {
42
+ "$ref": "#/$defs/OwnershipJobFacet"
43
+ }
44
+ }
45
+ }
@@ -0,0 +1,54 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/ParentRunFacet.json",
4
+ "$defs": {
5
+ "ParentRunFacet": {
6
+ "description": "the id of the parent run and job, iff this run was spawned from another run (for example, the Dag run scheduling its tasks)",
7
+ "allOf": [
8
+ {
9
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
10
+ },
11
+ {
12
+ "type": "object",
13
+ "properties": {
14
+ "run": {
15
+ "type": "object",
16
+ "properties": {
17
+ "runId": {
18
+ "description": "The globally unique ID of the run associated with the job.",
19
+ "type": "string",
20
+ "format": "uuid"
21
+ }
22
+ },
23
+ "required": ["runId"]
24
+ },
25
+ "job": {
26
+ "type": "object",
27
+ "properties": {
28
+ "namespace": {
29
+ "description": "The namespace containing that job",
30
+ "type": "string",
31
+ "example": "my-scheduler-namespace"
32
+ },
33
+ "name": {
34
+ "description": "The unique name for that job within that namespace",
35
+ "type": "string",
36
+ "example": "myjob.mytask"
37
+ }
38
+ },
39
+ "required": ["namespace", "name"]
40
+ }
41
+ },
42
+ "required": ["run", "job"]
43
+ }
44
+ ],
45
+ "type": "object"
46
+ }
47
+ },
48
+ "type": "object",
49
+ "properties": {
50
+ "parent": {
51
+ "$ref": "#/$defs/ParentRunFacet"
52
+ }
53
+ }
54
+ }
@@ -0,0 +1,41 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-1-1/ProcessingEngineRunFacet.json",
4
+ "$defs": {
5
+ "ProcessingEngineRunFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "version": {
14
+ "description": "Processing engine version. Might be Airflow or Spark version.",
15
+ "type": "string",
16
+ "example": "2.5.0"
17
+ },
18
+ "name": {
19
+ "description": "Processing engine name, e.g. Airflow or Spark",
20
+ "type": "string",
21
+ "example": "Airflow"
22
+ },
23
+ "openlineageAdapterVersion": {
24
+ "description": "OpenLineage adapter package version. Might be e.g. OpenLineage Airflow integration package version",
25
+ "type": "string",
26
+ "example": "0.19.0"
27
+ }
28
+ },
29
+ "required": ["version"]
30
+ }
31
+ ],
32
+ "type": "object"
33
+ }
34
+ },
35
+ "type": "object",
36
+ "properties": {
37
+ "processing_engine": {
38
+ "$ref": "#/$defs/ProcessingEngineRunFacet"
39
+ }
40
+ }
41
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SQLJobFacet.json",
4
+ "$defs": {
5
+ "SQLJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "query": {
14
+ "type": "string",
15
+ "example": "SELECT * FROM foo"
16
+ }
17
+ },
18
+ "required": ["query"]
19
+ }
20
+ ],
21
+ "type": "object"
22
+ }
23
+ },
24
+ "type": "object",
25
+ "properties": {
26
+ "sql": {
27
+ "$ref": "#/$defs/SQLJobFacet"
28
+ }
29
+ }
30
+ }
@@ -0,0 +1,59 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-1-1/SchemaDatasetFacet.json",
4
+ "$defs": {
5
+ "SchemaDatasetFacetFields": {
6
+ "type": "object",
7
+ "properties": {
8
+ "name": {
9
+ "description": "The name of the field.",
10
+ "type": "string",
11
+ "example": "column1"
12
+ },
13
+ "type": {
14
+ "description": "The type of the field.",
15
+ "type": "string",
16
+ "example": "VARCHAR|INT|..."
17
+ },
18
+ "description": {
19
+ "description": "The description of the field.",
20
+ "type": "string"
21
+ },
22
+ "fields": {
23
+ "description": "Nested struct fields.",
24
+ "type": "array",
25
+ "items": {
26
+ "$ref": "#/$defs/SchemaDatasetFacetFields"
27
+ }
28
+ }
29
+ },
30
+ "required": ["name"]
31
+ },
32
+ "SchemaDatasetFacet": {
33
+ "allOf": [
34
+ {
35
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/DatasetFacet"
36
+ },
37
+ {
38
+ "type": "object",
39
+ "properties": {
40
+ "fields": {
41
+ "description": "The fields of the data source.",
42
+ "type": "array",
43
+ "items": {
44
+ "$ref": "#/$defs/SchemaDatasetFacetFields"
45
+ }
46
+ }
47
+ }
48
+ }
49
+ ],
50
+ "type": "object"
51
+ }
52
+ },
53
+ "type": "object",
54
+ "properties": {
55
+ "schema": {
56
+ "$ref": "#/$defs/SchemaDatasetFacet"
57
+ }
58
+ }
59
+ }
@@ -0,0 +1,34 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SourceCodeJobFacet.json",
4
+ "$defs": {
5
+ "SourceCodeJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "language": {
14
+ "description": "Language in which source code of this job was written.",
15
+ "type": "string"
16
+ },
17
+ "sourceCode": {
18
+ "description": "Source code of this job.",
19
+ "type": "string"
20
+ }
21
+ },
22
+ "required": ["language", "sourceCode"]
23
+ }
24
+ ],
25
+ "type": "object"
26
+ }
27
+ },
28
+ "type": "object",
29
+ "properties": {
30
+ "sourceCode": {
31
+ "$ref": "#/$defs/SourceCodeJobFacet"
32
+ }
33
+ }
34
+ }
@@ -0,0 +1,60 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/facets/1-0-1/SourceCodeLocationJobFacet.json",
4
+ "$defs": {
5
+ "SourceCodeLocationJobFacet": {
6
+ "allOf": [
7
+ {
8
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
9
+ },
10
+ {
11
+ "type": "object",
12
+ "properties": {
13
+ "type": {
14
+ "type": "string",
15
+ "description": "the source control system",
16
+ "example": "git|svn"
17
+ },
18
+ "url": {
19
+ "type": "string",
20
+ "description": "the full http URL to locate the file",
21
+ "format": "uri",
22
+ "example": "https://github.com/MarquezProject/marquez-airflow-quickstart/blob/693e35482bc2e526ced2b5f9f76ef83dec6ec691/dags/dummy_example.py"
23
+ },
24
+ "repoUrl": {
25
+ "type": "string",
26
+ "description": "the URL to the repository",
27
+ "example": "git@github.com:{org}/{repo}.git or https://github.com/{org}/{repo}.git|svn://<your_ip>/<repository_name>"
28
+ },
29
+ "path": {
30
+ "type": "string",
31
+ "description": "the path in the repo containing the source files",
32
+ "example": "path/to/my/dags"
33
+ },
34
+ "version": {
35
+ "type": "string",
36
+ "description": "the current version deployed (not a branch name, the actual unique version)",
37
+ "example": "git: the git sha | Svn: the revision number"
38
+ },
39
+ "tag": {
40
+ "type": "string",
41
+ "description": "optional tag name"
42
+ },
43
+ "branch": {
44
+ "type": "string",
45
+ "description": "optional branch name"
46
+ }
47
+ },
48
+ "required": ["type", "url"]
49
+ }
50
+ ],
51
+ "type": "object"
52
+ }
53
+ },
54
+ "type": "object",
55
+ "properties": {
56
+ "sourceCodeLocation": {
57
+ "$ref": "#/$defs/SourceCodeLocationJobFacet"
58
+ }
59
+ }
60
+ }