fluent-plugin-openlineage 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +6 -0
  3. data/.github/workflows/linux.yml +30 -0
  4. data/.gitignore +16 -0
  5. data/.idea/.gitignore +8 -0
  6. data/.idea/fluentd.iml +204 -0
  7. data/.idea/misc.xml +4 -0
  8. data/.idea/modules/benchmark-memory-0.2.iml +12 -0
  9. data/.idea/modules/bigdecimal-3.1.iml +11 -0
  10. data/.idea/modules/certstore_c-0.1.iml +15 -0
  11. data/.idea/modules/concurrent-ruby-1.3.iml +18 -0
  12. data/.idea/modules/concurrent-ruby-1.31.iml +15 -0
  13. data/.idea/modules/connection_pool-2.4.iml +11 -0
  14. data/.idea/modules/cool.io-1.8.iml +16 -0
  15. data/.idea/modules/drb-2.2.iml +14 -0
  16. data/.idea/modules/drb-2.21.iml +11 -0
  17. data/.idea/modules/ffi-1.17.iml +20 -0
  18. data/.idea/modules/ffi-win32-extensions-1.0.iml +19 -0
  19. data/.idea/modules/fluentd-1.17.iml +43 -0
  20. data/.idea/modules/http_parser.rb-0.8.iml +17 -0
  21. data/.idea/modules/json-2.7.iml +14 -0
  22. data/.idea/modules/json-2.71.iml +11 -0
  23. data/.idea/modules/msgpack-1.7.iml +15 -0
  24. data/.idea/modules/mutex_m-0.2.iml +15 -0
  25. data/.idea/modules/new_gem.iml +15 -0
  26. data/.idea/modules/power_assert-2.0.iml +19 -0
  27. data/.idea/modules/rake-13.2.iml +18 -0
  28. data/.idea/modules/rake-13.21.iml +15 -0
  29. data/.idea/modules/rake-compiler-1.2.iml +13 -0
  30. data/.idea/modules/rusty_json_schema-0.15.iml +15 -0
  31. data/.idea/modules/serverengine-2.3.iml +17 -0
  32. data/.idea/modules/sigdump-0.2.iml +16 -0
  33. data/.idea/modules/specifications.iml +14 -0
  34. data/.idea/modules/specifications1.iml +11 -0
  35. data/.idea/modules/strptime-0.2.iml +16 -0
  36. data/.idea/modules/thermite-0.13.iml +17 -0
  37. data/.idea/modules/webrick-1.8.iml +18 -0
  38. data/.idea/modules/win32-event-0.6.iml +21 -0
  39. data/.idea/modules/win32-ipc-0.7.iml +20 -0
  40. data/.idea/modules/yajl-ruby-1.4.iml +779 -0
  41. data/.idea/modules.xml +41 -0
  42. data/.rspec +2 -0
  43. data/ChangeLog +3 -0
  44. data/Gemfile +3 -0
  45. data/LICENSE +202 -0
  46. data/README.md +250 -0
  47. data/Rakefile +13 -0
  48. data/fluent-plugin-openlineage.gemspec +28 -0
  49. data/lib/fluent/plugin/parser_openlineage.rb +182 -0
  50. data/misc/fluent.conf +101 -0
  51. data/misc/test-complete.json +73 -0
  52. data/misc/test-start.json +73 -0
  53. data/spec/Naming.md +500 -0
  54. data/spec/OpenLineage.json +304 -0
  55. data/spec/Versioning.md +49 -0
  56. data/spec/events/event_full.json +206 -0
  57. data/spec/events/event_invalid_dataset_facet.json +31 -0
  58. data/spec/events/event_invalid_input_dataset_facet.json +29 -0
  59. data/spec/events/event_invalid_job_facet.json +26 -0
  60. data/spec/events/event_invalid_output_dataset_facet.json +29 -0
  61. data/spec/events/event_invalid_run_facet.json +28 -0
  62. data/spec/events/event_no_run_id.json +28 -0
  63. data/spec/events/event_simple.json +29 -0
  64. data/spec/facets/ColumnLineageDatasetFacet.json +96 -0
  65. data/spec/facets/ColumnLineageDatasetFacet.md +106 -0
  66. data/spec/facets/DataQualityAssertionsDatasetFacet.json +49 -0
  67. data/spec/facets/DataQualityMetricsInputDatasetFacet.json +76 -0
  68. data/spec/facets/DatasetVersionDatasetFacet.json +31 -0
  69. data/spec/facets/DatasourceDatasetFacet.json +32 -0
  70. data/spec/facets/DocumentationDatasetFacet.json +31 -0
  71. data/spec/facets/DocumentationJobFacet.json +30 -0
  72. data/spec/facets/ErrorMessageRunFacet.json +41 -0
  73. data/spec/facets/ExternalQueryRunFacet.json +36 -0
  74. data/spec/facets/ExternalQueryRunFacet.md +49 -0
  75. data/spec/facets/ExtractionErrorRunFacet.json +58 -0
  76. data/spec/facets/JobTypeJobFacet.json +41 -0
  77. data/spec/facets/LifecycleStateChangeDatasetFacet.json +46 -0
  78. data/spec/facets/NominalTimeRunFacet.json +38 -0
  79. data/spec/facets/OutputStatisticsOutputDatasetFacet.json +36 -0
  80. data/spec/facets/OwnershipDatasetFacet.json +45 -0
  81. data/spec/facets/OwnershipJobFacet.json +45 -0
  82. data/spec/facets/ParentRunFacet.json +54 -0
  83. data/spec/facets/ProcessingEngineRunFacet.json +41 -0
  84. data/spec/facets/SQLJobFacet.json +30 -0
  85. data/spec/facets/SchemaDatasetFacet.json +59 -0
  86. data/spec/facets/SourceCodeJobFacet.json +34 -0
  87. data/spec/facets/SourceCodeLocationJobFacet.json +60 -0
  88. data/spec/facets/StorageDatasetFacet.json +35 -0
  89. data/spec/facets/SymlinksDatasetFacet.json +47 -0
  90. data/spec/fluent/plugin/test_parser_openlineage.rb +141 -0
  91. data/spec/registry/core/registry.json +31 -0
  92. data/spec/registry/gcp/facets/GcpCommonJobFacet.json +43 -0
  93. data/spec/registry/gcp/registry.json +6 -0
  94. data/spec/spec_helper.rb +8 -0
  95. data/spec/tests/ColumnLineageDatasetFacet/1.json +172 -0
  96. data/spec/tests/DataQualityAssertionsDatasetFacet/1.json +58 -0
  97. data/spec/tests/DataQualityMetricsInputDatasetFacet/1.json +23 -0
  98. data/spec/tests/DatasetVersionDatasetFacet/1.json +7 -0
  99. data/spec/tests/DatasourceDatasetFacet/1.json +7 -0
  100. data/spec/tests/DocumentationDatasetFacet/1.json +7 -0
  101. data/spec/tests/DocumentationJobFacet/1.json +7 -0
  102. data/spec/tests/ErrorMessageRunFacet/1.json +9 -0
  103. data/spec/tests/ExternalQueryRunFacet/1.json +8 -0
  104. data/spec/tests/ExtractionErrorRunFacet/1.json +15 -0
  105. data/spec/tests/JobTypeJobFacet/1.json +9 -0
  106. data/spec/tests/LifecycleStateChangeDatasetFacet/1.json +11 -0
  107. data/spec/tests/NominalTimeRunFacet/1.json +8 -0
  108. data/spec/tests/OutputStatisticsOutputDatasetFacet/1.json +9 -0
  109. data/spec/tests/OwnershipDatasetFacet/1.json +11 -0
  110. data/spec/tests/OwnershipJobFacet/1.json +11 -0
  111. data/spec/tests/ParentRunFacet/1.json +13 -0
  112. data/spec/tests/ProcessingEngineRunFacet/1.json +9 -0
  113. data/spec/tests/SQLJobFacet/1.json +7 -0
  114. data/spec/tests/SchemaDatasetFacet/1.json +92 -0
  115. data/spec/tests/SourceCodeJobFacet/1.json +8 -0
  116. data/spec/tests/SourceCodeLocationJobFacet/1.json +8 -0
  117. data/spec/tests/StorageDatasetFacet/1.json +8 -0
  118. data/spec/tests/SymlinksDatasetFacet/1.json +13 -0
  119. data/spec/tests/example_full_event.json +24 -0
  120. metadata +188 -3
@@ -0,0 +1,304 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openlineage.io/spec/2-0-2/OpenLineage.json",
4
+ "$defs": {
5
+ "BaseEvent": {
6
+ "type": "object",
7
+ "properties": {
8
+ "eventTime": {
9
+ "description": "the time the event occurred at",
10
+ "type": "string",
11
+ "format": "date-time"
12
+ },
13
+ "producer": {
14
+ "description": "URI identifying the producer of this metadata. For example this could be a git url with a given tag or sha",
15
+ "type": "string",
16
+ "format": "uri",
17
+ "example": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client"
18
+ },
19
+ "schemaURL": {
20
+ "description": "The JSON Pointer (https://tools.ietf.org/html/rfc6901) URL to the corresponding version of the schema definition for this RunEvent",
21
+ "type": "string",
22
+ "format": "uri",
23
+ "example": "https://openlineage.io/spec/0-0-1/OpenLineage.json"
24
+ }
25
+ },
26
+ "required": ["eventTime", "producer", "schemaURL"]
27
+ },
28
+ "RunEvent": {
29
+ "allOf": [
30
+ { "$ref": "#/$defs/BaseEvent" },
31
+ {
32
+ "type": "object",
33
+ "properties": {
34
+ "eventType": {
35
+ "description": "the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE, ABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run. For example to send additional metadata after the run is complete",
36
+ "type": "string",
37
+ "enum": ["START", "RUNNING", "COMPLETE", "ABORT", "FAIL", "OTHER"],
38
+ "example": "START|RUNNING|COMPLETE|ABORT|FAIL|OTHER"
39
+ },
40
+ "run": {
41
+ "$ref": "#/$defs/Run"
42
+ },
43
+ "job": {
44
+ "$ref": "#/$defs/Job"
45
+ },
46
+ "inputs": {
47
+ "description": "The set of **input** datasets.",
48
+ "type": "array",
49
+ "items": {
50
+ "$ref": "#/$defs/InputDataset"
51
+ }
52
+ },
53
+ "outputs": {
54
+ "description": "The set of **output** datasets.",
55
+ "type": "array",
56
+ "items": {
57
+ "$ref": "#/$defs/OutputDataset"
58
+ }
59
+ }
60
+ },
61
+ "required": ["run", "job"]
62
+ }
63
+ ]
64
+ },
65
+ "DatasetEvent": {
66
+ "allOf": [
67
+ { "$ref": "#/$defs/BaseEvent" },
68
+ {
69
+ "type": "object",
70
+ "properties": {
71
+ "dataset": {
72
+ "$ref": "#/$defs/StaticDataset"
73
+ }
74
+ },
75
+ "required": ["dataset"],
76
+ "not": { "required": ["job", "run"] }
77
+ }
78
+ ]
79
+ },
80
+ "JobEvent": {
81
+ "allOf": [
82
+ { "$ref": "#/$defs/BaseEvent" },
83
+ {
84
+ "type": "object",
85
+ "properties": {
86
+ "job": {
87
+ "$ref": "#/$defs/Job"
88
+ },
89
+ "inputs": {
90
+ "description": "The set of **input** datasets.",
91
+ "type": "array",
92
+ "items": {
93
+ "$ref": "#/$defs/InputDataset"
94
+ }
95
+ },
96
+ "outputs": {
97
+ "description": "The set of **output** datasets.",
98
+ "type": "array",
99
+ "items": {
100
+ "$ref": "#/$defs/OutputDataset"
101
+ }
102
+ }
103
+ },
104
+ "required": ["job"],
105
+ "not": { "required": ["run"] }
106
+ }
107
+ ]
108
+ },
109
+ "Run": {
110
+ "type": "object",
111
+ "properties": {
112
+ "runId": {
113
+ "description": "The globally unique ID of the run associated with the job.",
114
+ "type": "string",
115
+ "format": "uuid"
116
+ },
117
+ "facets": {
118
+ "description": "The run facets.",
119
+ "type": "object",
120
+ "anyOf": [
121
+ {
122
+ "type": "object",
123
+ "additionalProperties": { "$ref": "#/$defs/RunFacet" }
124
+ }
125
+ ]
126
+ }
127
+ },
128
+ "required": ["runId"]
129
+ },
130
+ "RunFacet": {
131
+ "description": "A Run Facet",
132
+ "type": "object",
133
+ "allOf": [{ "$ref": "#/$defs/BaseFacet" }]
134
+ },
135
+ "Job": {
136
+ "type": "object",
137
+ "properties": {
138
+ "namespace": {
139
+ "description": "The namespace containing that job",
140
+ "type": "string",
141
+ "example": "my-scheduler-namespace"
142
+ },
143
+ "name": {
144
+ "description": "The unique name for that job within that namespace",
145
+ "type": "string",
146
+ "example": "myjob.mytask"
147
+ },
148
+ "facets": {
149
+ "description": "The job facets.",
150
+ "type": "object",
151
+ "anyOf": [
152
+ {
153
+ "type": "object",
154
+ "additionalProperties": { "$ref": "#/$defs/JobFacet" }
155
+ }
156
+ ]
157
+ }
158
+ },
159
+ "required": ["namespace", "name"]
160
+ },
161
+ "JobFacet": {
162
+ "description": "A Job Facet",
163
+ "type": "object",
164
+ "allOf": [
165
+ { "$ref": "#/$defs/BaseFacet" },
166
+ {
167
+ "type": "object",
168
+ "properties": {
169
+ "_deleted": {
170
+ "description": "set to true to delete a facet",
171
+ "type": "boolean"
172
+ }
173
+ }
174
+ }
175
+ ]
176
+ },
177
+ "InputDataset": {
178
+ "description": "An input dataset",
179
+ "type": "object",
180
+ "allOf": [
181
+ { "$ref": "#/$defs/Dataset" },
182
+ {
183
+ "type": "object",
184
+ "properties": {
185
+ "inputFacets": {
186
+ "description": "The input facets for this dataset.",
187
+ "type": "object",
188
+ "anyOf": [
189
+ {
190
+ "type": "object",
191
+ "additionalProperties": {
192
+ "$ref": "#/$defs/InputDatasetFacet"
193
+ }
194
+ }
195
+ ]
196
+ }
197
+ }
198
+ }
199
+ ]
200
+ },
201
+ "InputDatasetFacet": {
202
+ "description": "An Input Dataset Facet",
203
+ "type": "object",
204
+ "allOf": [{ "$ref": "#/$defs/BaseFacet" }]
205
+ },
206
+ "OutputDataset": {
207
+ "description": "An output dataset",
208
+ "type": "object",
209
+ "allOf": [
210
+ { "$ref": "#/$defs/Dataset" },
211
+ {
212
+ "type": "object",
213
+ "properties": {
214
+ "outputFacets": {
215
+ "description": "The output facets for this dataset",
216
+ "type": "object",
217
+ "anyOf": [
218
+ {
219
+ "type": "object",
220
+ "additionalProperties": {
221
+ "$ref": "#/$defs/OutputDatasetFacet"
222
+ }
223
+ }
224
+ ]
225
+ }
226
+ }
227
+ }
228
+ ]
229
+ },
230
+ "OutputDatasetFacet": {
231
+ "description": "An Output Dataset Facet",
232
+ "type": "object",
233
+ "allOf": [{ "$ref": "#/$defs/BaseFacet" }]
234
+ },
235
+ "Dataset": {
236
+ "type": "object",
237
+ "properties": {
238
+ "namespace": {
239
+ "description": "The namespace containing that dataset",
240
+ "type": "string",
241
+ "example": "my-datasource-namespace"
242
+ },
243
+ "name": {
244
+ "description": "The unique name for that dataset within that namespace",
245
+ "type": "string",
246
+ "example": "instance.schema.table"
247
+ },
248
+ "facets": {
249
+ "description": "The facets for this dataset",
250
+ "type": "object",
251
+ "anyOf": [
252
+ {
253
+ "type": "object",
254
+ "additionalProperties": { "$ref": "#/$defs/DatasetFacet" }
255
+ }
256
+ ]
257
+ }
258
+ },
259
+ "required": ["namespace", "name"]
260
+ },
261
+ "StaticDataset": {
262
+ "description": "A Dataset sent within static metadata events",
263
+ "type": "object",
264
+ "allOf": [{ "$ref": "#/$defs/Dataset" }]
265
+ },
266
+ "DatasetFacet": {
267
+ "description": "A Dataset Facet",
268
+ "type": "object",
269
+ "allOf": [
270
+ { "$ref": "#/$defs/BaseFacet" },
271
+ {
272
+ "type": "object",
273
+ "properties": {
274
+ "_deleted": {
275
+ "description": "set to true to delete a facet",
276
+ "type": "boolean"
277
+ }
278
+ }
279
+ }
280
+ ]
281
+ },
282
+ "BaseFacet": {
283
+ "description": "all fields of the base facet are prefixed with _ to avoid name conflicts in facets",
284
+ "type": "object",
285
+ "properties": {
286
+ "_producer": {
287
+ "description": "URI identifying the producer of this metadata. For example this could be a git url with a given tag or sha",
288
+ "type": "string",
289
+ "format": "uri",
290
+ "example": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client"
291
+ },
292
+ "_schemaURL": {
293
+ "description": "The JSON Pointer (https://tools.ietf.org/html/rfc6901) URL to the corresponding version of the schema definition for this facet",
294
+ "type": "string",
295
+ "format": "uri",
296
+ "example": "https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/BaseFacet"
297
+ }
298
+ },
299
+ "additionalProperties": true,
300
+ "required": ["_producer", "_schemaURL"]
301
+ }
302
+ },
303
+ "oneOf": [{ "$ref": "#/$defs/RunEvent" }, { "$ref": "#/$defs/DatasetEvent" }, { "$ref": "#/$defs/JobEvent" }]
304
+ }
@@ -0,0 +1,49 @@
1
+ # Versioning
2
+
3
+ ## Context
4
+
5
+ The OpenLineage spec gets versioned and published.
6
+
7
+ Requirements:
8
+
9
+ - The OpenLineage spec and related libraries are in the OpenLineage repo.
10
+ - The OpenLineage spec version changes only when the spec itself changes.
11
+ - The libraries in the repo change more frequently than the spec (including when the spec changes).
12
+ - We want to version the OpenLineage spec independently of the API spec.
13
+ - The mechanism to version and publish the OpenLineage core spec applies to publishing custom facets.
14
+
15
+ ## Mechanism
16
+
17
+ - The spec defines its current version using the `"$id"` field:
18
+ - See:
19
+ - [json schema core doc](https://json-schema.org/draft/2020-12/json-schema-core.html#rfc.section.8.2.1)
20
+ - [JSON Schema spec $id](https://json-schema.org/draft/2019-09/schema)
21
+ - [also on github](https://github.com/json-schema-org/json-schema-spec/blob/draft-next/meta/core.json)
22
+ - Example: `"$id": "https://openlineage.io/spec/1-0-0/OpenLineage.json"` The URL in $id is resolvable and returns that
23
+ version of the spec. We use github pages to publish the spec to openlineage.io
24
+ - The `$id` URL uses a SEMVER-compliant version, following the
25
+ [SCHEMAVER semantics](https://docs.snowplowanalytics.com/docs/pipeline-components-and-applications/iglu/common-architecture/schemaver/)
26
+ MODEL-REVISION-ADDITION
27
+ - MODEL when you make a breaking schema change which will prevent interaction with any historical data
28
+ - REVISION when you introduce a schema change which may prevent interaction with some historical data
29
+ - ADDITION when you make a schema change that is compatible with all historical data
30
+
31
+ ## Implementation Plan
32
+
33
+ - CI verifies that:
34
+ - The $id field has the right domain prefix.
35
+ - The version changes when the spec changes: When resolving `$id`, the build fails if the spec is not exactly the
36
+ same.
37
+ - The version does not change when the spec does not change. We can verify that the current version of the spec is not
38
+ already published with a different version.
39
+ - Libraries are generating events with the current version.
40
+ - The spec is backward-compatible (only add optional fields) and consistent with the versioning semantics.
41
+ - Git pre-commit hook: increments the versions automatically when the spec changes.
42
+ - Spec publication:
43
+ - CI publishes to github pages when the $id changes on main (when this particular URL does not exist yet).
44
+ - CI tags main with OpenLineage.json-{version}.
45
+
46
+ ---
47
+
48
+ SPDX-License-Identifier: Apache-2.0\
49
+ Copyright 2018-2024 contributors to the OpenLineage project
@@ -0,0 +1,206 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:51:01.641Z",
4
+ "run": {
5
+ "runId": "ea041791-68bc-4ae1-bd89-4c8106a157e4",
6
+ "facets": {
7
+ "nominalTime": {
8
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
9
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
10
+ "nominalStartTime": "2020-12-17T03:00:00.001Z",
11
+ "nominalEndTime": "2020-12-17T04:00:00.001Z"
12
+ },
13
+ "parent": {
14
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
15
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
16
+ "run": {
17
+ "runId": "3f5e83fa-3480-44ff-99c5-ff943904e5e8"
18
+ },
19
+ "job": {
20
+ "namespace": "my-scheduler-namespace",
21
+ "name": "myjob.mytask"
22
+ }
23
+ },
24
+ "additionalProp1": {
25
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
26
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
27
+ "additionalProp1": {}
28
+ },
29
+ "additionalProp2": {
30
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
31
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
32
+ "additionalProp1": {}
33
+ },
34
+ "additionalProp3": {
35
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
36
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
37
+ "additionalProp1": {}
38
+ }
39
+ }
40
+ },
41
+ "job": {
42
+ "namespace": "my-scheduler-namespace",
43
+ "name": "myjob.mytask",
44
+ "facets": {
45
+ "documentation": {
46
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
47
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
48
+ "description": "string"
49
+ },
50
+ "sourceCodeLocation": {
51
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
52
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
53
+ "type": "git",
54
+ "url": "http://example.com"
55
+ },
56
+ "sql": {
57
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
58
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
59
+ "additionalPropExample": {
60
+ "example": true
61
+ },
62
+ "query": "SELECT * FROM foo"
63
+ },
64
+ "additionalProp1": {
65
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
66
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
67
+ "additionalProp1": {}
68
+ },
69
+ "additionalProp2": {
70
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
71
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
72
+ "additionalProp1": {}
73
+ },
74
+ "additionalProp3": {
75
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
76
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
77
+ "additionalProp1": {}
78
+ }
79
+ }
80
+ },
81
+ "inputs": [
82
+ {
83
+ "namespace": "my-datasource-namespace",
84
+ "name": "instance.schema.table",
85
+ "inputFacets": {
86
+ "dataQualityMetrics": {
87
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
88
+ "_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/DataQualityMetricsInputDatasetFacet",
89
+ "rowCount": 1000,
90
+ "bytes": 1048576,
91
+ "fileCount": 5
92
+ },
93
+ "dataQualityAssertions": {
94
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
95
+ "_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/DataQualityAssertionsDatasetFacet",
96
+ "assertions": [
97
+ {
98
+ "assertion": "row_count_equal_to",
99
+ "success": true
100
+ },
101
+ {
102
+ "assertion": "no_null_values",
103
+ "success": true,
104
+ "column": "id"
105
+ }
106
+ ]
107
+ }
108
+ },
109
+ "facets": {
110
+ "documentation": {
111
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
112
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
113
+ "description": "canonical representation of entity Foo"
114
+ },
115
+ "schema": {
116
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
117
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
118
+ "fields": [
119
+ {
120
+ "name": "column1",
121
+ "type": "VARCHAR",
122
+ "description": "string"
123
+ }
124
+ ]
125
+ },
126
+ "dataSource": {
127
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
128
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
129
+ "name": "string",
130
+ "uri": "string"
131
+ },
132
+ "additionalProp1": {
133
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
134
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
135
+ "additionalProp1": {}
136
+ },
137
+ "additionalProp2": {
138
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
139
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
140
+ "additionalProp1": {}
141
+ },
142
+ "additionalProp3": {
143
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
144
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
145
+ "additionalProp1": {}
146
+ }
147
+ }
148
+ }
149
+ ],
150
+ "outputs": [
151
+ {
152
+ "namespace": "my-datasource-namespace",
153
+ "name": "instance.schema.table",
154
+ "outputFacets": {
155
+ "outputStatistics": {
156
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
157
+ "_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/OutputStatisticsOutputDatasetFacet",
158
+ "rowCount": 2000,
159
+ "size": 2097152,
160
+ "fileCount": 5
161
+ }
162
+ },
163
+ "facets": {
164
+ "documentation": {
165
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
166
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
167
+ "description": "canonical representation of entity Foo"
168
+ },
169
+ "schema": {
170
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
171
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
172
+ "fields": [
173
+ {
174
+ "name": "column1",
175
+ "type": "VARCHAR",
176
+ "description": "string"
177
+ }
178
+ ]
179
+ },
180
+ "dataSource": {
181
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
182
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
183
+ "name": "string",
184
+ "uri": "string"
185
+ },
186
+ "additionalProp1": {
187
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
188
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
189
+ "additionalProp1": {}
190
+ },
191
+ "additionalProp2": {
192
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
193
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
194
+ "additionalProp1": {}
195
+ },
196
+ "additionalProp3": {
197
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
198
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
199
+ "additionalProp1": {}
200
+ }
201
+ }
202
+ }
203
+ ],
204
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
205
+ "schemaURL": "https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent"
206
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:52:00.001+10:00",
4
+ "run": {
5
+ "runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
6
+ },
7
+ "job": {
8
+ "namespace": "my-scheduler-namespace",
9
+ "name": "myjob"
10
+ },
11
+ "inputs": [ ],
12
+ "outputs": [
13
+ {
14
+ "namespace": "my-datasource-namespace",
15
+ "name": "instance.schema.output-1",
16
+ "facets": {
17
+ "ownership": {
18
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
19
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#whatever",
20
+ "owners": [
21
+ {
22
+ "no-name": "no-name-owner"
23
+ }
24
+ ]
25
+ }
26
+ }
27
+ }
28
+ ],
29
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
30
+ "schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
31
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:52:00.001+10:00",
4
+ "run": {
5
+ "runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
6
+ },
7
+ "job": {
8
+ "namespace": "my-scheduler-namespace",
9
+ "name": "myjob"
10
+ },
11
+ "inputs": [
12
+ {
13
+ "namespace": "my-datasource-namespace",
14
+ "name": "instance.schema.input-1",
15
+ "inputFacets": {
16
+ "dataQualityMetrics": {
17
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
18
+ "_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/DataQualityMetricsInputDatasetFacet",
19
+ "noRowCount": 1000,
20
+ "bytes": 1048576,
21
+ "fileCount": 5
22
+ }
23
+ }
24
+ }
25
+ ],
26
+ "outputs": [],
27
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
28
+ "schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
29
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "eventType": "COMPLETE",
3
+ "eventTime": "2020-12-28T19:52:00.001+10:00",
4
+ "run": {
5
+ "runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
6
+ },
7
+ "job": {
8
+ "namespace": "my-scheduler-namespace",
9
+ "name": "myjob",
10
+ "facets": {
11
+ "ownership": {
12
+ "_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
13
+ "_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#whatever",
14
+ "owners": [
15
+ {
16
+ "no-name": "no-name-owner"
17
+ }
18
+ ]
19
+ }
20
+ }
21
+ },
22
+ "inputs": [ ],
23
+ "outputs": [ ],
24
+ "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
25
+ "schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
26
+ }