fluentd-openlineage-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.idea/.gitignore +8 -0
- data/.idea/fluentd.iml +204 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules/benchmark-memory-0.2.iml +12 -0
- data/.idea/modules/bigdecimal-3.1.iml +11 -0
- data/.idea/modules/certstore_c-0.1.iml +15 -0
- data/.idea/modules/concurrent-ruby-1.3.iml +18 -0
- data/.idea/modules/concurrent-ruby-1.31.iml +15 -0
- data/.idea/modules/connection_pool-2.4.iml +11 -0
- data/.idea/modules/cool.io-1.8.iml +16 -0
- data/.idea/modules/drb-2.2.iml +14 -0
- data/.idea/modules/drb-2.21.iml +11 -0
- data/.idea/modules/ffi-1.17.iml +20 -0
- data/.idea/modules/ffi-win32-extensions-1.0.iml +19 -0
- data/.idea/modules/fluentd-1.17.iml +43 -0
- data/.idea/modules/http_parser.rb-0.8.iml +17 -0
- data/.idea/modules/json-2.7.iml +14 -0
- data/.idea/modules/json-2.71.iml +11 -0
- data/.idea/modules/msgpack-1.7.iml +15 -0
- data/.idea/modules/mutex_m-0.2.iml +15 -0
- data/.idea/modules/new_gem.iml +15 -0
- data/.idea/modules/power_assert-2.0.iml +19 -0
- data/.idea/modules/rake-13.2.iml +18 -0
- data/.idea/modules/rake-13.21.iml +15 -0
- data/.idea/modules/rake-compiler-1.2.iml +13 -0
- data/.idea/modules/rusty_json_schema-0.15.iml +15 -0
- data/.idea/modules/serverengine-2.3.iml +17 -0
- data/.idea/modules/sigdump-0.2.iml +16 -0
- data/.idea/modules/specifications.iml +14 -0
- data/.idea/modules/specifications1.iml +11 -0
- data/.idea/modules/strptime-0.2.iml +16 -0
- data/.idea/modules/thermite-0.13.iml +17 -0
- data/.idea/modules/webrick-1.8.iml +18 -0
- data/.idea/modules/win32-event-0.6.iml +21 -0
- data/.idea/modules/win32-ipc-0.7.iml +20 -0
- data/.idea/modules/yajl-ruby-1.4.iml +779 -0
- data/.idea/modules.xml +41 -0
- data/Gemfile +3 -0
- data/README.md +223 -0
- data/Rakefile +13 -0
- data/config/conf/fluent.conf +101 -0
- data/config/test-complete.json +73 -0
- data/config/test-start.json +73 -0
- data/events/event_full.json +206 -0
- data/events/event_invalid_dataset_facet.json +31 -0
- data/events/event_invalid_input_dataset_facet.json +29 -0
- data/events/event_invalid_job_facet.json +26 -0
- data/events/event_invalid_output_dataset_facet.json +29 -0
- data/events/event_invalid_run_facet.json +28 -0
- data/events/event_no_run_id.json +28 -0
- data/events/event_simple.json +29 -0
- data/fluentd-openlineage-parser.gemspec +28 -0
- data/lib/fluent/plugin/fluentd-openlineage-parser.rb +182 -0
- data/spec/Naming.md +500 -0
- data/spec/OpenLineage.json +304 -0
- data/spec/OpenLineage.md +179 -0
- data/spec/OpenLineage.yml +27 -0
- data/spec/OpenLineageModel.svg +1 -0
- data/spec/Versioning.md +49 -0
- data/spec/facets/ColumnLineageDatasetFacet.json +96 -0
- data/spec/facets/ColumnLineageDatasetFacet.md +106 -0
- data/spec/facets/DataQualityAssertionsDatasetFacet.json +49 -0
- data/spec/facets/DataQualityMetricsInputDatasetFacet.json +76 -0
- data/spec/facets/DatasetVersionDatasetFacet.json +31 -0
- data/spec/facets/DatasourceDatasetFacet.json +32 -0
- data/spec/facets/DocumentationDatasetFacet.json +31 -0
- data/spec/facets/DocumentationJobFacet.json +30 -0
- data/spec/facets/ErrorMessageRunFacet.json +41 -0
- data/spec/facets/ExternalQueryRunFacet.json +36 -0
- data/spec/facets/ExternalQueryRunFacet.md +49 -0
- data/spec/facets/ExtractionErrorRunFacet.json +58 -0
- data/spec/facets/JobTypeJobFacet.json +41 -0
- data/spec/facets/LifecycleStateChangeDatasetFacet.json +46 -0
- data/spec/facets/NominalTimeRunFacet.json +38 -0
- data/spec/facets/OutputStatisticsOutputDatasetFacet.json +36 -0
- data/spec/facets/OwnershipDatasetFacet.json +45 -0
- data/spec/facets/OwnershipJobFacet.json +45 -0
- data/spec/facets/ParentRunFacet.json +54 -0
- data/spec/facets/ProcessingEngineRunFacet.json +41 -0
- data/spec/facets/SQLJobFacet.json +30 -0
- data/spec/facets/SchemaDatasetFacet.json +59 -0
- data/spec/facets/SourceCodeJobFacet.json +34 -0
- data/spec/facets/SourceCodeLocationJobFacet.json +60 -0
- data/spec/facets/StorageDatasetFacet.json +35 -0
- data/spec/facets/SymlinksDatasetFacet.json +47 -0
- data/spec/registry/core/registry.json +31 -0
- data/spec/registry/gcp/facets/GcpCommonJobFacet.json +43 -0
- data/spec/registry/gcp/registry.json +6 -0
- data/spec/release.sh +80 -0
- data/spec/tests/ColumnLineageDatasetFacet/1.json +172 -0
- data/spec/tests/DataQualityAssertionsDatasetFacet/1.json +58 -0
- data/spec/tests/DataQualityMetricsInputDatasetFacet/1.json +23 -0
- data/spec/tests/DatasetVersionDatasetFacet/1.json +7 -0
- data/spec/tests/DatasourceDatasetFacet/1.json +7 -0
- data/spec/tests/DocumentationDatasetFacet/1.json +7 -0
- data/spec/tests/DocumentationJobFacet/1.json +7 -0
- data/spec/tests/ErrorMessageRunFacet/1.json +9 -0
- data/spec/tests/ExternalQueryRunFacet/1.json +8 -0
- data/spec/tests/ExtractionErrorRunFacet/1.json +15 -0
- data/spec/tests/JobTypeJobFacet/1.json +9 -0
- data/spec/tests/LifecycleStateChangeDatasetFacet/1.json +11 -0
- data/spec/tests/NominalTimeRunFacet/1.json +8 -0
- data/spec/tests/OutputStatisticsOutputDatasetFacet/1.json +9 -0
- data/spec/tests/OwnershipDatasetFacet/1.json +11 -0
- data/spec/tests/OwnershipJobFacet/1.json +11 -0
- data/spec/tests/ParentRunFacet/1.json +13 -0
- data/spec/tests/ProcessingEngineRunFacet/1.json +9 -0
- data/spec/tests/SQLJobFacet/1.json +7 -0
- data/spec/tests/SchemaDatasetFacet/1.json +92 -0
- data/spec/tests/SourceCodeJobFacet/1.json +8 -0
- data/spec/tests/SourceCodeLocationJobFacet/1.json +8 -0
- data/spec/tests/StorageDatasetFacet/1.json +8 -0
- data/spec/tests/SymlinksDatasetFacet/1.json +13 -0
- data/spec/tests/example_full_event.json +24 -0
- data/test/helper.rb +8 -0
- data/test/plugin/test_parser_openlineage.rb +141 -0
- metadata +298 -0
@@ -0,0 +1,206 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:51:01.641Z",
|
4
|
+
"run": {
|
5
|
+
"runId": "ea041791-68bc-4ae1-bd89-4c8106a157e4",
|
6
|
+
"facets": {
|
7
|
+
"nominalTime": {
|
8
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
9
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
10
|
+
"nominalStartTime": "2020-12-17T03:00:00.001Z",
|
11
|
+
"nominalEndTime": "2020-12-17T04:00:00.001Z"
|
12
|
+
},
|
13
|
+
"parent": {
|
14
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
15
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
16
|
+
"run": {
|
17
|
+
"runId": "3f5e83fa-3480-44ff-99c5-ff943904e5e8"
|
18
|
+
},
|
19
|
+
"job": {
|
20
|
+
"namespace": "my-scheduler-namespace",
|
21
|
+
"name": "myjob.mytask"
|
22
|
+
}
|
23
|
+
},
|
24
|
+
"additionalProp1": {
|
25
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
26
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
27
|
+
"additionalProp1": {}
|
28
|
+
},
|
29
|
+
"additionalProp2": {
|
30
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
31
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
32
|
+
"additionalProp1": {}
|
33
|
+
},
|
34
|
+
"additionalProp3": {
|
35
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
36
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
37
|
+
"additionalProp1": {}
|
38
|
+
}
|
39
|
+
}
|
40
|
+
},
|
41
|
+
"job": {
|
42
|
+
"namespace": "my-scheduler-namespace",
|
43
|
+
"name": "myjob.mytask",
|
44
|
+
"facets": {
|
45
|
+
"documentation": {
|
46
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
47
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
48
|
+
"description": "string"
|
49
|
+
},
|
50
|
+
"sourceCodeLocation": {
|
51
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
52
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
53
|
+
"type": "git",
|
54
|
+
"url": "http://example.com"
|
55
|
+
},
|
56
|
+
"sql": {
|
57
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
58
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
59
|
+
"additionalPropExample": {
|
60
|
+
"example": true
|
61
|
+
},
|
62
|
+
"query": "SELECT * FROM foo"
|
63
|
+
},
|
64
|
+
"additionalProp1": {
|
65
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
66
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
67
|
+
"additionalProp1": {}
|
68
|
+
},
|
69
|
+
"additionalProp2": {
|
70
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
71
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
72
|
+
"additionalProp1": {}
|
73
|
+
},
|
74
|
+
"additionalProp3": {
|
75
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
76
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
77
|
+
"additionalProp1": {}
|
78
|
+
}
|
79
|
+
}
|
80
|
+
},
|
81
|
+
"inputs": [
|
82
|
+
{
|
83
|
+
"namespace": "my-datasource-namespace",
|
84
|
+
"name": "instance.schema.table",
|
85
|
+
"inputFacets": {
|
86
|
+
"dataQualityMetrics": {
|
87
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
88
|
+
"_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/DataQualityMetricsInputDatasetFacet",
|
89
|
+
"rowCount": 1000,
|
90
|
+
"bytes": 1048576,
|
91
|
+
"fileCount": 5
|
92
|
+
},
|
93
|
+
"dataQualityAssertions": {
|
94
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
95
|
+
"_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/DataQualityAssertionsDatasetFacet",
|
96
|
+
"assertions": [
|
97
|
+
{
|
98
|
+
"assertion": "row_count_equal_to",
|
99
|
+
"success": true
|
100
|
+
},
|
101
|
+
{
|
102
|
+
"assertion": "no_null_values",
|
103
|
+
"success": true,
|
104
|
+
"column": "id"
|
105
|
+
}
|
106
|
+
]
|
107
|
+
}
|
108
|
+
},
|
109
|
+
"facets": {
|
110
|
+
"documentation": {
|
111
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
112
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
113
|
+
"description": "canonical representation of entity Foo"
|
114
|
+
},
|
115
|
+
"schema": {
|
116
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
117
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
118
|
+
"fields": [
|
119
|
+
{
|
120
|
+
"name": "column1",
|
121
|
+
"type": "VARCHAR",
|
122
|
+
"description": "string"
|
123
|
+
}
|
124
|
+
]
|
125
|
+
},
|
126
|
+
"dataSource": {
|
127
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
128
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
129
|
+
"name": "string",
|
130
|
+
"uri": "string"
|
131
|
+
},
|
132
|
+
"additionalProp1": {
|
133
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
134
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
135
|
+
"additionalProp1": {}
|
136
|
+
},
|
137
|
+
"additionalProp2": {
|
138
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
139
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
140
|
+
"additionalProp1": {}
|
141
|
+
},
|
142
|
+
"additionalProp3": {
|
143
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
144
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
145
|
+
"additionalProp1": {}
|
146
|
+
}
|
147
|
+
}
|
148
|
+
}
|
149
|
+
],
|
150
|
+
"outputs": [
|
151
|
+
{
|
152
|
+
"namespace": "my-datasource-namespace",
|
153
|
+
"name": "instance.schema.table",
|
154
|
+
"outputFacets": {
|
155
|
+
"outputStatistics": {
|
156
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
157
|
+
"_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/OutputStatisticsOutputDatasetFacet",
|
158
|
+
"rowCount": 2000,
|
159
|
+
"size": 2097152,
|
160
|
+
"fileCount": 5
|
161
|
+
}
|
162
|
+
},
|
163
|
+
"facets": {
|
164
|
+
"documentation": {
|
165
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
166
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
167
|
+
"description": "canonical representation of entity Foo"
|
168
|
+
},
|
169
|
+
"schema": {
|
170
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
171
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
172
|
+
"fields": [
|
173
|
+
{
|
174
|
+
"name": "column1",
|
175
|
+
"type": "VARCHAR",
|
176
|
+
"description": "string"
|
177
|
+
}
|
178
|
+
]
|
179
|
+
},
|
180
|
+
"dataSource": {
|
181
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
182
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
183
|
+
"name": "string",
|
184
|
+
"uri": "string"
|
185
|
+
},
|
186
|
+
"additionalProp1": {
|
187
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
188
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
189
|
+
"additionalProp1": {}
|
190
|
+
},
|
191
|
+
"additionalProp2": {
|
192
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
193
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
194
|
+
"additionalProp1": {}
|
195
|
+
},
|
196
|
+
"additionalProp3": {
|
197
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
198
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#MyCustomJobFacet",
|
199
|
+
"additionalProp1": {}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
}
|
203
|
+
],
|
204
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
205
|
+
"schemaURL": "https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent"
|
206
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:52:00.001+10:00",
|
4
|
+
"run": {
|
5
|
+
"runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
|
6
|
+
},
|
7
|
+
"job": {
|
8
|
+
"namespace": "my-scheduler-namespace",
|
9
|
+
"name": "myjob"
|
10
|
+
},
|
11
|
+
"inputs": [ ],
|
12
|
+
"outputs": [
|
13
|
+
{
|
14
|
+
"namespace": "my-datasource-namespace",
|
15
|
+
"name": "instance.schema.output-1",
|
16
|
+
"facets": {
|
17
|
+
"ownership": {
|
18
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
19
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#whatever",
|
20
|
+
"owners": [
|
21
|
+
{
|
22
|
+
"no-name": "no-name-owner"
|
23
|
+
}
|
24
|
+
]
|
25
|
+
}
|
26
|
+
}
|
27
|
+
}
|
28
|
+
],
|
29
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
30
|
+
"schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
|
31
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:52:00.001+10:00",
|
4
|
+
"run": {
|
5
|
+
"runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
|
6
|
+
},
|
7
|
+
"job": {
|
8
|
+
"namespace": "my-scheduler-namespace",
|
9
|
+
"name": "myjob"
|
10
|
+
},
|
11
|
+
"inputs": [
|
12
|
+
{
|
13
|
+
"namespace": "my-datasource-namespace",
|
14
|
+
"name": "instance.schema.input-1",
|
15
|
+
"inputFacets": {
|
16
|
+
"dataQualityMetrics": {
|
17
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
18
|
+
"_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/DataQualityMetricsInputDatasetFacet",
|
19
|
+
"noRowCount": 1000,
|
20
|
+
"bytes": 1048576,
|
21
|
+
"fileCount": 5
|
22
|
+
}
|
23
|
+
}
|
24
|
+
}
|
25
|
+
],
|
26
|
+
"outputs": [],
|
27
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
28
|
+
"schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
|
29
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:52:00.001+10:00",
|
4
|
+
"run": {
|
5
|
+
"runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
|
6
|
+
},
|
7
|
+
"job": {
|
8
|
+
"namespace": "my-scheduler-namespace",
|
9
|
+
"name": "myjob",
|
10
|
+
"facets": {
|
11
|
+
"ownership": {
|
12
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
13
|
+
"_schemaURL": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/spec/OpenLineage.yml#whatever",
|
14
|
+
"owners": [
|
15
|
+
{
|
16
|
+
"no-name": "no-name-owner"
|
17
|
+
}
|
18
|
+
]
|
19
|
+
}
|
20
|
+
}
|
21
|
+
},
|
22
|
+
"inputs": [ ],
|
23
|
+
"outputs": [ ],
|
24
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
25
|
+
"schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
|
26
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:52:00.001+10:00",
|
4
|
+
"run": {
|
5
|
+
"runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
|
6
|
+
},
|
7
|
+
"job": {
|
8
|
+
"namespace": "my-scheduler-namespace",
|
9
|
+
"name": "myjob"
|
10
|
+
},
|
11
|
+
"inputs": [],
|
12
|
+
"outputs": [
|
13
|
+
{
|
14
|
+
"namespace": "my-datasource-namespace",
|
15
|
+
"name": "instance.schema.output-1",
|
16
|
+
"outputFacets": {
|
17
|
+
"outputStatistics": {
|
18
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
19
|
+
"_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/OutputStatisticsOutputDatasetFacet",
|
20
|
+
"rowCount": "wrong",
|
21
|
+
"size": 2097152,
|
22
|
+
"fileCount": 5
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
],
|
27
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
28
|
+
"schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
|
29
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:52:00.001+10:00",
|
4
|
+
"run": {
|
5
|
+
"runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7",
|
6
|
+
"facets": {
|
7
|
+
"parent": {
|
8
|
+
"run": {
|
9
|
+
"noRunId": "invalid run id"
|
10
|
+
},
|
11
|
+
"job": {
|
12
|
+
"namespace": "parent_namespace",
|
13
|
+
"name": "parent_name"
|
14
|
+
},
|
15
|
+
"_producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
16
|
+
"_schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunFacet"
|
17
|
+
}
|
18
|
+
}
|
19
|
+
},
|
20
|
+
"job": {
|
21
|
+
"namespace": "my-scheduler-namespace",
|
22
|
+
"name": "myjob"
|
23
|
+
},
|
24
|
+
"inputs": [ ],
|
25
|
+
"outputs": [ ],
|
26
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
27
|
+
"schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
|
28
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:52:00.001+10:00",
|
4
|
+
"run": {
|
5
|
+
},
|
6
|
+
"job": {
|
7
|
+
"namespace": "my-scheduler-namespace",
|
8
|
+
"name": "myjob"
|
9
|
+
},
|
10
|
+
"inputs": [
|
11
|
+
{
|
12
|
+
"namespace": "my-datasource-namespace",
|
13
|
+
"name": "instance.schema.input-1"
|
14
|
+
},
|
15
|
+
{
|
16
|
+
"namespace": "my-datasource-namespace",
|
17
|
+
"name": "instance.schema.input-2"
|
18
|
+
}
|
19
|
+
],
|
20
|
+
"outputs": [
|
21
|
+
{
|
22
|
+
"namespace": "my-datasource-namespace",
|
23
|
+
"name": "instance.schema.output-1"
|
24
|
+
}
|
25
|
+
],
|
26
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
27
|
+
"schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
|
28
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
{
|
2
|
+
"eventType": "COMPLETE",
|
3
|
+
"eventTime": "2020-12-28T19:52:00.001+10:00",
|
4
|
+
"run": {
|
5
|
+
"runId": "41fb5137-f0fd-4ee5-ba5c-56f8571d1bd7"
|
6
|
+
},
|
7
|
+
"job": {
|
8
|
+
"namespace": "my-scheduler-namespace",
|
9
|
+
"name": "myjob"
|
10
|
+
},
|
11
|
+
"inputs": [
|
12
|
+
{
|
13
|
+
"namespace": "my-datasource-namespace",
|
14
|
+
"name": "instance.schema.input-1"
|
15
|
+
},
|
16
|
+
{
|
17
|
+
"namespace": "my-datasource-namespace",
|
18
|
+
"name": "instance.schema.input-2"
|
19
|
+
}
|
20
|
+
],
|
21
|
+
"outputs": [
|
22
|
+
{
|
23
|
+
"namespace": "my-datasource-namespace",
|
24
|
+
"name": "instance.schema.output-1"
|
25
|
+
}
|
26
|
+
],
|
27
|
+
"producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client",
|
28
|
+
"schemaURL": "https://openlineage.io/spec/1-0-1/OpenLineage.json#/definitions/RunEvent"
|
29
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Gem specification for the fluentd-openlineage-parser plugin.

# Put the gem's own lib/ directory on the load path (once) while the spec is evaluated.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |spec|
  spec.name        = "fluentd-openlineage-parser"
  spec.version     = "0.1.0"
  spec.authors     = ["Pawel Leszczynski"]
  spec.email       = ["leszczynski.pawel@gmail.com"]

  spec.summary     = "Parser to validate Openlineage events."
  spec.description = "Fluentd parser that validates if JSON is a valid Openlineage event."
  spec.homepage    = "http://openlineage.io"
  spec.license     = "Apache-2.0"

  # Split the git-tracked files into test material and files shipped as the gem proper.
  tracked_files = `git ls-files -z`.split("\x0")
  test_files, files = tracked_files.partition { |path| path.match(%r{^(test|spec|features)/}) }

  spec.files         = files
  spec.executables   = files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = test_files
  spec.require_paths = ["lib"]

  %w[bundler rake test-unit].each { |gem_name| spec.add_development_dependency gem_name }
  spec.add_runtime_dependency "fluentd", [">= 0.14.10", "< 2"]
  spec.add_dependency "rusty_json_schema"
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
require "fluent/plugin/parser"
|
2
|
+
require 'fluent/plugin/parser_json'
|
3
|
+
require 'json'
|
4
|
+
require "rusty_json_schema"
|
5
|
+
|
6
|
+
module Fluent
|
7
|
+
module Plugin
|
8
|
+
class OpenlineageParser < Fluent::Plugin::JSONParser
|
9
|
+
Fluent::Plugin.register_parser("openlineage", self)
|
10
|
+
|
11
|
+
DEFAULT_SPEC_DIRECTORY="/etc/spec"
|
12
|
+
|
13
|
+
# Configures the parser from its configuration section.
#
# Reads the spec directory (falling back to DEFAULT_SPEC_DIRECTORY and always
# normalised to end with "/") and the five validate_*_facets switches, then
# loads the schema, builds the validator and delegates to the parent class.
#
# Switch values may be real booleans or strings. Values read from a Fluentd
# config file presumably arrive as strings (NOTE(review): confirm for the
# Fluentd version in use), and the string "false" is truthy in Ruby, so the
# strings "false" and "no" (any case, surrounding blanks ignored) are mapped
# to false. Every other value keeps the truthiness it had before.
#
# @param conf [Hash-like] configuration; must respond to has_key?, [] and fetch
def configure(conf)
  @spec_directory = conf.has_key?('spec_directory') ? conf['spec_directory'] : DEFAULT_SPEC_DIRECTORY
  @spec_directory += "/" unless @spec_directory.end_with?("/")

  # Fetches a switch and turns textual "false"/"no" into a real false.
  switch = lambda do |key, default|
    value = conf.fetch(key, default)
    value.is_a?(String) ? !%w[false no].include?(value.strip.downcase) : value
  end

  @validate_input_dataset_facets = switch.call('validate_input_dataset_facets', false)
  @validate_output_dataset_facets = switch.call('validate_output_dataset_facets', false)
  @validate_dataset_facets = switch.call('validate_dataset_facets', false)
  @validate_run_facets = switch.call('validate_run_facets', true)
  @validate_job_facets = switch.call('validate_job_facets', true)

  # The schema depends on the switches above, so it is loaded only after they are set.
  @schema = load_schema
  @validator = RustyJSONSchema.build(@schema)
  super
end
|
31
|
+
|
32
|
+
# Parses +text+ with the parent JSON parser and validates the resulting record
# as an OpenLineage event before handing it to the caller's block.
#
# Parser plugin API: https://docs.fluentd.org/plugin-development/api-plugin-parser
def parse(text)
  super(text) do |time, record|
    # Raises when the record is not a valid event, so invalid records never reach the caller.
    validate_openlineage(record)
    yield time, record
  end
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
|
44
|
+
# Checks a parsed record against the OpenLineage schema.
#
# Returns nil when the record is valid.
# Raises ParserError when +json+ is nil or when the validator reports errors;
# the message lists the errors joined with ", ".
def validate_openlineage(json)
  raise ParserError, "Openlineage validation failed: invalid json provided" if json.nil?

  # https://github.com/driv3r/rusty_json_schema
  # Rust json parser ported to ruby that supports Draft 2020-12
  problems = @validator.validate(json)

  # The generic "no schema matched" message is swapped for per-candidate details.
  generic_mismatch = problems.join(", ").include?("is not valid under any of the given schemas")
  problems = enrich_oneOf_errors(json) if generic_mismatch
  return if problems.empty?

  raise ParserError, "Openlineage validation failed: " + problems.join(", ")
end
|
60
|
+
|
61
|
+
# Validator returns very generic OneOfNotValid error message.
# We try to find a better reason for the mismatch by validating the record
# against each "oneOf" candidate on its own.
#
# Each candidate gets its own schema copy: the "oneOf" entry is dropped and
# "$ref" points at the candidate. @schema itself is left untouched, because
# mutating it would remove "oneOf" for good and break every later call.
#
# Returns an array of strings, one per candidate that rejected the record,
# formatted as "<candidate>: <errors joined with ', '>".
def enrich_oneOf_errors(json)
  # Shallow copy without "oneOf"; nested definitions are shared but never modified here.
  base_schema = @schema.reject { |key, _| key == "oneOf" }

  @schema["oneOf"].each_with_object([]) do |ref, errors|
    candidate_schema = base_schema.merge("$ref" => ref["$ref"])
    error = RustyJSONSchema.build(candidate_schema).validate(json)
    errors.append("#{ref}: #{error.join(", ")}") unless error.empty?
  end
end
|
77
|
+
|
78
|
+
# Reads OpenLineage.json from the configured spec directory and returns it
# after the facet definitions have been merged in by
# rewrite_schema_to_include_facets.
#
# Raises ParserError when the schema file does not exist.
def load_schema
  schema_path = @spec_directory + "OpenLineage.json"

  unless File.exist?(schema_path)
    raise ParserError, "Couldn't find Openlineage.json file within a defined spec directory: " + schema_path
  end

  rewrite_schema_to_include_facets(File.read(schema_path))
end
|
89
|
+
|
90
|
+
# Current Openlineage schema contains references to facets' definitions stored in files
# in facets directory which are not valid schemas for json_schema.
# In this step we rewrite Openlineage schema to contain facets definitions within it.
#
# schema  - the text of OpenLineage.json.
# Returns the parsed schema (a Hash) whose "$defs" also hold every definition
# found in <spec_directory>/facets/*.json, with facet properties registered on
# their parent objects through add_ref_as_parent_property.
#
# Facet properties without a "$ref", and facet files without "properties" or
# "$defs", are skipped rather than treated as an error.
def rewrite_schema_to_include_facets(schema)
  # replace all the refs in schema to local refs
  # "facets/ColumnLineageDatasetFacet.json" -> "#/$defs/ColumnLineageDatasetFacet"
  local_schema = schema.gsub(%r{"facets/([a-zA-Z]+)\.json"}, '"#/$defs/\1"')
  schema_json = JSON.parse(local_schema)

  # list all the facets
  Dir.glob(File.join(@spec_directory, "facets", "*.json")).each do |facet_file|
    # Absolute references into any published spec version become local ones;
    # every version component may have more than one digit.
    facet_schema = JSON.parse(
      File.read(facet_file).gsub(
        %r{"https://openlineage\.io/spec/\d+-\d+-\d+/OpenLineage\.json#/\$defs/([a-zA-Z]+)"},
        '"#/$defs/\1"'
      )
    )
    facet_defs = facet_schema["$defs"] || {}

    (facet_schema["properties"] || {}).each do |property, ref|
      # Only properties that point at a definition can be attached to a parent.
      next unless ref.is_a?(Hash) && ref["$ref"]

      facet_name = ref["$ref"].gsub("#/$defs/", "")
      # Parents are the definitions referenced from the facet's "allOf" list.
      parents = (facet_defs.dig(facet_name, "allOf") || [])
                .map { |definition| definition["$ref"] }
                .compact
                .map { |parent_ref| parent_ref.gsub("#/$defs/", "") }

      parents.each do |parent|
        add_ref_as_parent_property(schema_json, parent, facet_name, property)
      end
    end

    # include facets' definitions within schema
    schema_json["$defs"] = schema_json["$defs"].merge(facet_defs)
  end

  schema_json
end
|
130
|
+
|
131
|
+
# Registers a facet on its parent object inside +schema+.
#
# Looks up where facets of kind +parent+ live (find_parent_object_getter) and
# sets properties[property] = {"$ref" => "#/$defs/<facet_name>"} on that
# object, creating the "properties" hash when it is absent.
# Does nothing when no getter is available for +parent+.
def add_ref_as_parent_property(schema, parent, facet_name, property)
  getter = find_parent_object_getter(parent)
  return if getter.nil?

  facets_object = getter.call(schema)
  known_properties = facets_object["properties"] || {}
  known_properties[property] = {"$ref" => "#/$defs/" + facet_name}
  facets_object["properties"] = known_properties
end
|
140
|
+
|
141
|
+
# Based on the parent facet name, finds the path to the schema object that
# holds facets of that kind.
#
# Returns a lambda that takes the parsed schema and returns the matching
# "facets" / "outputFacets" / "inputFacets" object, or nil when +parent+ is
# not one of the known names or the corresponding validate_* switch is off.
def find_parent_object_getter(parent)
  case parent
  when "JobFacet"
    ->(schema) { schema["$defs"]["Job"]["properties"]["facets"] } if @validate_job_facets
  when "RunFacet"
    ->(schema) { schema["$defs"]["Run"]["properties"]["facets"] } if @validate_run_facets
  when "DatasetFacet"
    ->(schema) { schema["$defs"]["Dataset"]["properties"]["facets"] } if @validate_dataset_facets
  when "OutputDatasetFacet"
    if @validate_output_dataset_facets
      lambda do |schema|
        # The facets sit on the first "allOf" member that declares a "type".
        typed_member = schema["$defs"]["OutputDataset"]["allOf"].find { |el| el.key?("type") }
        typed_member["properties"]["outputFacets"]
      end
    end
  when "InputDatasetFacet"
    if @validate_input_dataset_facets
      lambda do |schema|
        # The facets sit on the first "allOf" member that declares a "type".
        typed_member = schema["$defs"]["InputDataset"]["allOf"].find { |el| el.key?("type") }
        typed_member["properties"]["inputFacets"]
      end
    end
  end
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|