acryl-datahub 1.0.0.1rc6__py3-none-any.whl → 1.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2.dist-info}/METADATA +2557 -2557
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2.dist-info}/RECORD +81 -79
- datahub/_version.py +1 -1
- datahub/api/entities/datajob/dataflow.py +15 -0
- datahub/api/entities/datajob/datajob.py +17 -0
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataset/dataset.py +2 -2
- datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
- datahub/cli/ingest_cli.py +4 -4
- datahub/cli/migrate.py +6 -6
- datahub/configuration/common.py +1 -1
- datahub/emitter/mcp_builder.py +4 -0
- datahub/errors.py +4 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/source.py +6 -2
- datahub/ingestion/api/source_helpers.py +35 -2
- datahub/ingestion/graph/client.py +122 -7
- datahub/ingestion/graph/filters.py +41 -16
- datahub/ingestion/run/pipeline.py +0 -6
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -10
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/fivetran/fivetran.py +1 -0
- datahub/ingestion/source/fivetran/fivetran_log_api.py +1 -1
- datahub/ingestion/source/hex/constants.py +5 -0
- datahub/ingestion/source/hex/hex.py +150 -22
- datahub/ingestion/source/hex/mapper.py +28 -2
- datahub/ingestion/source/hex/model.py +10 -2
- datahub/ingestion/source/hex/query_fetcher.py +300 -0
- datahub/ingestion/source/iceberg/iceberg.py +106 -18
- datahub/ingestion/source/kafka/kafka.py +1 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +1 -1
- datahub/ingestion/source/kafka_connect/source_connectors.py +1 -1
- datahub/ingestion/source/looker/looker_source.py +2 -3
- datahub/ingestion/source/mlflow.py +6 -7
- datahub/ingestion/source/mode.py +2 -2
- datahub/ingestion/source/nifi.py +3 -3
- datahub/ingestion/source/openapi.py +3 -3
- datahub/ingestion/source/openapi_parser.py +8 -8
- datahub/ingestion/source/powerbi/config.py +1 -1
- datahub/ingestion/source/powerbi/powerbi.py +16 -3
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/sigma/sigma.py +6 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -1
- datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
- datahub/ingestion/source/sql/trino.py +4 -3
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/superset.py +108 -81
- datahub/ingestion/source/tableau/tableau.py +4 -4
- datahub/ingestion/source/tableau/tableau_common.py +2 -2
- datahub/ingestion/source/unity/source.py +1 -1
- datahub/ingestion/source/vertexai/vertexai.py +7 -7
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/_schema_classes.py +47 -2
- datahub/metadata/_urns/urn_defs.py +56 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +121 -85
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
- datahub/metadata/schemas/QueryProperties.avsc +4 -2
- datahub/metadata/schemas/SystemMetadata.avsc +86 -0
- datahub/sdk/search_client.py +81 -8
- datahub/sdk/search_filters.py +73 -11
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +6 -6
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2.dist-info}/top_level.txt +0 -0
datahub/metadata/schema.avsc
CHANGED
|
@@ -2343,12 +2343,14 @@
|
|
|
2343
2343
|
"type": {
|
|
2344
2344
|
"type": "enum",
|
|
2345
2345
|
"symbolDocs": {
|
|
2346
|
-
"SQL": "A SQL Query"
|
|
2346
|
+
"SQL": "A SQL Query",
|
|
2347
|
+
"UNKNOWN": "Unknown query language"
|
|
2347
2348
|
},
|
|
2348
2349
|
"name": "QueryLanguage",
|
|
2349
2350
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
2350
2351
|
"symbols": [
|
|
2351
|
-
"SQL"
|
|
2352
|
+
"SQL",
|
|
2353
|
+
"UNKNOWN"
|
|
2352
2354
|
]
|
|
2353
2355
|
},
|
|
2354
2356
|
"name": "language",
|
|
@@ -2887,6 +2889,11 @@
|
|
|
2887
2889
|
"namespace": "com.linkedin.pegasus2avro.form",
|
|
2888
2890
|
"fields": [
|
|
2889
2891
|
{
|
|
2892
|
+
"Searchable": {
|
|
2893
|
+
"fieldName": "promptId",
|
|
2894
|
+
"fieldType": "KEYWORD",
|
|
2895
|
+
"queryByDefault": false
|
|
2896
|
+
},
|
|
2890
2897
|
"type": "string",
|
|
2891
2898
|
"name": "id",
|
|
2892
2899
|
"doc": "The unique id for this prompt. This must be GLOBALLY unique."
|
|
@@ -4130,6 +4137,92 @@
|
|
|
4130
4137
|
],
|
|
4131
4138
|
"doc": "Editable information about an Asset Container as defined on the DataHub Platform"
|
|
4132
4139
|
},
|
|
4140
|
+
{
|
|
4141
|
+
"type": "record",
|
|
4142
|
+
"Aspect": {
|
|
4143
|
+
"name": "systemMetadata"
|
|
4144
|
+
},
|
|
4145
|
+
"name": "SystemMetadata",
|
|
4146
|
+
"namespace": "com.linkedin.pegasus2avro.mxe",
|
|
4147
|
+
"fields": [
|
|
4148
|
+
{
|
|
4149
|
+
"type": [
|
|
4150
|
+
"long",
|
|
4151
|
+
"null"
|
|
4152
|
+
],
|
|
4153
|
+
"name": "lastObserved",
|
|
4154
|
+
"default": 0,
|
|
4155
|
+
"doc": "The timestamp the metadata was observed at"
|
|
4156
|
+
},
|
|
4157
|
+
{
|
|
4158
|
+
"type": [
|
|
4159
|
+
"string",
|
|
4160
|
+
"null"
|
|
4161
|
+
],
|
|
4162
|
+
"name": "runId",
|
|
4163
|
+
"default": "no-run-id-provided",
|
|
4164
|
+
"doc": "The original run id that produced the metadata. Populated in case of batch-ingestion."
|
|
4165
|
+
},
|
|
4166
|
+
{
|
|
4167
|
+
"type": [
|
|
4168
|
+
"string",
|
|
4169
|
+
"null"
|
|
4170
|
+
],
|
|
4171
|
+
"name": "lastRunId",
|
|
4172
|
+
"default": "no-run-id-provided",
|
|
4173
|
+
"doc": "The last run id that produced the metadata. Populated in case of batch-ingestion."
|
|
4174
|
+
},
|
|
4175
|
+
{
|
|
4176
|
+
"type": [
|
|
4177
|
+
"null",
|
|
4178
|
+
"string"
|
|
4179
|
+
],
|
|
4180
|
+
"name": "pipelineName",
|
|
4181
|
+
"default": null,
|
|
4182
|
+
"doc": "The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion."
|
|
4183
|
+
},
|
|
4184
|
+
{
|
|
4185
|
+
"type": [
|
|
4186
|
+
"null",
|
|
4187
|
+
"string"
|
|
4188
|
+
],
|
|
4189
|
+
"name": "registryName",
|
|
4190
|
+
"default": null,
|
|
4191
|
+
"doc": "The model registry name that was used to process this event"
|
|
4192
|
+
},
|
|
4193
|
+
{
|
|
4194
|
+
"type": [
|
|
4195
|
+
"null",
|
|
4196
|
+
"string"
|
|
4197
|
+
],
|
|
4198
|
+
"name": "registryVersion",
|
|
4199
|
+
"default": null,
|
|
4200
|
+
"doc": "The model registry version that was used to process this event"
|
|
4201
|
+
},
|
|
4202
|
+
{
|
|
4203
|
+
"type": [
|
|
4204
|
+
"null",
|
|
4205
|
+
{
|
|
4206
|
+
"type": "map",
|
|
4207
|
+
"values": "string"
|
|
4208
|
+
}
|
|
4209
|
+
],
|
|
4210
|
+
"name": "properties",
|
|
4211
|
+
"default": null,
|
|
4212
|
+
"doc": "Additional properties"
|
|
4213
|
+
},
|
|
4214
|
+
{
|
|
4215
|
+
"type": [
|
|
4216
|
+
"null",
|
|
4217
|
+
"string"
|
|
4218
|
+
],
|
|
4219
|
+
"name": "version",
|
|
4220
|
+
"default": null,
|
|
4221
|
+
"doc": "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted."
|
|
4222
|
+
}
|
|
4223
|
+
],
|
|
4224
|
+
"doc": "Metadata associated with each metadata change that is processed by the system"
|
|
4225
|
+
},
|
|
4133
4226
|
{
|
|
4134
4227
|
"type": "record",
|
|
4135
4228
|
"name": "PlatformEvent",
|
|
@@ -4369,89 +4462,7 @@
|
|
|
4369
4462
|
{
|
|
4370
4463
|
"type": [
|
|
4371
4464
|
"null",
|
|
4372
|
-
|
|
4373
|
-
"type": "record",
|
|
4374
|
-
"name": "SystemMetadata",
|
|
4375
|
-
"namespace": "com.linkedin.pegasus2avro.mxe",
|
|
4376
|
-
"fields": [
|
|
4377
|
-
{
|
|
4378
|
-
"type": [
|
|
4379
|
-
"long",
|
|
4380
|
-
"null"
|
|
4381
|
-
],
|
|
4382
|
-
"name": "lastObserved",
|
|
4383
|
-
"default": 0,
|
|
4384
|
-
"doc": "The timestamp the metadata was observed at"
|
|
4385
|
-
},
|
|
4386
|
-
{
|
|
4387
|
-
"type": [
|
|
4388
|
-
"string",
|
|
4389
|
-
"null"
|
|
4390
|
-
],
|
|
4391
|
-
"name": "runId",
|
|
4392
|
-
"default": "no-run-id-provided",
|
|
4393
|
-
"doc": "The original run id that produced the metadata. Populated in case of batch-ingestion."
|
|
4394
|
-
},
|
|
4395
|
-
{
|
|
4396
|
-
"type": [
|
|
4397
|
-
"string",
|
|
4398
|
-
"null"
|
|
4399
|
-
],
|
|
4400
|
-
"name": "lastRunId",
|
|
4401
|
-
"default": "no-run-id-provided",
|
|
4402
|
-
"doc": "The last run id that produced the metadata. Populated in case of batch-ingestion."
|
|
4403
|
-
},
|
|
4404
|
-
{
|
|
4405
|
-
"type": [
|
|
4406
|
-
"null",
|
|
4407
|
-
"string"
|
|
4408
|
-
],
|
|
4409
|
-
"name": "pipelineName",
|
|
4410
|
-
"default": null,
|
|
4411
|
-
"doc": "The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion."
|
|
4412
|
-
},
|
|
4413
|
-
{
|
|
4414
|
-
"type": [
|
|
4415
|
-
"null",
|
|
4416
|
-
"string"
|
|
4417
|
-
],
|
|
4418
|
-
"name": "registryName",
|
|
4419
|
-
"default": null,
|
|
4420
|
-
"doc": "The model registry name that was used to process this event"
|
|
4421
|
-
},
|
|
4422
|
-
{
|
|
4423
|
-
"type": [
|
|
4424
|
-
"null",
|
|
4425
|
-
"string"
|
|
4426
|
-
],
|
|
4427
|
-
"name": "registryVersion",
|
|
4428
|
-
"default": null,
|
|
4429
|
-
"doc": "The model registry version that was used to process this event"
|
|
4430
|
-
},
|
|
4431
|
-
{
|
|
4432
|
-
"type": [
|
|
4433
|
-
"null",
|
|
4434
|
-
{
|
|
4435
|
-
"type": "map",
|
|
4436
|
-
"values": "string"
|
|
4437
|
-
}
|
|
4438
|
-
],
|
|
4439
|
-
"name": "properties",
|
|
4440
|
-
"default": null,
|
|
4441
|
-
"doc": "Additional properties"
|
|
4442
|
-
},
|
|
4443
|
-
{
|
|
4444
|
-
"type": [
|
|
4445
|
-
"null",
|
|
4446
|
-
"string"
|
|
4447
|
-
],
|
|
4448
|
-
"name": "version",
|
|
4449
|
-
"default": null,
|
|
4450
|
-
"doc": "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted."
|
|
4451
|
-
}
|
|
4452
|
-
],
|
|
4453
|
-
"doc": "Metadata associated with each metadata change that is processed by the system"
|
|
4454
|
-
}
|
|
4465
|
+
"com.linkedin.pegasus2avro.mxe.SystemMetadata"
|
|
4455
4466
|
],
|
|
4456
4467
|
"name": "systemMetadata",
|
|
4457
4468
|
"default": null,
|
|
@@ -10156,6 +10167,9 @@
|
|
|
10156
10167
|
"doc": "Version of the MLModelDeployment"
|
|
10157
10168
|
},
|
|
10158
10169
|
{
|
|
10170
|
+
"Searchable": {
|
|
10171
|
+
"fieldName": "deploymentStatus"
|
|
10172
|
+
},
|
|
10159
10173
|
"type": [
|
|
10160
10174
|
"null",
|
|
10161
10175
|
{
|
|
@@ -15705,6 +15719,28 @@
|
|
|
15705
15719
|
],
|
|
15706
15720
|
"doc": "Key for a External AccessManagement"
|
|
15707
15721
|
},
|
|
15722
|
+
{
|
|
15723
|
+
"type": "record",
|
|
15724
|
+
"Aspect": {
|
|
15725
|
+
"name": "dataHubOpenAPISchemaKey",
|
|
15726
|
+
"keyForEntity": "dataHubOpenAPISchema",
|
|
15727
|
+
"entityCategory": "internal",
|
|
15728
|
+
"entityAspects": [
|
|
15729
|
+
"systemMetadata"
|
|
15730
|
+
],
|
|
15731
|
+
"entityDoc": "Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model."
|
|
15732
|
+
},
|
|
15733
|
+
"name": "DataHubOpenAPISchemaKey",
|
|
15734
|
+
"namespace": "com.linkedin.pegasus2avro.metadata.key",
|
|
15735
|
+
"fields": [
|
|
15736
|
+
{
|
|
15737
|
+
"type": "string",
|
|
15738
|
+
"name": "id",
|
|
15739
|
+
"doc": "A unique id for the DataHub OpenAPI schema."
|
|
15740
|
+
}
|
|
15741
|
+
],
|
|
15742
|
+
"doc": "Key for a Query"
|
|
15743
|
+
},
|
|
15708
15744
|
{
|
|
15709
15745
|
"type": "record",
|
|
15710
15746
|
"Aspect": {
|
datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataHubOpenAPISchemaKey",
|
|
5
|
+
"keyForEntity": "dataHubOpenAPISchema",
|
|
6
|
+
"entityCategory": "internal",
|
|
7
|
+
"entityAspects": [
|
|
8
|
+
"systemMetadata"
|
|
9
|
+
],
|
|
10
|
+
"entityDoc": "Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model."
|
|
11
|
+
},
|
|
12
|
+
"name": "DataHubOpenAPISchemaKey",
|
|
13
|
+
"namespace": "com.linkedin.pegasus2avro.metadata.key",
|
|
14
|
+
"fields": [
|
|
15
|
+
{
|
|
16
|
+
"type": "string",
|
|
17
|
+
"name": "id",
|
|
18
|
+
"doc": "A unique id for the DataHub OpenAPI schema."
|
|
19
|
+
}
|
|
20
|
+
],
|
|
21
|
+
"doc": "Key for a Query"
|
|
22
|
+
}
|
datahub/metadata/schemas/DataTransformLogic.avsc
CHANGED
|
@@ -31,12 +31,14 @@
|
|
|
31
31
|
"type": {
|
|
32
32
|
"type": "enum",
|
|
33
33
|
"symbolDocs": {
|
|
34
|
-
"SQL": "A SQL Query"
|
|
34
|
+
"SQL": "A SQL Query",
|
|
35
|
+
"UNKNOWN": "Unknown query language"
|
|
35
36
|
},
|
|
36
37
|
"name": "QueryLanguage",
|
|
37
38
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
38
39
|
"symbols": [
|
|
39
|
-
"SQL"
|
|
40
|
+
"SQL",
|
|
41
|
+
"UNKNOWN"
|
|
40
42
|
]
|
|
41
43
|
},
|
|
42
44
|
"name": "language",
|
datahub/metadata/schemas/FormInfo.avsc
CHANGED
|
@@ -53,6 +53,11 @@
|
|
|
53
53
|
"namespace": "com.linkedin.pegasus2avro.form",
|
|
54
54
|
"fields": [
|
|
55
55
|
{
|
|
56
|
+
"Searchable": {
|
|
57
|
+
"fieldName": "promptId",
|
|
58
|
+
"fieldType": "KEYWORD",
|
|
59
|
+
"queryByDefault": false
|
|
60
|
+
},
|
|
56
61
|
"type": "string",
|
|
57
62
|
"name": "id",
|
|
58
63
|
"doc": "The unique id for this prompt. This must be GLOBALLY unique."
|
datahub/metadata/schemas/MetadataChangeEvent.avsc
CHANGED
|
@@ -6755,6 +6755,9 @@
|
|
|
6755
6755
|
"doc": "Version of the MLModelDeployment"
|
|
6756
6756
|
},
|
|
6757
6757
|
{
|
|
6758
|
+
"Searchable": {
|
|
6759
|
+
"fieldName": "deploymentStatus"
|
|
6760
|
+
},
|
|
6758
6761
|
"type": [
|
|
6759
6762
|
"null",
|
|
6760
6763
|
{
|
|
@@ -8116,6 +8119,9 @@
|
|
|
8116
8119
|
"null",
|
|
8117
8120
|
{
|
|
8118
8121
|
"type": "record",
|
|
8122
|
+
"Aspect": {
|
|
8123
|
+
"name": "systemMetadata"
|
|
8124
|
+
},
|
|
8119
8125
|
"name": "SystemMetadata",
|
|
8120
8126
|
"namespace": "com.linkedin.pegasus2avro.mxe",
|
|
8121
8127
|
"fields": [
|
datahub/metadata/schemas/QueryProperties.avsc
CHANGED
|
@@ -36,12 +36,14 @@
|
|
|
36
36
|
"type": {
|
|
37
37
|
"type": "enum",
|
|
38
38
|
"symbolDocs": {
|
|
39
|
-
"SQL": "A SQL Query"
|
|
39
|
+
"SQL": "A SQL Query",
|
|
40
|
+
"UNKNOWN": "Unknown query language"
|
|
40
41
|
},
|
|
41
42
|
"name": "QueryLanguage",
|
|
42
43
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
43
44
|
"symbols": [
|
|
44
|
-
"SQL"
|
|
45
|
+
"SQL",
|
|
46
|
+
"UNKNOWN"
|
|
45
47
|
]
|
|
46
48
|
},
|
|
47
49
|
"name": "language",
|
datahub/metadata/schemas/SystemMetadata.avsc
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "systemMetadata"
|
|
5
|
+
},
|
|
6
|
+
"name": "SystemMetadata",
|
|
7
|
+
"namespace": "com.linkedin.pegasus2avro.mxe",
|
|
8
|
+
"fields": [
|
|
9
|
+
{
|
|
10
|
+
"type": [
|
|
11
|
+
"long",
|
|
12
|
+
"null"
|
|
13
|
+
],
|
|
14
|
+
"name": "lastObserved",
|
|
15
|
+
"default": 0,
|
|
16
|
+
"doc": "The timestamp the metadata was observed at"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"type": [
|
|
20
|
+
"string",
|
|
21
|
+
"null"
|
|
22
|
+
],
|
|
23
|
+
"name": "runId",
|
|
24
|
+
"default": "no-run-id-provided",
|
|
25
|
+
"doc": "The original run id that produced the metadata. Populated in case of batch-ingestion."
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"type": [
|
|
29
|
+
"string",
|
|
30
|
+
"null"
|
|
31
|
+
],
|
|
32
|
+
"name": "lastRunId",
|
|
33
|
+
"default": "no-run-id-provided",
|
|
34
|
+
"doc": "The last run id that produced the metadata. Populated in case of batch-ingestion."
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"type": [
|
|
38
|
+
"null",
|
|
39
|
+
"string"
|
|
40
|
+
],
|
|
41
|
+
"name": "pipelineName",
|
|
42
|
+
"default": null,
|
|
43
|
+
"doc": "The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion."
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"type": [
|
|
47
|
+
"null",
|
|
48
|
+
"string"
|
|
49
|
+
],
|
|
50
|
+
"name": "registryName",
|
|
51
|
+
"default": null,
|
|
52
|
+
"doc": "The model registry name that was used to process this event"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": [
|
|
56
|
+
"null",
|
|
57
|
+
"string"
|
|
58
|
+
],
|
|
59
|
+
"name": "registryVersion",
|
|
60
|
+
"default": null,
|
|
61
|
+
"doc": "The model registry version that was used to process this event"
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"type": [
|
|
65
|
+
"null",
|
|
66
|
+
{
|
|
67
|
+
"type": "map",
|
|
68
|
+
"values": "string"
|
|
69
|
+
}
|
|
70
|
+
],
|
|
71
|
+
"name": "properties",
|
|
72
|
+
"default": null,
|
|
73
|
+
"doc": "Additional properties"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"type": [
|
|
77
|
+
"null",
|
|
78
|
+
"string"
|
|
79
|
+
],
|
|
80
|
+
"name": "version",
|
|
81
|
+
"default": null,
|
|
82
|
+
"doc": "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted."
|
|
83
|
+
}
|
|
84
|
+
],
|
|
85
|
+
"doc": "Metadata associated with each metadata change that is processed by the system"
|
|
86
|
+
}
|
datahub/sdk/search_client.py
CHANGED
|
@@ -2,36 +2,106 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import (
|
|
4
4
|
TYPE_CHECKING,
|
|
5
|
-
Dict,
|
|
6
5
|
Iterable,
|
|
7
6
|
List,
|
|
8
7
|
Optional,
|
|
8
|
+
Tuple,
|
|
9
|
+
Type,
|
|
10
|
+
TypeVar,
|
|
9
11
|
)
|
|
10
12
|
|
|
11
|
-
from datahub.ingestion.graph.filters import
|
|
13
|
+
from datahub.ingestion.graph.filters import RawSearchFilter, RemovedStatusFilter
|
|
12
14
|
from datahub.metadata.urns import Urn
|
|
13
|
-
from datahub.sdk.search_filters import
|
|
15
|
+
from datahub.sdk.search_filters import (
|
|
16
|
+
Filter,
|
|
17
|
+
FilterDsl,
|
|
18
|
+
_EntityTypeFilter,
|
|
19
|
+
_OrFilters,
|
|
20
|
+
_StatusFilter,
|
|
21
|
+
)
|
|
14
22
|
|
|
15
23
|
if TYPE_CHECKING:
|
|
16
24
|
from datahub.sdk.main_client import DataHubClient
|
|
17
25
|
|
|
18
26
|
|
|
27
|
+
_FilterType = TypeVar("_FilterType", bound=Filter)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _typed_dfs(
|
|
31
|
+
filter: Optional[_FilterType], type: Type[_FilterType]
|
|
32
|
+
) -> Optional[List[_FilterType]]:
|
|
33
|
+
if filter is None:
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
found: Optional[List[_FilterType]] = None
|
|
37
|
+
for f in filter.dfs():
|
|
38
|
+
if isinstance(f, type):
|
|
39
|
+
if found is None:
|
|
40
|
+
found = []
|
|
41
|
+
found.append(f)
|
|
42
|
+
return found
|
|
43
|
+
|
|
44
|
+
|
|
19
45
|
def compile_filters(
|
|
20
46
|
filter: Optional[Filter],
|
|
21
|
-
) -> Optional[List[
|
|
47
|
+
) -> Tuple[Optional[List[str]], RawSearchFilter]:
|
|
22
48
|
# TODO: Not every filter type is supported for every entity type.
|
|
23
49
|
# If we can detect issues with the filters at compile time, we should
|
|
24
50
|
# raise an error.
|
|
25
51
|
|
|
26
|
-
|
|
27
|
-
|
|
52
|
+
existing_soft_deleted_filter = _typed_dfs(filter, _StatusFilter)
|
|
53
|
+
if existing_soft_deleted_filter is None:
|
|
54
|
+
soft_deleted_filter = FilterDsl.soft_deleted(
|
|
55
|
+
RemovedStatusFilter.NOT_SOFT_DELETED
|
|
56
|
+
)
|
|
57
|
+
if filter is None:
|
|
58
|
+
filter = soft_deleted_filter
|
|
59
|
+
else:
|
|
60
|
+
filter = FilterDsl.and_(filter, soft_deleted_filter)
|
|
61
|
+
|
|
62
|
+
# This should be safe - if filter were None coming in, then we would replace it
|
|
63
|
+
# with the soft-deleted filter.
|
|
64
|
+
assert filter is not None
|
|
28
65
|
|
|
29
66
|
initial_filters = filter.compile()
|
|
30
|
-
|
|
67
|
+
|
|
68
|
+
compiled_filters: RawSearchFilter = [
|
|
31
69
|
{"and": [rule.to_raw() for rule in andClause["and"]]}
|
|
32
70
|
for andClause in initial_filters
|
|
33
71
|
]
|
|
34
72
|
|
|
73
|
+
entity_types = compute_entity_types(initial_filters)
|
|
74
|
+
|
|
75
|
+
return entity_types, compiled_filters
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def compute_entity_types(
|
|
79
|
+
filters: _OrFilters,
|
|
80
|
+
) -> Optional[List[str]]:
|
|
81
|
+
found_filters = False
|
|
82
|
+
found_positive_filters = False
|
|
83
|
+
entity_types: List[str] = []
|
|
84
|
+
for ands in filters:
|
|
85
|
+
for clause in ands["and"]:
|
|
86
|
+
if clause.field == _EntityTypeFilter.ENTITY_TYPE_FIELD:
|
|
87
|
+
found_filters = True
|
|
88
|
+
if not clause.negated:
|
|
89
|
+
found_positive_filters = True
|
|
90
|
+
|
|
91
|
+
entity_types.extend(clause.values)
|
|
92
|
+
|
|
93
|
+
if not found_filters:
|
|
94
|
+
# If we didn't find any filters, use None so we use the default set.
|
|
95
|
+
return None
|
|
96
|
+
|
|
97
|
+
if not found_positive_filters:
|
|
98
|
+
# If we only found negated filters, then it's probably a query like
|
|
99
|
+
# "find me all entities except for dashboards". In that case, we
|
|
100
|
+
# still want to use the default set.
|
|
101
|
+
return None
|
|
102
|
+
|
|
103
|
+
return entity_types
|
|
104
|
+
|
|
35
105
|
|
|
36
106
|
class SearchClient:
|
|
37
107
|
def __init__(self, client: DataHubClient):
|
|
@@ -43,8 +113,11 @@ class SearchClient:
|
|
|
43
113
|
filter: Optional[Filter] = None,
|
|
44
114
|
) -> Iterable[Urn]:
|
|
45
115
|
# TODO: Add better limit / pagination support.
|
|
116
|
+
types, compiled_filters = compile_filters(filter)
|
|
46
117
|
for urn in self._client._graph.get_urns_by_filter(
|
|
47
118
|
query=query,
|
|
48
|
-
|
|
119
|
+
status=None,
|
|
120
|
+
extra_or_filters=compiled_filters,
|
|
121
|
+
entity_types=types,
|
|
49
122
|
):
|
|
50
123
|
yield Urn.from_string(urn)
|