acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2559 -2532
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +226 -190
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +12 -16
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/emitter/response_helper.py +86 -1
- datahub/emitter/rest_emitter.py +71 -13
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/config.py +11 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +187 -35
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/openapi.py +12 -0
- datahub/ingestion/source/openapi_parser.py +56 -37
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +43 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +33 -8
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1446 -559
- datahub/metadata/_urns/urn_defs.py +1721 -1553
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +18055 -17802
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/sdk/__init__.py +6 -0
- datahub/sdk/_all_entities.py +11 -0
- datahub/sdk/_shared.py +118 -1
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +90 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"glossaryTerms",
|
|
9
9
|
"editableMlModelGroupProperties",
|
|
10
10
|
"domains",
|
|
11
|
+
"applications",
|
|
11
12
|
"mlModelGroupProperties",
|
|
12
13
|
"ownership",
|
|
13
14
|
"status",
|
|
@@ -60,13 +61,17 @@
|
|
|
60
61
|
"DEV": "Designates development fabrics",
|
|
61
62
|
"EI": "Designates early-integration fabrics",
|
|
62
63
|
"NON_PROD": "Designates non-production fabrics",
|
|
64
|
+
"PRD": "Alternative Prod spelling",
|
|
63
65
|
"PRE": "Designates pre-production fabrics",
|
|
64
66
|
"PROD": "Designates production fabrics",
|
|
65
67
|
"QA": "Designates quality assurance fabrics",
|
|
66
68
|
"RVW": "Designates review fabrics",
|
|
67
69
|
"SANDBOX": "Designates sandbox fabrics",
|
|
70
|
+
"SBX": "Alternative spelling for sandbox",
|
|
71
|
+
"SIT": "System Integration Testing",
|
|
68
72
|
"STG": "Designates staging fabrics",
|
|
69
73
|
"TEST": "Designates testing fabrics",
|
|
74
|
+
"TST": "Alternative Test spelling",
|
|
70
75
|
"UAT": "Designates user acceptance testing fabrics"
|
|
71
76
|
},
|
|
72
77
|
"name": "FabricType",
|
|
@@ -83,6 +88,10 @@
|
|
|
83
88
|
"PROD",
|
|
84
89
|
"CORP",
|
|
85
90
|
"RVW",
|
|
91
|
+
"PRD",
|
|
92
|
+
"TST",
|
|
93
|
+
"SIT",
|
|
94
|
+
"SBX",
|
|
86
95
|
"SANDBOX"
|
|
87
96
|
],
|
|
88
97
|
"doc": "Fabric group type"
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"glossaryTerms",
|
|
9
9
|
"editableMlModelProperties",
|
|
10
10
|
"domains",
|
|
11
|
+
"applications",
|
|
11
12
|
"ownership",
|
|
12
13
|
"mlModelProperties",
|
|
13
14
|
"intendedUse",
|
|
@@ -74,13 +75,17 @@
|
|
|
74
75
|
"DEV": "Designates development fabrics",
|
|
75
76
|
"EI": "Designates early-integration fabrics",
|
|
76
77
|
"NON_PROD": "Designates non-production fabrics",
|
|
78
|
+
"PRD": "Alternative Prod spelling",
|
|
77
79
|
"PRE": "Designates pre-production fabrics",
|
|
78
80
|
"PROD": "Designates production fabrics",
|
|
79
81
|
"QA": "Designates quality assurance fabrics",
|
|
80
82
|
"RVW": "Designates review fabrics",
|
|
81
83
|
"SANDBOX": "Designates sandbox fabrics",
|
|
84
|
+
"SBX": "Alternative spelling for sandbox",
|
|
85
|
+
"SIT": "System Integration Testing",
|
|
82
86
|
"STG": "Designates staging fabrics",
|
|
83
87
|
"TEST": "Designates testing fabrics",
|
|
88
|
+
"TST": "Alternative Test spelling",
|
|
84
89
|
"UAT": "Designates user acceptance testing fabrics"
|
|
85
90
|
},
|
|
86
91
|
"name": "FabricType",
|
|
@@ -97,6 +102,10 @@
|
|
|
97
102
|
"PROD",
|
|
98
103
|
"CORP",
|
|
99
104
|
"RVW",
|
|
105
|
+
"PRD",
|
|
106
|
+
"TST",
|
|
107
|
+
"SIT",
|
|
108
|
+
"SBX",
|
|
100
109
|
"SANDBOX"
|
|
101
110
|
],
|
|
102
111
|
"doc": "Fabric group type"
|
|
@@ -2424,13 +2424,17 @@
|
|
|
2424
2424
|
"DEV": "Designates development fabrics",
|
|
2425
2425
|
"EI": "Designates early-integration fabrics",
|
|
2426
2426
|
"NON_PROD": "Designates non-production fabrics",
|
|
2427
|
+
"PRD": "Alternative Prod spelling",
|
|
2427
2428
|
"PRE": "Designates pre-production fabrics",
|
|
2428
2429
|
"PROD": "Designates production fabrics",
|
|
2429
2430
|
"QA": "Designates quality assurance fabrics",
|
|
2430
2431
|
"RVW": "Designates review fabrics",
|
|
2431
2432
|
"SANDBOX": "Designates sandbox fabrics",
|
|
2433
|
+
"SBX": "Alternative spelling for sandbox",
|
|
2434
|
+
"SIT": "System Integration Testing",
|
|
2432
2435
|
"STG": "Designates staging fabrics",
|
|
2433
2436
|
"TEST": "Designates testing fabrics",
|
|
2437
|
+
"TST": "Alternative Test spelling",
|
|
2434
2438
|
"UAT": "Designates user acceptance testing fabrics"
|
|
2435
2439
|
},
|
|
2436
2440
|
"name": "FabricType",
|
|
@@ -2447,6 +2451,10 @@
|
|
|
2447
2451
|
"PROD",
|
|
2448
2452
|
"CORP",
|
|
2449
2453
|
"RVW",
|
|
2454
|
+
"PRD",
|
|
2455
|
+
"TST",
|
|
2456
|
+
"SIT",
|
|
2457
|
+
"SBX",
|
|
2450
2458
|
"SANDBOX"
|
|
2451
2459
|
],
|
|
2452
2460
|
"doc": "Fabric group type"
|
|
@@ -7733,13 +7741,15 @@
|
|
|
7733
7741
|
"type": "enum",
|
|
7734
7742
|
"symbolDocs": {
|
|
7735
7743
|
"EQUALS": "Whether the field matches the value",
|
|
7744
|
+
"NOT_EQUALS": "Whether the field does not match the value",
|
|
7736
7745
|
"STARTS_WITH": "Whether the field value starts with the value"
|
|
7737
7746
|
},
|
|
7738
7747
|
"name": "PolicyMatchCondition",
|
|
7739
7748
|
"namespace": "com.linkedin.pegasus2avro.policy",
|
|
7740
7749
|
"symbols": [
|
|
7741
7750
|
"EQUALS",
|
|
7742
|
-
"STARTS_WITH"
|
|
7751
|
+
"STARTS_WITH",
|
|
7752
|
+
"NOT_EQUALS"
|
|
7743
7753
|
],
|
|
7744
7754
|
"doc": "The matching condition in a filter criterion"
|
|
7745
7755
|
},
|
|
@@ -7761,6 +7771,15 @@
|
|
|
7761
7771
|
"name": "filter",
|
|
7762
7772
|
"default": null,
|
|
7763
7773
|
"doc": "Filter to apply privileges to"
|
|
7774
|
+
},
|
|
7775
|
+
{
|
|
7776
|
+
"type": [
|
|
7777
|
+
"null",
|
|
7778
|
+
"com.linkedin.pegasus2avro.policy.PolicyMatchFilter"
|
|
7779
|
+
],
|
|
7780
|
+
"name": "privilegeConstraints",
|
|
7781
|
+
"default": null,
|
|
7782
|
+
"doc": "Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"
|
|
7764
7783
|
}
|
|
7765
7784
|
],
|
|
7766
7785
|
"doc": "Information used to filter DataHub resource."
|
|
@@ -15,13 +15,6 @@
|
|
|
15
15
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
16
16
|
"fields": [
|
|
17
17
|
{
|
|
18
|
-
"Relationship": {
|
|
19
|
-
"entityTypes": [
|
|
20
|
-
"dataset",
|
|
21
|
-
"schemaField"
|
|
22
|
-
],
|
|
23
|
-
"name": "IsAssociatedWith"
|
|
24
|
-
},
|
|
25
18
|
"Searchable": {
|
|
26
19
|
"fieldName": "entities",
|
|
27
20
|
"fieldType": "URN"
|
|
@@ -32,11 +25,7 @@
|
|
|
32
25
|
"type": "string",
|
|
33
26
|
"name": "entity",
|
|
34
27
|
"doc": "An entity which is the subject of a query.",
|
|
35
|
-
"Urn": "Urn"
|
|
36
|
-
"entityTypes": [
|
|
37
|
-
"dataset",
|
|
38
|
-
"schemaField"
|
|
39
|
-
]
|
|
28
|
+
"Urn": "Urn"
|
|
40
29
|
}
|
|
41
30
|
],
|
|
42
31
|
"doc": "A single subject of a particular query.\nIn the future, we may evolve this model to include richer details\nabout the Query Subject in relation to the query."
|
datahub/sdk/__init__.py
CHANGED
|
@@ -18,9 +18,15 @@ from datahub.metadata.urns import (
|
|
|
18
18
|
SchemaFieldUrn,
|
|
19
19
|
TagUrn,
|
|
20
20
|
)
|
|
21
|
+
from datahub.sdk.chart import Chart
|
|
21
22
|
from datahub.sdk.container import Container
|
|
23
|
+
from datahub.sdk.dashboard import Dashboard
|
|
24
|
+
from datahub.sdk.dataflow import DataFlow
|
|
25
|
+
from datahub.sdk.datajob import DataJob
|
|
22
26
|
from datahub.sdk.dataset import Dataset
|
|
23
27
|
from datahub.sdk.main_client import DataHubClient
|
|
28
|
+
from datahub.sdk.mlmodel import MLModel
|
|
29
|
+
from datahub.sdk.mlmodelgroup import MLModelGroup
|
|
24
30
|
from datahub.sdk.search_filters import Filter, FilterDsl
|
|
25
31
|
|
|
26
32
|
# We want to print out the warning if people do `from datahub.sdk import X`.
|
datahub/sdk/_all_entities.py
CHANGED
|
@@ -1,19 +1,30 @@
|
|
|
1
1
|
from typing import Dict, List, Type
|
|
2
2
|
|
|
3
|
+
from datahub.sdk.chart import Chart
|
|
3
4
|
from datahub.sdk.container import Container
|
|
5
|
+
from datahub.sdk.dashboard import Dashboard
|
|
6
|
+
from datahub.sdk.dataflow import DataFlow
|
|
7
|
+
from datahub.sdk.datajob import DataJob
|
|
4
8
|
from datahub.sdk.dataset import Dataset
|
|
5
9
|
from datahub.sdk.entity import Entity
|
|
6
10
|
from datahub.sdk.mlmodel import MLModel
|
|
7
11
|
from datahub.sdk.mlmodelgroup import MLModelGroup
|
|
8
12
|
|
|
13
|
+
# Base entity classes that don't have circular dependencies
|
|
14
|
+
# Those that do are imported in the EntityClient where needed
|
|
9
15
|
# TODO: Is there a better way to declare this?
|
|
10
16
|
ENTITY_CLASSES_LIST: List[Type[Entity]] = [
|
|
11
17
|
Container,
|
|
12
18
|
Dataset,
|
|
13
19
|
MLModel,
|
|
14
20
|
MLModelGroup,
|
|
21
|
+
DataFlow,
|
|
22
|
+
DataJob,
|
|
23
|
+
Dashboard,
|
|
24
|
+
Chart,
|
|
15
25
|
]
|
|
16
26
|
|
|
27
|
+
# Create the mapping of entity types to classes
|
|
17
28
|
ENTITY_CLASSES: Dict[str, Type[Entity]] = {
|
|
18
29
|
cls.get_urn_type().ENTITY_TYPE: cls for cls in ENTITY_CLASSES_LIST
|
|
19
30
|
}
|
datahub/sdk/_shared.py
CHANGED
|
@@ -26,9 +26,12 @@ from datahub.emitter.mce_builder import (
|
|
|
26
26
|
from datahub.emitter.mcp_builder import ContainerKey
|
|
27
27
|
from datahub.errors import MultipleSubtypesWarning, SdkUsageError
|
|
28
28
|
from datahub.metadata.urns import (
|
|
29
|
+
ChartUrn,
|
|
29
30
|
ContainerUrn,
|
|
30
31
|
CorpGroupUrn,
|
|
31
32
|
CorpUserUrn,
|
|
33
|
+
DashboardUrn,
|
|
34
|
+
DataFlowUrn,
|
|
32
35
|
DataJobUrn,
|
|
33
36
|
DataPlatformInstanceUrn,
|
|
34
37
|
DataPlatformUrn,
|
|
@@ -37,6 +40,7 @@ from datahub.metadata.urns import (
|
|
|
37
40
|
DomainUrn,
|
|
38
41
|
GlossaryTermUrn,
|
|
39
42
|
OwnershipTypeUrn,
|
|
43
|
+
StructuredPropertyUrn,
|
|
40
44
|
TagUrn,
|
|
41
45
|
Urn,
|
|
42
46
|
VersionSetUrn,
|
|
@@ -47,12 +51,21 @@ from datahub.utilities.urns.error import InvalidUrnError
|
|
|
47
51
|
|
|
48
52
|
if TYPE_CHECKING:
|
|
49
53
|
from datahub.sdk.container import Container
|
|
50
|
-
|
|
51
54
|
UrnOrStr: TypeAlias = Union[Urn, str]
|
|
55
|
+
ChartUrnOrStr: TypeAlias = Union[str, ChartUrn]
|
|
52
56
|
DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
|
|
53
57
|
DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
|
|
58
|
+
DataflowUrnOrStr: TypeAlias = Union[str, DataFlowUrn]
|
|
59
|
+
DashboardUrnOrStr: TypeAlias = Union[str, DashboardUrn]
|
|
60
|
+
DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn]
|
|
61
|
+
DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn]
|
|
54
62
|
|
|
55
63
|
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
64
|
+
StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn]
|
|
65
|
+
StructuredPropertyValueType: TypeAlias = Union[str, float, int]
|
|
66
|
+
StructuredPropertyInputType: TypeAlias = Dict[
|
|
67
|
+
StructuredPropertyUrnOrStr, Sequence[StructuredPropertyValueType]
|
|
68
|
+
]
|
|
56
69
|
|
|
57
70
|
TrainingMetricsInputType: TypeAlias = Union[
|
|
58
71
|
List[models.MLMetricClass], Dict[str, Optional[str]]
|
|
@@ -716,3 +729,107 @@ class HasVersion(Entity):
|
|
|
716
729
|
a for a in version_props.aliases if a.versionTag != alias
|
|
717
730
|
]
|
|
718
731
|
self._set_aspect(version_props)
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
class HasStructuredProperties(Entity):
|
|
735
|
+
"""
|
|
736
|
+
Mixin for entities that support structured properties
|
|
737
|
+
"""
|
|
738
|
+
|
|
739
|
+
__slots__ = ()
|
|
740
|
+
|
|
741
|
+
@property
|
|
742
|
+
def structured_properties(
|
|
743
|
+
self,
|
|
744
|
+
) -> Optional[List[models.StructuredPropertyValueAssignmentClass]]:
|
|
745
|
+
"""
|
|
746
|
+
Retrieve structured properties for the entity
|
|
747
|
+
|
|
748
|
+
Returns:
|
|
749
|
+
Optional list of structured property value assignments
|
|
750
|
+
"""
|
|
751
|
+
sp_aspect = self._get_aspect(models.StructuredPropertiesClass)
|
|
752
|
+
return sp_aspect.properties if sp_aspect else None
|
|
753
|
+
|
|
754
|
+
def _ensure_structured_properties(self) -> models.StructuredPropertiesClass:
|
|
755
|
+
"""
|
|
756
|
+
Ensure structured properties aspect exists, creating it if necessary
|
|
757
|
+
|
|
758
|
+
Returns:
|
|
759
|
+
StructuredPropertiesClass aspect
|
|
760
|
+
"""
|
|
761
|
+
return self._setdefault_aspect(models.StructuredPropertiesClass(properties=[]))
|
|
762
|
+
|
|
763
|
+
def set_structured_property(
|
|
764
|
+
self,
|
|
765
|
+
property_urn: StructuredPropertyUrnOrStr,
|
|
766
|
+
values: Sequence[StructuredPropertyValueType],
|
|
767
|
+
) -> None:
|
|
768
|
+
"""
|
|
769
|
+
Update an existing structured property or add if it doesn't exist
|
|
770
|
+
|
|
771
|
+
Args:
|
|
772
|
+
property_urn: URN of the structured property
|
|
773
|
+
values: List of values for the property
|
|
774
|
+
"""
|
|
775
|
+
# validate property_urn is a valid structured property urn
|
|
776
|
+
property_urn = StructuredPropertyUrn.from_string(property_urn)
|
|
777
|
+
|
|
778
|
+
properties = self._ensure_structured_properties()
|
|
779
|
+
|
|
780
|
+
# Find existing property assignment
|
|
781
|
+
existing_prop = next(
|
|
782
|
+
(
|
|
783
|
+
prop
|
|
784
|
+
for prop in properties.properties
|
|
785
|
+
if prop.propertyUrn == str(property_urn)
|
|
786
|
+
),
|
|
787
|
+
None,
|
|
788
|
+
)
|
|
789
|
+
current_timestamp = make_ts_millis(datetime.now())
|
|
790
|
+
|
|
791
|
+
if existing_prop:
|
|
792
|
+
# Update existing property
|
|
793
|
+
existing_prop.values = list(values)
|
|
794
|
+
existing_prop.lastModified = models.AuditStampClass(
|
|
795
|
+
time=current_timestamp,
|
|
796
|
+
actor=DEFAULT_ACTOR_URN,
|
|
797
|
+
)
|
|
798
|
+
else:
|
|
799
|
+
# Create new property assignment
|
|
800
|
+
new_property = models.StructuredPropertyValueAssignmentClass(
|
|
801
|
+
propertyUrn=str(property_urn),
|
|
802
|
+
values=list(values),
|
|
803
|
+
created=models.AuditStampClass(
|
|
804
|
+
time=current_timestamp,
|
|
805
|
+
actor=DEFAULT_ACTOR_URN,
|
|
806
|
+
),
|
|
807
|
+
lastModified=models.AuditStampClass(
|
|
808
|
+
time=current_timestamp,
|
|
809
|
+
actor=DEFAULT_ACTOR_URN,
|
|
810
|
+
),
|
|
811
|
+
)
|
|
812
|
+
add_list_unique(
|
|
813
|
+
properties.properties,
|
|
814
|
+
key=lambda prop: prop.propertyUrn,
|
|
815
|
+
item=new_property,
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
self._set_aspect(properties)
|
|
819
|
+
|
|
820
|
+
def remove_structured_property(
|
|
821
|
+
self, property_urn: StructuredPropertyUrnOrStr
|
|
822
|
+
) -> None:
|
|
823
|
+
"""
|
|
824
|
+
Remove a structured property from the entity
|
|
825
|
+
|
|
826
|
+
Args:
|
|
827
|
+
property_urn: URN of the structured property to remove
|
|
828
|
+
"""
|
|
829
|
+
remove_list_unique(
|
|
830
|
+
self._ensure_structured_properties().properties,
|
|
831
|
+
key=lambda prop: prop.propertyUrn,
|
|
832
|
+
item=models.StructuredPropertyValueAssignmentClass(
|
|
833
|
+
propertyUrn=str(property_urn), values=[]
|
|
834
|
+
),
|
|
835
|
+
)
|