acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/METADATA +2511 -2484
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/RECORD +223 -189
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/pydantic_migration_helpers.py +7 -5
- datahub/emitter/rest_emitter.py +70 -12
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +507 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1522 -569
- datahub/metadata/_urns/urn_defs.py +1826 -1658
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +29 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +17758 -17097
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +237 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +116 -0
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +82 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/sdk/search_filters.py +95 -27
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +56 -14
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/top_level.txt +0 -0
datahub/sdk/main_client.py
CHANGED
|
@@ -7,16 +7,8 @@ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
|
7
7
|
from datahub.ingestion.graph.config import ClientMode, DatahubClientConfig
|
|
8
8
|
from datahub.sdk.entity_client import EntityClient
|
|
9
9
|
from datahub.sdk.lineage_client import LineageClient
|
|
10
|
-
from datahub.sdk.resolver_client import ResolverClient
|
|
11
10
|
from datahub.sdk.search_client import SearchClient
|
|
12
11
|
|
|
13
|
-
try:
|
|
14
|
-
from acryl_datahub_cloud._sdk_extras import ( # type: ignore[import-not-found]
|
|
15
|
-
AssertionsClient,
|
|
16
|
-
)
|
|
17
|
-
except ImportError:
|
|
18
|
-
AssertionsClient = None
|
|
19
|
-
|
|
20
12
|
|
|
21
13
|
class DataHubClient:
|
|
22
14
|
"""Main client for interacting with DataHub.
|
|
@@ -74,7 +66,12 @@ class DataHubClient:
|
|
|
74
66
|
self._graph.test_connection()
|
|
75
67
|
|
|
76
68
|
@classmethod
|
|
77
|
-
def from_env(
|
|
69
|
+
def from_env(
|
|
70
|
+
cls,
|
|
71
|
+
*,
|
|
72
|
+
client_mode: ClientMode = ClientMode.SDK,
|
|
73
|
+
datahub_component: Optional[str] = None,
|
|
74
|
+
) -> "DataHubClient":
|
|
78
75
|
"""Initialize a DataHubClient from the environment variables or ~/.datahubenv file.
|
|
79
76
|
|
|
80
77
|
This will first check DATAHUB_GMS_URL and DATAHUB_GMS_TOKEN. If not present,
|
|
@@ -84,6 +81,10 @@ class DataHubClient:
|
|
|
84
81
|
If you're looking to specify the server/token in code, use the
|
|
85
82
|
DataHubClient(server=..., token=...) constructor instead.
|
|
86
83
|
|
|
84
|
+
Args:
|
|
85
|
+
client_mode: [internal] The client mode to use. Defaults to "SDK".
|
|
86
|
+
datahub_component: [internal] The DataHub component name to include in the user agent.
|
|
87
|
+
|
|
87
88
|
Returns:
|
|
88
89
|
A DataHubClient instance.
|
|
89
90
|
"""
|
|
@@ -91,7 +92,10 @@ class DataHubClient:
|
|
|
91
92
|
# Inspired by the DockerClient.from_env() method.
|
|
92
93
|
# TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
|
|
93
94
|
# That file is part of the "environment", but is not a traditional "env variable".
|
|
94
|
-
graph = get_default_graph(
|
|
95
|
+
graph = get_default_graph(
|
|
96
|
+
client_mode=client_mode,
|
|
97
|
+
datahub_component=datahub_component,
|
|
98
|
+
)
|
|
95
99
|
|
|
96
100
|
return cls(graph=graph)
|
|
97
101
|
|
|
@@ -100,7 +104,15 @@ class DataHubClient:
|
|
|
100
104
|
return EntityClient(self)
|
|
101
105
|
|
|
102
106
|
@property
|
|
103
|
-
def resolve(self)
|
|
107
|
+
def resolve(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
|
|
108
|
+
try:
|
|
109
|
+
from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
|
|
110
|
+
ResolverClient,
|
|
111
|
+
)
|
|
112
|
+
except ImportError:
|
|
113
|
+
from datahub.sdk.resolver_client import ( # type: ignore[assignment] # If the client is not installed, use the one from the SDK
|
|
114
|
+
ResolverClient,
|
|
115
|
+
)
|
|
104
116
|
return ResolverClient(self)
|
|
105
117
|
|
|
106
118
|
@property
|
|
@@ -112,9 +124,27 @@ class DataHubClient:
|
|
|
112
124
|
return LineageClient(self)
|
|
113
125
|
|
|
114
126
|
@property
|
|
115
|
-
def assertions(self)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
)
|
|
127
|
+
def assertions(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
|
|
128
|
+
try:
|
|
129
|
+
from acryl_datahub_cloud.sdk import AssertionsClient
|
|
130
|
+
except ImportError as e:
|
|
131
|
+
if "acryl_datahub_cloud" in str(e):
|
|
132
|
+
raise SdkUsageError(
|
|
133
|
+
"AssertionsClient is not installed, please install it with `pip install acryl-datahub-cloud`"
|
|
134
|
+
) from e
|
|
135
|
+
else:
|
|
136
|
+
raise e
|
|
120
137
|
return AssertionsClient(self)
|
|
138
|
+
|
|
139
|
+
@property
|
|
140
|
+
def subscriptions(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
|
|
141
|
+
try:
|
|
142
|
+
from acryl_datahub_cloud.sdk import SubscriptionClient
|
|
143
|
+
except ImportError as e:
|
|
144
|
+
if "acryl_datahub_cloud" in str(e):
|
|
145
|
+
raise SdkUsageError(
|
|
146
|
+
"SubscriptionClient is not installed, please install it with `pip install acryl-datahub-cloud`"
|
|
147
|
+
) from e
|
|
148
|
+
else:
|
|
149
|
+
raise e
|
|
150
|
+
return SubscriptionClient(self)
|
datahub/sdk/mlmodel.py
CHANGED
|
@@ -24,6 +24,7 @@ from datahub.sdk._shared import (
|
|
|
24
24
|
HasInstitutionalMemory,
|
|
25
25
|
HasOwnership,
|
|
26
26
|
HasPlatformInstance,
|
|
27
|
+
HasStructuredProperties,
|
|
27
28
|
HasTags,
|
|
28
29
|
HasTerms,
|
|
29
30
|
HasVersion,
|
|
@@ -31,6 +32,7 @@ from datahub.sdk._shared import (
|
|
|
31
32
|
LinksInputType,
|
|
32
33
|
MLTrainingJobInputType,
|
|
33
34
|
OwnersInputType,
|
|
35
|
+
StructuredPropertyInputType,
|
|
34
36
|
TagsInputType,
|
|
35
37
|
TermsInputType,
|
|
36
38
|
TrainingMetricsInputType,
|
|
@@ -50,6 +52,7 @@ class MLModel(
|
|
|
50
52
|
HasTerms,
|
|
51
53
|
HasDomain,
|
|
52
54
|
HasVersion,
|
|
55
|
+
HasStructuredProperties,
|
|
53
56
|
Entity,
|
|
54
57
|
):
|
|
55
58
|
__slots__ = ()
|
|
@@ -82,53 +85,43 @@ class MLModel(
|
|
|
82
85
|
model_group: Optional[Union[str, MlModelGroupUrn]] = None,
|
|
83
86
|
training_jobs: Optional[MLTrainingJobInputType] = None,
|
|
84
87
|
downstream_jobs: Optional[MLTrainingJobInputType] = None,
|
|
88
|
+
structured_properties: Optional[StructuredPropertyInputType] = None,
|
|
85
89
|
extra_aspects: ExtraAspectsType = None,
|
|
86
90
|
):
|
|
87
91
|
urn = MlModelUrn(platform=platform, name=id, env=env)
|
|
88
92
|
super().__init__(urn)
|
|
89
93
|
self._set_extra_aspects(extra_aspects)
|
|
90
|
-
|
|
91
94
|
self._set_platform_instance(urn.platform, platform_instance)
|
|
92
|
-
|
|
93
95
|
self._ensure_model_props()
|
|
94
96
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
self.set_hyper_params(hyper_params)
|
|
107
|
-
if external_url is not None:
|
|
108
|
-
self.set_external_url(external_url)
|
|
109
|
-
if custom_properties is not None:
|
|
110
|
-
self.set_custom_properties(custom_properties)
|
|
111
|
-
if created is not None:
|
|
112
|
-
self.set_created(created)
|
|
113
|
-
if last_modified is not None:
|
|
114
|
-
self.set_last_modified(last_modified)
|
|
97
|
+
# Initialize properties in logical groups
|
|
98
|
+
self._init_basic_properties(
|
|
99
|
+
version=version,
|
|
100
|
+
name=name,
|
|
101
|
+
aliases=aliases,
|
|
102
|
+
description=description,
|
|
103
|
+
external_url=external_url,
|
|
104
|
+
custom_properties=custom_properties,
|
|
105
|
+
created=created,
|
|
106
|
+
last_modified=last_modified,
|
|
107
|
+
)
|
|
115
108
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
109
|
+
self._init_ml_specific_properties(
|
|
110
|
+
training_metrics=training_metrics,
|
|
111
|
+
hyper_params=hyper_params,
|
|
112
|
+
model_group=model_group,
|
|
113
|
+
training_jobs=training_jobs,
|
|
114
|
+
downstream_jobs=downstream_jobs,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
self._init_metadata_properties(
|
|
118
|
+
owners=owners,
|
|
119
|
+
links=links,
|
|
120
|
+
tags=tags,
|
|
121
|
+
terms=terms,
|
|
122
|
+
domain=domain,
|
|
123
|
+
structured_properties=structured_properties,
|
|
124
|
+
)
|
|
132
125
|
|
|
133
126
|
@classmethod
|
|
134
127
|
def _new_from_graph(cls, urn: Urn, current_aspects: AspectBag) -> Self:
|
|
@@ -299,3 +292,73 @@ class MLModel(
|
|
|
299
292
|
props.downstreamJobs = [
|
|
300
293
|
job for job in props.downstreamJobs if job != job_str
|
|
301
294
|
]
|
|
295
|
+
|
|
296
|
+
def _init_basic_properties(
|
|
297
|
+
self,
|
|
298
|
+
version: Optional[str] = None,
|
|
299
|
+
name: Optional[str] = None,
|
|
300
|
+
aliases: Optional[List[str]] = None,
|
|
301
|
+
description: Optional[str] = None,
|
|
302
|
+
external_url: Optional[str] = None,
|
|
303
|
+
custom_properties: Optional[Dict[str, str]] = None,
|
|
304
|
+
created: Optional[datetime] = None,
|
|
305
|
+
last_modified: Optional[datetime] = None,
|
|
306
|
+
) -> None:
|
|
307
|
+
if version is not None:
|
|
308
|
+
self.set_version(version)
|
|
309
|
+
if name is not None:
|
|
310
|
+
self.set_name(name)
|
|
311
|
+
if aliases is not None:
|
|
312
|
+
self.set_version_aliases(aliases)
|
|
313
|
+
if description is not None:
|
|
314
|
+
self.set_description(description)
|
|
315
|
+
if external_url is not None:
|
|
316
|
+
self.set_external_url(external_url)
|
|
317
|
+
if custom_properties is not None:
|
|
318
|
+
self.set_custom_properties(custom_properties)
|
|
319
|
+
if created is not None:
|
|
320
|
+
self.set_created(created)
|
|
321
|
+
if last_modified is not None:
|
|
322
|
+
self.set_last_modified(last_modified)
|
|
323
|
+
|
|
324
|
+
def _init_ml_specific_properties(
|
|
325
|
+
self,
|
|
326
|
+
training_metrics: Optional[TrainingMetricsInputType] = None,
|
|
327
|
+
hyper_params: Optional[HyperParamsInputType] = None,
|
|
328
|
+
model_group: Optional[Union[str, MlModelGroupUrn]] = None,
|
|
329
|
+
training_jobs: Optional[MLTrainingJobInputType] = None,
|
|
330
|
+
downstream_jobs: Optional[MLTrainingJobInputType] = None,
|
|
331
|
+
) -> None:
|
|
332
|
+
if training_metrics is not None:
|
|
333
|
+
self.set_training_metrics(training_metrics)
|
|
334
|
+
if hyper_params is not None:
|
|
335
|
+
self.set_hyper_params(hyper_params)
|
|
336
|
+
if model_group is not None:
|
|
337
|
+
self.set_model_group(model_group)
|
|
338
|
+
if training_jobs is not None:
|
|
339
|
+
self.set_training_jobs(training_jobs)
|
|
340
|
+
if downstream_jobs is not None:
|
|
341
|
+
self.set_downstream_jobs(downstream_jobs)
|
|
342
|
+
|
|
343
|
+
def _init_metadata_properties(
|
|
344
|
+
self,
|
|
345
|
+
owners: Optional[OwnersInputType] = None,
|
|
346
|
+
links: Optional[LinksInputType] = None,
|
|
347
|
+
tags: Optional[TagsInputType] = None,
|
|
348
|
+
terms: Optional[TermsInputType] = None,
|
|
349
|
+
domain: Optional[DomainInputType] = None,
|
|
350
|
+
structured_properties: Optional[StructuredPropertyInputType] = None,
|
|
351
|
+
) -> None:
|
|
352
|
+
if owners is not None:
|
|
353
|
+
self.set_owners(owners)
|
|
354
|
+
if links is not None:
|
|
355
|
+
self.set_links(links)
|
|
356
|
+
if tags is not None:
|
|
357
|
+
self.set_tags(tags)
|
|
358
|
+
if terms is not None:
|
|
359
|
+
self.set_terms(terms)
|
|
360
|
+
if domain is not None:
|
|
361
|
+
self.set_domain(domain)
|
|
362
|
+
if structured_properties is not None:
|
|
363
|
+
for key, value in structured_properties.items():
|
|
364
|
+
self.set_structured_property(property_urn=key, values=value)
|
datahub/sdk/mlmodelgroup.py
CHANGED
|
@@ -17,10 +17,12 @@ from datahub.sdk._shared import (
|
|
|
17
17
|
HasInstitutionalMemory,
|
|
18
18
|
HasOwnership,
|
|
19
19
|
HasPlatformInstance,
|
|
20
|
+
HasStructuredProperties,
|
|
20
21
|
HasTags,
|
|
21
22
|
HasTerms,
|
|
22
23
|
LinksInputType,
|
|
23
24
|
OwnersInputType,
|
|
25
|
+
StructuredPropertyInputType,
|
|
24
26
|
TagsInputType,
|
|
25
27
|
TermsInputType,
|
|
26
28
|
make_time_stamp,
|
|
@@ -36,6 +38,7 @@ class MLModelGroup(
|
|
|
36
38
|
HasTags,
|
|
37
39
|
HasTerms,
|
|
38
40
|
HasDomain,
|
|
41
|
+
HasStructuredProperties,
|
|
39
42
|
Entity,
|
|
40
43
|
):
|
|
41
44
|
__slots__ = ()
|
|
@@ -66,6 +69,7 @@ class MLModelGroup(
|
|
|
66
69
|
domain: Optional[DomainInputType] = None,
|
|
67
70
|
training_jobs: Optional[Sequence[Union[str, DataProcessInstanceUrn]]] = None,
|
|
68
71
|
downstream_jobs: Optional[Sequence[Union[str, DataProcessInstanceUrn]]] = None,
|
|
72
|
+
structured_properties: Optional[StructuredPropertyInputType] = None,
|
|
69
73
|
extra_aspects: ExtraAspectsType = None,
|
|
70
74
|
):
|
|
71
75
|
urn = MlModelGroupUrn(platform=platform, name=id, env=env)
|
|
@@ -105,6 +109,9 @@ class MLModelGroup(
|
|
|
105
109
|
self.set_training_jobs(training_jobs)
|
|
106
110
|
if downstream_jobs is not None:
|
|
107
111
|
self.set_downstream_jobs(downstream_jobs)
|
|
112
|
+
if structured_properties is not None:
|
|
113
|
+
for key, value in structured_properties.items():
|
|
114
|
+
self.set_structured_property(property_urn=key, values=value)
|
|
108
115
|
|
|
109
116
|
@classmethod
|
|
110
117
|
def _new_from_graph(cls, urn: Urn, current_aspects: AspectBag) -> Self:
|
datahub/sdk/search_client.py
CHANGED
|
@@ -19,6 +19,7 @@ from datahub.sdk.search_filters import (
|
|
|
19
19
|
_OrFilters,
|
|
20
20
|
_StatusFilter,
|
|
21
21
|
)
|
|
22
|
+
from datahub.utilities.ordered_set import OrderedSet
|
|
22
23
|
|
|
23
24
|
if TYPE_CHECKING:
|
|
24
25
|
from datahub.sdk.main_client import DataHubClient
|
|
@@ -80,7 +81,7 @@ def compute_entity_types(
|
|
|
80
81
|
) -> Optional[List[str]]:
|
|
81
82
|
found_filters = False
|
|
82
83
|
found_positive_filters = False
|
|
83
|
-
entity_types:
|
|
84
|
+
entity_types: OrderedSet[str] = OrderedSet()
|
|
84
85
|
for ands in filters:
|
|
85
86
|
for clause in ands["and"]:
|
|
86
87
|
if clause.field == _EntityTypeFilter.ENTITY_TYPE_FIELD:
|
|
@@ -88,7 +89,7 @@ def compute_entity_types(
|
|
|
88
89
|
if not clause.negated:
|
|
89
90
|
found_positive_filters = True
|
|
90
91
|
|
|
91
|
-
entity_types.
|
|
92
|
+
entity_types.update(clause.values)
|
|
92
93
|
|
|
93
94
|
if not found_filters:
|
|
94
95
|
# If we didn't find any filters, use None so we use the default set.
|
|
@@ -100,7 +101,7 @@ def compute_entity_types(
|
|
|
100
101
|
# still want to use the default set.
|
|
101
102
|
return None
|
|
102
103
|
|
|
103
|
-
return entity_types
|
|
104
|
+
return list(entity_types)
|
|
104
105
|
|
|
105
106
|
|
|
106
107
|
class SearchClient:
|
datahub/sdk/search_filters.py
CHANGED
|
@@ -2,6 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
4
|
from typing import (
|
|
5
|
+
TYPE_CHECKING,
|
|
6
|
+
Annotated,
|
|
5
7
|
Any,
|
|
6
8
|
ClassVar,
|
|
7
9
|
Iterator,
|
|
@@ -15,7 +17,10 @@ from typing import (
|
|
|
15
17
|
import pydantic
|
|
16
18
|
|
|
17
19
|
from datahub.configuration.common import ConfigModel
|
|
18
|
-
from datahub.configuration.pydantic_migration_helpers import
|
|
20
|
+
from datahub.configuration.pydantic_migration_helpers import (
|
|
21
|
+
PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR,
|
|
22
|
+
PYDANTIC_VERSION_2,
|
|
23
|
+
)
|
|
19
24
|
from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
|
|
20
25
|
from datahub.ingestion.graph.filters import (
|
|
21
26
|
FilterOperator,
|
|
@@ -42,12 +47,29 @@ class _BaseFilter(ConfigModel):
|
|
|
42
47
|
populate_by_name = True
|
|
43
48
|
|
|
44
49
|
@abc.abstractmethod
|
|
45
|
-
def compile(self) -> _OrFilters:
|
|
46
|
-
pass
|
|
50
|
+
def compile(self) -> _OrFilters: ...
|
|
47
51
|
|
|
48
52
|
def dfs(self) -> Iterator[_BaseFilter]:
|
|
49
53
|
yield self
|
|
50
54
|
|
|
55
|
+
@classmethod
|
|
56
|
+
def _field_discriminator(cls) -> str:
|
|
57
|
+
if cls is _BaseFilter:
|
|
58
|
+
raise ValueError("Cannot get discriminator for _BaseFilter")
|
|
59
|
+
if PYDANTIC_VERSION_2:
|
|
60
|
+
fields: dict = cls.model_fields # type: ignore
|
|
61
|
+
else:
|
|
62
|
+
fields = cls.__fields__ # type: ignore
|
|
63
|
+
|
|
64
|
+
# Assumes that there's only one field name per filter.
|
|
65
|
+
# If that's not the case, this method should be overridden.
|
|
66
|
+
if len(fields.keys()) != 1:
|
|
67
|
+
raise ValueError(
|
|
68
|
+
f"Found multiple fields that could be the discriminator for this filter: {list(fields.keys())}"
|
|
69
|
+
)
|
|
70
|
+
name, field = next(iter(fields.items()))
|
|
71
|
+
return field.alias or name # type: ignore
|
|
72
|
+
|
|
51
73
|
|
|
52
74
|
class _EntityTypeFilter(_BaseFilter):
|
|
53
75
|
"""Filter for specific entity types.
|
|
@@ -74,15 +96,19 @@ class _EntityTypeFilter(_BaseFilter):
|
|
|
74
96
|
|
|
75
97
|
|
|
76
98
|
class _EntitySubtypeFilter(_BaseFilter):
|
|
77
|
-
entity_subtype: str = pydantic.Field(
|
|
99
|
+
entity_subtype: List[str] = pydantic.Field(
|
|
78
100
|
description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
|
|
79
101
|
)
|
|
80
102
|
|
|
103
|
+
@pydantic.validator("entity_subtype", pre=True)
|
|
104
|
+
def validate_entity_subtype(cls, v: str) -> List[str]:
|
|
105
|
+
return [v] if not isinstance(v, list) else v
|
|
106
|
+
|
|
81
107
|
def _build_rule(self) -> SearchFilterRule:
|
|
82
108
|
return SearchFilterRule(
|
|
83
109
|
field="typeNames",
|
|
84
110
|
condition="EQUAL",
|
|
85
|
-
values=
|
|
111
|
+
values=self.entity_subtype,
|
|
86
112
|
)
|
|
87
113
|
|
|
88
114
|
def compile(self) -> _OrFilters:
|
|
@@ -196,6 +222,10 @@ class _CustomCondition(_BaseFilter):
|
|
|
196
222
|
)
|
|
197
223
|
return [{"and": [rule]}]
|
|
198
224
|
|
|
225
|
+
@classmethod
|
|
226
|
+
def _field_discriminator(cls) -> str:
|
|
227
|
+
return "_custom"
|
|
228
|
+
|
|
199
229
|
|
|
200
230
|
class _And(_BaseFilter):
|
|
201
231
|
"""Represents an AND conjunction of filters."""
|
|
@@ -302,31 +332,69 @@ class _Not(_BaseFilter):
|
|
|
302
332
|
yield from self.not_.dfs()
|
|
303
333
|
|
|
304
334
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
335
|
+
def _filter_discriminator(v: Any) -> Optional[str]:
|
|
336
|
+
if isinstance(v, _BaseFilter):
|
|
337
|
+
return v._field_discriminator()
|
|
338
|
+
|
|
339
|
+
if not isinstance(v, dict):
|
|
340
|
+
return None
|
|
341
|
+
|
|
342
|
+
keys = list(v.keys())
|
|
343
|
+
if len(keys) == 1:
|
|
344
|
+
return keys[0]
|
|
345
|
+
elif set(keys).issuperset({"field", "condition"}):
|
|
346
|
+
return _CustomCondition._field_discriminator()
|
|
347
|
+
|
|
348
|
+
return None
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
|
|
352
|
+
# The `TYPE_CHECKING` bit is required to make the linter happy,
|
|
353
|
+
# since we currently only run mypy with pydantic v1.
|
|
354
|
+
Filter = Union[
|
|
355
|
+
_And,
|
|
356
|
+
_Or,
|
|
357
|
+
_Not,
|
|
358
|
+
_EntityTypeFilter,
|
|
359
|
+
_EntitySubtypeFilter,
|
|
360
|
+
_StatusFilter,
|
|
361
|
+
_PlatformFilter,
|
|
362
|
+
_DomainFilter,
|
|
363
|
+
_EnvFilter,
|
|
364
|
+
_CustomCondition,
|
|
365
|
+
]
|
|
366
|
+
|
|
327
367
|
_And.update_forward_refs()
|
|
328
368
|
_Or.update_forward_refs()
|
|
329
369
|
_Not.update_forward_refs()
|
|
370
|
+
else:
|
|
371
|
+
from pydantic import Discriminator, Tag
|
|
372
|
+
|
|
373
|
+
# TODO: Once we're fully on pydantic 2, we can use a RootModel here.
|
|
374
|
+
# That way we'd be able to attach methods to the Filter type.
|
|
375
|
+
# e.g. replace load_filters(...) with Filter.load(...)
|
|
376
|
+
Filter = Annotated[
|
|
377
|
+
Union[
|
|
378
|
+
Annotated[_And, Tag(_And._field_discriminator())],
|
|
379
|
+
Annotated[_Or, Tag(_Or._field_discriminator())],
|
|
380
|
+
Annotated[_Not, Tag(_Not._field_discriminator())],
|
|
381
|
+
Annotated[_EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())],
|
|
382
|
+
Annotated[
|
|
383
|
+
_EntitySubtypeFilter, Tag(_EntitySubtypeFilter._field_discriminator())
|
|
384
|
+
],
|
|
385
|
+
Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
|
|
386
|
+
Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
|
|
387
|
+
Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
|
|
388
|
+
Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
|
|
389
|
+
Annotated[_CustomCondition, Tag(_CustomCondition._field_discriminator())],
|
|
390
|
+
],
|
|
391
|
+
Discriminator(_filter_discriminator),
|
|
392
|
+
]
|
|
393
|
+
|
|
394
|
+
# Required to resolve forward references to "Filter"
|
|
395
|
+
_And.model_rebuild() # type: ignore
|
|
396
|
+
_Or.model_rebuild() # type: ignore
|
|
397
|
+
_Not.model_rebuild() # type: ignore
|
|
330
398
|
|
|
331
399
|
|
|
332
400
|
def load_filters(obj: Any) -> Filter:
|
datahub/specific/chart.py
CHANGED
datahub/specific/dataproduct.py
CHANGED
|
@@ -9,6 +9,9 @@ from datahub.metadata.schema_classes import (
|
|
|
9
9
|
)
|
|
10
10
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
11
11
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
12
|
+
from datahub.specific.aspect_helpers.structured_properties import (
|
|
13
|
+
HasStructuredPropertiesPatch,
|
|
14
|
+
)
|
|
12
15
|
from datahub.specific.aspect_helpers.tags import HasTagsPatch
|
|
13
16
|
from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
14
17
|
|
|
@@ -16,6 +19,7 @@ from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
|
16
19
|
class DataProductPatchBuilder(
|
|
17
20
|
HasOwnershipPatch,
|
|
18
21
|
HasCustomPropertiesPatch,
|
|
22
|
+
HasStructuredPropertiesPatch,
|
|
19
23
|
HasTagsPatch,
|
|
20
24
|
HasTermsPatch,
|
|
21
25
|
MetadataPatchProposal,
|