acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of acryl-datahub might be problematic.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/schema.avsc +7 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
--- a/datahub/ingestion/source/unity/proxy.py
+++ b/datahub/ingestion/source/unity/proxy.py
@@ -3,6 +3,7 @@ Manage the communication with DataBricks Server and provide equivalent dataclass
 """
 
 import dataclasses
+import json
 import logging
 import os
 from concurrent.futures import ThreadPoolExecutor
@@ -11,6 +12,7 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Union, cast
 from unittest.mock import patch
 
 import cachetools
+import yaml
 from cachetools import cached
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.catalog import (
@@ -23,7 +25,11 @@ from databricks.sdk.service.catalog import (
     SchemaInfo,
     TableInfo,
 )
+from databricks.sdk.service.files import DownloadResponse, FilesAPI
 from databricks.sdk.service.iam import ServicePrincipal as DatabricksServicePrincipal
+from databricks.sdk.service.ml import (
+    ExperimentsAPI,
+)
 from databricks.sdk.service.sql import (
     QueryFilter,
     QueryInfo,
@@ -38,6 +44,7 @@ from typing_extensions import assert_never
 from datahub._version import nice_version_name
 from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
 from datahub.emitter.mce_builder import parse_ts_millis
+from datahub.ingestion.source.unity.azure_auth_config import AzureAuthConfig
 from datahub.ingestion.source.unity.config import (
     LineageDataSource,
     UsageDataSource,
@@ -54,6 +61,8 @@ from datahub.ingestion.source.unity.proxy_types import (
     ExternalTableReference,
     Metastore,
     Model,
+    ModelRunDetails,
+    ModelSignature,
     ModelVersion,
     Notebook,
     NotebookReference,
@@ -155,30 +164,44 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
     _workspace_url: str
     report: UnityCatalogReport
     warehouse_id: str
+    _experiments_api: ExperimentsAPI
+    _files_api: FilesAPI
 
     def __init__(
         self,
         workspace_url: str,
-        personal_access_token: str,
         warehouse_id: Optional[str],
         report: UnityCatalogReport,
         hive_metastore_proxy: Optional[HiveMetastoreProxy] = None,
         lineage_data_source: LineageDataSource = LineageDataSource.AUTO,
         usage_data_source: UsageDataSource = UsageDataSource.AUTO,
         databricks_api_page_size: int = 0,
+        personal_access_token: Optional[str] = None,
+        azure_auth: Optional[AzureAuthConfig] = None,
     ):
-
-
-
-
-
-
+        if azure_auth:
+            self._workspace_client = WorkspaceClient(
+                host=workspace_url,
+                azure_tenant_id=azure_auth.tenant_id,
+                azure_client_id=azure_auth.client_id,
+                azure_client_secret=azure_auth.client_secret.get_secret_value(),
+                product="datahub",
+                product_version=nice_version_name(),
+            )
+        else:
+            self._workspace_client = WorkspaceClient(
+                host=workspace_url,
+                token=personal_access_token,
+                product="datahub",
+                product_version=nice_version_name(),
+            )
         self.warehouse_id = warehouse_id or ""
         self.report = report
         self.hive_metastore_proxy = hive_metastore_proxy
         self.lineage_data_source = lineage_data_source
         self.usage_data_source = usage_data_source
         self.databricks_api_page_size = databricks_api_page_size
+        self._workspace_url = workspace_url
         self._sql_connection_params = {
             "server_hostname": self._workspace_client.config.host.replace(
                 "https://", ""
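The constructor above branches on the new `azure_auth` option, which comes from the new module datahub/ingestion/source/unity/azure_auth_config.py (+15 lines, not expanded in this view). Based on how the proxy reads it (`tenant_id`, `client_id`, `client_secret.get_secret_value()`), it is presumably a small Pydantic model along these lines; this is a hypothetical sketch, not the released file:

```python
# Hypothetical sketch of datahub/ingestion/source/unity/azure_auth_config.py.
# Field names are inferred from the proxy code above; descriptions and any
# extra validation in the actual release may differ.
from pydantic import SecretStr

from datahub.configuration.common import ConfigModel


class AzureAuthConfig(ConfigModel):
    client_id: str  # Azure AD application (client) ID of the service principal
    tenant_id: str  # Azure AD tenant (directory) ID
    client_secret: SecretStr  # client secret; SecretStr keeps it out of logs
```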
@@ -187,6 +210,179 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             "access_token": self._workspace_client.config.token,
             "user_agent_entry": "datahub",
         }
+        # Initialize MLflow APIs
+        self._experiments_api = ExperimentsAPI(self._workspace_client.api_client)
+        self._files_api = FilesAPI(self._workspace_client.api_client)
+
+    def get_run_details(self, run_id: str) -> Optional[ModelRunDetails]:
+        """
+        Get comprehensive details from an MLflow run.
+
+        Args:
+            run_id: The MLflow run ID
+
+        Returns:
+            ModelRunDetails object with comprehensive run information
+        """
+        try:
+            run_response = self._experiments_api.get_run(run_id)
+            run = run_response.run
+
+            if (
+                not run
+                or not run.info
+                or not run.info.run_id
+                or not run.info.experiment_id
+            ):
+                return None
+
+            # Extract metrics
+            metrics: Dict[str, Any] = {}
+            if run.data and run.data.metrics:
+                for metric in run.data.metrics:
+                    if metric.key is not None:
+                        metrics[metric.key] = metric.value
+
+            # Extract parameters
+            parameters: Dict[str, Any] = {}
+            if run.data and run.data.params:
+                for param in run.data.params:
+                    if param.key is not None:
+                        parameters[param.key] = param.value
+
+            # Extract tags
+            tags: Dict[str, str] = {}
+            if run.data and run.data.tags:
+                for tag in run.data.tags:
+                    if tag.key is not None and tag.value is not None:
+                        tags[tag.key] = tag.value
+
+            return ModelRunDetails(
+                run_id=run.info.run_id,
+                experiment_id=run.info.experiment_id,
+                status=run.info.status.value if run.info.status else None,
+                start_time=parse_ts_millis(run.info.start_time),
+                end_time=parse_ts_millis(run.info.end_time),
+                user_id=run.info.user_id,
+                metrics=metrics,
+                parameters=parameters,
+                tags=tags,
+            )
+        except Exception as e:
+            logger.warning(
+                f"Unable to get run details for MLflow experiment, run-id: {run_id}",
+                exc_info=True,
+            )
+            self.report.report_warning(
+                title="Unable to get run details for MLflow experiment",
+                message="Error while getting run details for MLflow experiment",
+                context=f"run-id: {run_id}",
+                exc=e,
+            )
+            return None
+
+    def _extract_signature_from_files_api(
+        self, model_version: ModelVersionInfo
+    ) -> Optional[ModelSignature]:
+        """
+        Extract signature from MLmodel file using Databricks FilesAPI.
+        Uses the API endpoint: /api/2.0/fs/files/Models/{catalog}/{schema}/{model}/{version}/MLmodel
+
+        Args:
+            model_version: Unity Catalog ModelVersionInfo object with catalog_name, schema_name, model_name, version
+
+        Returns:
+            ModelSignature if found, None otherwise
+        """
+        try:
+            # Construct file path for FilesAPI
+            # The correct path format is: /Models/{catalog}/{schema}/{model}/{version}/MLmodel
+            file_path = (
+                f"/Models/{model_version.catalog_name}/{model_version.schema_name}/"
+                f"{model_version.model_name}/{model_version.version}/MLmodel"
+            )
+
+            logger.debug(f"Downloading MLmodel from FilesAPI: {file_path}")
+
+            # Download the file using FilesAPI
+            download_response: DownloadResponse = self._files_api.download(
+                file_path=file_path
+            )
+
+            # Read the file content
+            # DownloadResponse.contents is a BinaryIO object
+            if download_response and download_response.contents:
+                content_stream = download_response.contents
+
+                # Read from the binary stream
+                if content_stream:
+                    mlmodel_content: str = content_stream.read().decode("utf-8")
+
+                    logger.debug(
+                        f"MLmodel file contents from FilesAPI ({file_path}):\n{mlmodel_content}"
+                    )
+
+                    # Parse YAML content
+                    mlmodel_data = yaml.safe_load(mlmodel_content)
+
+                    # Extract signature from MLmodel YAML
+                    if mlmodel_data and "signature" in mlmodel_data:
+                        signature_raw = mlmodel_data["signature"]
+
+                        # Signature inputs and outputs are stored as JSON strings in the YAML
+                        # Parse them into proper dict/list format
+                        signature_data = {}
+                        if "inputs" in signature_raw:
+                            try:
+                                signature_data["inputs"] = json.loads(
+                                    signature_raw["inputs"]
+                                )
+                            except (json.JSONDecodeError, TypeError) as e:
+                                logger.debug(f"Failed to parse inputs JSON: {e}")
+
+                        if "outputs" in signature_raw:
+                            try:
+                                signature_data["outputs"] = json.loads(
+                                    signature_raw["outputs"]
+                                )
+                            except (json.JSONDecodeError, TypeError) as e:
+                                logger.debug(f"Failed to parse outputs JSON: {e}")
+
+                        if "params" in signature_raw:
+                            try:
+                                signature_data["params"] = json.loads(
+                                    signature_raw["params"]
+                                )
+                            except (json.JSONDecodeError, TypeError) as e:
+                                logger.debug(f"Failed to parse params JSON: {e}")
+
+                        return ModelSignature(
+                            inputs=signature_data.get("inputs"),
+                            outputs=signature_data.get("outputs"),
+                            parameters=signature_data.get("params"),
+                        )
+                    else:
+                        logger.debug(
+                            f"No signature found in MLmodel data from {file_path}"
+                        )
+                        return None
+
+            return None
+
+        except Exception as e:
+            model_name = getattr(model_version, "model_name", "unknown")
+            version_num = getattr(model_version, "version", "unknown")
+            self.report.report_warning(
+                title="Unable to extract signature from MLmodel file",
+                message="Error while extracting signature from MLmodel file",
+                context=f"model-name: {model_name}, model-version: {version_num}",
+                exc=e,
+            )
+            logger.warning(
+                f"Unable to extract signature from MLmodel file, model-name: {model_name}, model-version: {version_num}",
+                exc_info=True,
+            )
+            return None
 
     def check_basic_connectivity(self) -> bool:
         return bool(
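The new `_extract_signature_from_files_api` relies on the MLmodel file's layout: the `signature` section is YAML whose `inputs`/`outputs`/`params` values are JSON-encoded strings. A standalone sketch of that two-stage parsing, using a made-up MLmodel snippet (the field values are illustrative only):

```python
import json

import yaml

# Illustrative MLmodel content; only the shape matters here: YAML with
# JSON-encoded strings, which is what the code above parses.
MLMODEL_SNIPPET = """
signature:
  inputs: '[{"name": "age", "type": "long"}]'
  outputs: '[{"type": "double"}]'
"""

mlmodel_data = yaml.safe_load(MLMODEL_SNIPPET)
signature_raw = mlmodel_data["signature"]

# Each signature field is itself a JSON document embedded in a YAML string.
inputs = json.loads(signature_raw["inputs"])
outputs = json.loads(signature_raw["outputs"])
print(inputs)   # [{'name': 'age', 'type': 'long'}]
print(outputs)  # [{'type': 'double'}]
```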
@@ -1019,6 +1215,17 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             for alias in obj.aliases:
                 if alias.alias_name:
                     aliases.append(alias.alias_name)
+
+        run_details: Optional[ModelRunDetails] = None
+        # Fetch run details if run_id exists
+        if obj.run_id:
+            run_details = self.get_run_details(obj.run_id)
+
+        # Extract signature separately from Files API
+        signature: Optional[ModelSignature] = self._extract_signature_from_files_api(
+            obj
+        )
+
         return ModelVersion(
             id=f"{model.id}_{obj.version}",
             name=f"{model.name}_{obj.version}",
@@ -1029,6 +1236,8 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             created_at=parse_ts_millis(obj.created_at),
             updated_at=parse_ts_millis(obj.updated_at),
             created_by=obj.created_by,
+            run_details=run_details,
+            signature=signature,
         )
 
     def _create_service_principal(
--- a/datahub/ingestion/source/unity/proxy_types.py
+++ b/datahub/ingestion/source/unity/proxy_types.py
@@ -339,8 +339,75 @@ class Notebook:
         )
 
 
+@dataclass
+class ModelSignature:
+    """
+    Represents the model signature with input and output schemas extracted from MLflow.
+
+    In Unity Catalog, model signatures define the expected input/output formats for ML models.
+    Model signature is stored in the MLmodel YAML file.
+
+    Attributes:
+        inputs: List of input schema specifications, each containing name, type, dtype, shape
+        outputs: List of output schema specifications, each containing name, type, dtype, shape
+        parameters: List of model parameters
+    """
+
+    inputs: Optional[List[Dict[str, str]]]
+    outputs: Optional[List[Dict[str, str]]]
+    parameters: Optional[List[Dict[str, str]]]
+
+
+@dataclass
+class ModelRunDetails:
+    """
+    Represents comprehensive details from an MLflow run associated with a Unity Catalog model version.
+
+    In Unity Catalog, each model version is linked to an MLflow run via run_id. This dataclass
+    contains all the metadata extracted from that MLflow run, including metrics, parameters,
+    and tags.
+
+    Attributes:
+        run_id: MLflow run ID
+        experiment_id: MLflow experiment ID
+        status: Run status (e.g., "FINISHED", "RUNNING")
+        start_time: Run start timestamp (milliseconds since epoch)
+        end_time: Run end timestamp (milliseconds since epoch)
+        user_id: User who initiated the run
+        metrics: Training metrics (e.g., accuracy, loss)
+        parameters: Hyperparameters used for training
+        tags: Run tags/metadata
+    """
+
+    run_id: str
+    experiment_id: str
+    status: Optional[str]
+    start_time: Optional[datetime]
+    end_time: Optional[datetime]
+    user_id: Optional[str]
+    metrics: Optional[Dict[str, str]]
+    parameters: Optional[Dict[str, str]]
+    tags: Optional[Dict[str, str]]
+
+
 @dataclass
 class Model:
+    """
+    Represents a Unity Catalog registered ML model (model group).
+
+    In Unity Catalog, a registered model is a collection of model versions.
+    This dataclass corresponds to a Unity Catalog RegisteredModelInfo.
+
+    Attributes:
+        id: Full qualified name (e.g., "catalog.schema.model_name")
+        name: Model name without catalog/schema prefix
+        schema_name: Schema name containing the model
+        catalog_name: Catalog name containing the model
+        description: Model description/comment
+        created_at: Model creation timestamp
+        updated_at: Last update timestamp
+    """
+
     id: str
     name: str
     schema_name: str
@@ -352,6 +419,28 @@ class Model:
 
 @dataclass
 class ModelVersion:
+    """
+    Represents a specific version of a Unity Catalog registered ML model.
+
+    In Unity Catalog, each model version is linked to an MLflow run (via run_id).
+    This dataclass corresponds to a Unity Catalog ModelVersionInfo.
+
+    Attributes:
+        id: Unique identifier combining model ID and version (e.g., "catalog.schema.model_1")
+        name: Versioned model name
+        model: Reference to the parent Model (model group)
+        version: Version number as string
+        aliases: List of aliases (e.g., ["prod", "latest"])
+        description: Version description/comment
+        created_at: Version creation timestamp
+        updated_at: Last update timestamp
+        created_by: User who created this version
+        run_details: Comprehensive MLflow run details (metrics, parameters, tags)
+            extracted from the MLflow run linked to this model version.
+        signature: Model signature extracted from the MLmodel file via Files API.
+            Contains input/output schema specifications and parameters.
+    """
+
     id: str
     name: str
     model: Model
@@ -361,3 +450,5 @@ class ModelVersion:
     created_at: Optional[datetime]
     updated_at: Optional[datetime]
     created_by: Optional[str]
+    run_details: Optional["ModelRunDetails"]
+    signature: Optional["ModelSignature"]
--- a/datahub/ingestion/source/unity/source.py
+++ b/datahub/ingestion/source/unity/source.py
@@ -1,7 +1,9 @@
+import dataclasses
+import json
 import logging
 import re
 import time
-from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
+from typing import Dict, Iterable, List, Optional, Set, Tuple, Union, cast
 from urllib.parse import urljoin
 
 from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
@@ -209,13 +211,14 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
 
         self.unity_catalog_api_proxy = UnityCatalogApiProxy(
             config.workspace_url,
-            config.token,
             config.warehouse_id,
             report=self.report,
             hive_metastore_proxy=self.hive_metastore_proxy,
             lineage_data_source=config.lineage_data_source,
             usage_data_source=config.usage_data_source,
             databricks_api_page_size=config.databricks_api_page_size,
+            personal_access_token=config.token if config.token else None,
+            azure_auth=config.azure_auth if config.azure_auth else None,
         )
 
         self.external_url_base = urljoin(self.config.workspace_url, "/explore/data")
@@ -317,7 +320,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = UnityCatalogSourceConfig.
+        config = UnityCatalogSourceConfig.model_validate(config_dict)
         return cls(ctx=ctx, config=config)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -741,6 +744,17 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
                 created=TimeStampClass(time=created_time, actor=created_actor),
             )
         )
+        custom_properties = {}
+        if ml_model_version.signature:
+            for key, value in dataclasses.asdict(ml_model_version.signature).items():
+                if value:
+                    custom_properties[f"signature.{key}"] = json.dumps(value)
+
+        if ml_model_version.run_details:
+            if ml_model_version.run_details.tags:
+                for key, value in ml_model_version.run_details.tags.items():
+                    if value:
+                        custom_properties[key] = json.dumps(value)
 
         ml_model = MLModel(
             id=ml_model_version.id,
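The block above flattens the `ModelSignature` dataclass into string-valued custom properties via `dataclasses.asdict` plus `json.dumps`. A minimal standalone illustration of what that produces (the signature values are made up):

```python
import dataclasses
import json
from dataclasses import dataclass
from typing import Dict, List, Optional


# Stand-in for the ModelSignature dataclass added in proxy_types.py above.
@dataclass
class ModelSignature:
    inputs: Optional[List[Dict[str, str]]]
    outputs: Optional[List[Dict[str, str]]]
    parameters: Optional[List[Dict[str, str]]]


signature = ModelSignature(
    inputs=[{"name": "age", "type": "long"}],
    outputs=[{"type": "double"}],
    parameters=None,
)

custom_properties = {}
for key, value in dataclasses.asdict(signature).items():
    if value:  # skip None/empty fields, as the source code does
        custom_properties[f"signature.{key}"] = json.dumps(value)

print(custom_properties)
# {'signature.inputs': '[{"name": "age", "type": "long"}]',
#  'signature.outputs': '[{"type": "double"}]'}
```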
@@ -751,6 +765,18 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             model_group=ml_model_urn,
             platform=self.platform,
             last_modified=ml_model_version.updated_at,
+            training_metrics=cast(
+                Optional[Dict[str, Optional[str]]], ml_model_version.run_details.metrics
+            )
+            if ml_model_version.run_details and ml_model_version.run_details.metrics
+            else None,
+            hyper_params=cast(
+                Optional[Dict[str, Optional[str]]],
+                ml_model_version.run_details.parameters,
+            )
+            if ml_model_version.run_details and ml_model_version.run_details.parameters
+            else None,
+            custom_properties=custom_properties if custom_properties else None,
             extra_aspects=extra_aspects,
         )
 
--- a/datahub/ingestion/source/usage/clickhouse_usage.py
+++ b/datahub/ingestion/source/usage/clickhouse_usage.py
@@ -115,7 +115,7 @@ class ClickHouseUsageSource(Source):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = ClickHouseUsageConfig.
+        config = ClickHouseUsageConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
--- a/datahub/ingestion/source/usage/starburst_trino_usage.py
+++ b/datahub/ingestion/source/usage/starburst_trino_usage.py
@@ -133,7 +133,7 @@ class TrinoUsageSource(Source):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = TrinoUsageConfig.
+        config = TrinoUsageConfig.model_validate(config_dict)
        return cls(ctx, config)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
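The repeated one-line `create()` changes in this release (here and in the source/transformer classmethods) replace the old config construction, which is truncated in this view, with Pydantic v2's `model_validate`. A minimal sketch of what `model_validate` does, using a hypothetical config model rather than the actual DataHub classes:

```python
from pydantic import BaseModel


# Hypothetical config model; the real sources use ConfigModel subclasses
# such as ClickHouseUsageConfig and TrinoUsageConfig.
class ExampleUsageConfig(BaseModel):
    top_n_queries: int = 10
    include_operational_stats: bool = True


config_dict = {"top_n_queries": 25}

# Pydantic v2: validate a plain dict into a typed model instance.
# (In Pydantic v1 the equivalent was ExampleUsageConfig.parse_obj(config_dict).)
config = ExampleUsageConfig.model_validate(config_dict)
print(config.top_n_queries)  # 25
```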
--- a/datahub/ingestion/source/usage/usage_common.py
+++ b/datahub/ingestion/source/usage/usage_common.py
@@ -15,6 +15,7 @@ from typing import (
 )
 
 import pydantic
+from pydantic import ValidationInfo, field_validator
 from pydantic.fields import Field
 
 import datahub.emitter.mce_builder as builder
@@ -226,10 +227,11 @@ class BaseUsageConfig(BaseTimeWindowConfig):
         default=True, description="Whether to ingest the top_n_queries."
     )
 
-    @
-
+    @field_validator("top_n_queries", mode="after")
+    @classmethod
+    def ensure_top_n_queries_is_not_too_big(cls, v: int, info: ValidationInfo) -> int:
         minimum_query_size = 20
-
+        values = info.data
         max_queries = int(values["queries_character_limit"] / minimum_query_size)
         if v > max_queries:
             raise ValueError(
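The validator rewrite above is the recurring pattern in this release: the v1 decorator (truncated in this view) becomes `@field_validator(..., mode="after")` plus `@classmethod`, and previously validated sibling fields are read from `ValidationInfo.data` instead of a `values` argument. A standalone sketch with a hypothetical model mirroring the shape of `BaseUsageConfig`:

```python
from pydantic import BaseModel, ValidationInfo, field_validator


class QueryConfig(BaseModel):
    # Hypothetical fields; names chosen to mirror the diff above.
    queries_character_limit: int = 24000
    top_n_queries: int = 10

    @field_validator("top_n_queries", mode="after")
    @classmethod
    def ensure_top_n_queries_is_not_too_big(cls, v: int, info: ValidationInfo) -> int:
        minimum_query_size = 20
        # In Pydantic v2, fields validated earlier (declaration order) are
        # exposed to later field validators via info.data.
        max_queries = int(info.data["queries_character_limit"] / minimum_query_size)
        if v > max_queries:
            raise ValueError(f"top_n_queries must be <= {max_queries}")
        return v


QueryConfig(top_n_queries=100)     # ok: 100 <= 24000 / 20
# QueryConfig(top_n_queries=5000)  # would raise a ValidationError
```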
--- a/datahub/ingestion/source_config/csv_enricher.py
+++ b/datahub/ingestion/source_config/csv_enricher.py
@@ -1,6 +1,5 @@
-from typing import Any, Dict
-
 import pydantic
+from pydantic import field_validator
 
 from datahub.configuration.common import ConfigModel
 
@@ -21,7 +20,8 @@ class CSVEnricherConfig(ConfigModel):
         description="Delimiter to use when parsing array fields (tags, terms and owners)",
     )
 
-    @
+    @field_validator("write_semantics", mode="after")
+    @classmethod
     def validate_write_semantics(cls, write_semantics: str) -> str:
         if write_semantics.lower() not in {"patch", "override"}:
             raise ValueError(
@@ -31,9 +31,10 @@ class CSVEnricherConfig(ConfigModel):
             )
         return write_semantics
 
-    @
-
-
+    @field_validator("array_delimiter", mode="after")
+    @classmethod
+    def validator_diff(cls, array_delimiter: str, info: pydantic.ValidationInfo) -> str:
+        if array_delimiter == info.data["delimiter"]:
             raise ValueError(
                 "array_delimiter and delimiter are the same. Please choose different delimiters."
             )
--- a/datahub/ingestion/source_config/operation_config.py
+++ b/datahub/ingestion/source_config/operation_config.py
@@ -3,7 +3,7 @@ import logging
 from typing import Any, Dict, Optional
 
 import cachetools
-import
+from pydantic import field_validator, model_validator
 from pydantic.fields import Field
 
 from datahub.configuration.common import ConfigModel
@@ -26,7 +26,8 @@ class OperationConfig(ConfigModel):
         description="Number between 1 to 31 for date of month (both inclusive). If not specified, defaults to Nothing and this field does not take affect.",
     )
 
-    @
+    @model_validator(mode="before")
+    @classmethod
     def lower_freq_configs_are_set(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         lower_freq_profile_enabled = values.get("lower_freq_profile_enabled")
         profile_day_of_week = values.get("profile_day_of_week")
@@ -41,7 +42,8 @@ class OperationConfig(ConfigModel):
             )
         return values
 
-    @
+    @field_validator("profile_day_of_week", mode="after")
+    @classmethod
     def validate_profile_day_of_week(cls, v: Optional[int]) -> Optional[int]:
         profile_day_of_week = v
         if profile_day_of_week is None:
@@ -52,7 +54,8 @@ class OperationConfig(ConfigModel):
             )
         return profile_day_of_week
 
-    @
+    @field_validator("profile_date_of_month", mode="after")
+    @classmethod
     def validate_profile_date_of_month(cls, v: Optional[int]) -> Optional[int]:
         profile_date_of_month = v
         if profile_date_of_month is None:
--- a/datahub/ingestion/source_config/pulsar.py
+++ b/datahub/ingestion/source_config/pulsar.py
@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Union
 from urllib.parse import urlparse
 
 import pydantic
-from pydantic import Field,
+from pydantic import Field, model_validator
 
 from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.source_common import (
@@ -100,27 +100,23 @@ class PulsarSourceConfig(
         default_factory=dict, description="Placeholder for OpenId discovery document"
     )
 
-    @
-    def ensure_only_issuer_or_token(
-
-    ) -> Optional[str]:
-        if token is not None and values.get("issuer_url") is not None:
+    @model_validator(mode="after")
+    def ensure_only_issuer_or_token(self) -> "PulsarSourceConfig":
+        if self.token is not None and self.issuer_url is not None:
             raise ValueError(
                 "Expected only one authentication method, either issuer_url or token."
             )
-        return
-
-    @
-    def ensure_client_id_and_secret_for_issuer_url(
-
-
-        if values.get("issuer_url") is not None and (
-            client_secret is None or values.get("client_id") is None
+        return self
+
+    @model_validator(mode="after")
+    def ensure_client_id_and_secret_for_issuer_url(self) -> "PulsarSourceConfig":
+        if self.issuer_url is not None and (
+            self.client_secret is None or self.client_id is None
         ):
             raise ValueError(
                 "Missing configuration: client_id and client_secret are mandatory when issuer_url is set."
             )
-        return
+        return self
 
     @pydantic.field_validator("web_service_url", mode="after")
     @classmethod
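The two validators above are now whole-model validators: in Pydantic v2, `@model_validator(mode="after")` runs on the fully constructed instance, so cross-field checks read attributes off `self` and must return `self`. A standalone sketch with a hypothetical model standing in for `PulsarSourceConfig`'s auth fields:

```python
from typing import Optional

from pydantic import BaseModel, model_validator


class AuthConfig(BaseModel):
    # Hypothetical stand-in for PulsarSourceConfig's auth fields.
    token: Optional[str] = None
    issuer_url: Optional[str] = None

    @model_validator(mode="after")
    def ensure_only_issuer_or_token(self) -> "AuthConfig":
        # mode="after" validators receive the constructed model, so
        # cross-field checks are plain attribute accesses on self.
        if self.token is not None and self.issuer_url is not None:
            raise ValueError(
                "Expected only one authentication method, either issuer_url or token."
            )
        return self


AuthConfig(token="abc")  # ok
# AuthConfig(token="abc", issuer_url="https://idp.example.com")  # raises ValidationError
```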
--- a/datahub/ingestion/transformer/add_dataset_browse_path.py
+++ b/datahub/ingestion/transformer/add_dataset_browse_path.py
@@ -32,7 +32,7 @@ class AddDatasetBrowsePathTransformer(DatasetBrowsePathsTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "AddDatasetBrowsePathTransformer":
-        config = AddDatasetBrowsePathConfig.
+        config = AddDatasetBrowsePathConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     @staticmethod