acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of acryl-datahub has been flagged as potentially problematic.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/config.py +33 -34
- datahub/ingestion/source/unity/proxy.py +203 -0
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +27 -2
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/sdk/search_filters.py +68 -40
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/unity/proxy.py

```diff
@@ -3,6 +3,7 @@ Manage the communication with DataBricks Server and provide equivalent dataclass
 """
 
 import dataclasses
+import json
 import logging
 import os
 from concurrent.futures import ThreadPoolExecutor
@@ -11,6 +12,7 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Union, cast
 from unittest.mock import patch
 
 import cachetools
+import yaml
 from cachetools import cached
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.catalog import (
@@ -23,7 +25,11 @@ from databricks.sdk.service.catalog import (
     SchemaInfo,
     TableInfo,
 )
+from databricks.sdk.service.files import DownloadResponse, FilesAPI
 from databricks.sdk.service.iam import ServicePrincipal as DatabricksServicePrincipal
+from databricks.sdk.service.ml import (
+    ExperimentsAPI,
+)
 from databricks.sdk.service.sql import (
     QueryFilter,
     QueryInfo,
@@ -54,6 +60,8 @@ from datahub.ingestion.source.unity.proxy_types import (
     ExternalTableReference,
     Metastore,
     Model,
+    ModelRunDetails,
+    ModelSignature,
     ModelVersion,
     Notebook,
     NotebookReference,
@@ -155,6 +163,8 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
     _workspace_url: str
     report: UnityCatalogReport
     warehouse_id: str
+    _experiments_api: ExperimentsAPI
+    _files_api: FilesAPI
 
     def __init__(
         self,
@@ -179,6 +189,7 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
         self.lineage_data_source = lineage_data_source
         self.usage_data_source = usage_data_source
         self.databricks_api_page_size = databricks_api_page_size
+        self._workspace_url = workspace_url
         self._sql_connection_params = {
             "server_hostname": self._workspace_client.config.host.replace(
                 "https://", ""
@@ -187,6 +198,185 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             "access_token": self._workspace_client.config.token,
             "user_agent_entry": "datahub",
         }
+        # Initialize MLflow APIs
+        self._experiments_api = ExperimentsAPI(self._workspace_client.api_client)
+        self._files_api = FilesAPI(self._workspace_client.api_client)
+
+    def get_run_details(self, run_id: str) -> Optional[ModelRunDetails]:
+        """
+        Get comprehensive details from an MLflow run.
+
+        Args:
+            run_id: The MLflow run ID
+
+        Returns:
+            ModelRunDetails object with comprehensive run information
+        """
+        try:
+            run_response = self._experiments_api.get_run(run_id)
+            run = run_response.run
+
+            if (
+                not run
+                or not run.info
+                or not run.info.run_id
+                or not run.info.experiment_id
+            ):
+                return None
+
+            # Extract metrics
+            metrics: Dict[str, Any] = {}
+            if run.data and run.data.metrics:
+                for metric in run.data.metrics:
+                    if metric.key is not None:
+                        metrics[metric.key] = metric.value
+
+            # Extract parameters
+            parameters: Dict[str, Any] = {}
+            if run.data and run.data.params:
+                for param in run.data.params:
+                    if param.key is not None:
+                        parameters[param.key] = param.value
+
+            # Extract tags
+            tags: Dict[str, str] = {}
+            if run.data and run.data.tags:
+                for tag in run.data.tags:
+                    if tag.key is not None and tag.value is not None:
+                        tags[tag.key] = tag.value
+
+            return ModelRunDetails(
+                run_id=run.info.run_id,
+                experiment_id=run.info.experiment_id,
+                status=run.info.status.value if run.info.status else None,
+                start_time=parse_ts_millis(run.info.start_time),
+                end_time=parse_ts_millis(run.info.end_time),
+                user_id=run.info.user_id,
+                metrics=metrics,
+                parameters=parameters,
+                tags=tags,
+            )
+        except Exception as e:
+            logger.warning(
+                f"Unable to get run details for MLflow experiment, run-id: {run_id}",
+                exc_info=True,
+            )
+            self.report.report_warning(
+                title="Unable to get run details for MLflow experiment",
+                message="Error while getting run details for MLflow experiment",
+                context=f"run-id: {run_id}",
+                exc=e,
+            )
+            return None
+
+    def _extract_signature_from_files_api(
+        self, model_version: ModelVersionInfo
+    ) -> Optional[ModelSignature]:
+        """
+        Extract signature from MLmodel file using Databricks FilesAPI.
+        Uses the API endpoint: /api/2.0/fs/files/Models/{catalog}/{schema}/{model}/{version}/MLmodel
+
+        Args:
+            model_version: Unity Catalog ModelVersionInfo object with catalog_name, schema_name, model_name, version
+
+        Returns:
+            ModelSignature if found, None otherwise
+        """
+        try:
+            # Construct file path for FilesAPI
+            # The correct path format is: /Models/{catalog}/{schema}/{model}/{version}/MLmodel
+            file_path = (
+                f"/Models/{model_version.catalog_name}/{model_version.schema_name}/"
+                f"{model_version.model_name}/{model_version.version}/MLmodel"
+            )
+
+            logger.debug(f"Downloading MLmodel from FilesAPI: {file_path}")
+
+            # Download the file using FilesAPI
+            download_response: DownloadResponse = self._files_api.download(
+                file_path=file_path
+            )
+
+            # Read the file content
+            # DownloadResponse.contents is a BinaryIO object
+            if download_response and download_response.contents:
+                content_stream = download_response.contents
+
+                # Read from the binary stream
+                if content_stream:
+                    mlmodel_content: str = content_stream.read().decode("utf-8")
+
+                    logger.debug(
+                        f"MLmodel file contents from FilesAPI ({file_path}):\n{mlmodel_content}"
+                    )
+
+                    # Parse YAML content
+                    mlmodel_data = yaml.safe_load(mlmodel_content)
+
+                    # Extract signature from MLmodel YAML
+                    if mlmodel_data and "signature" in mlmodel_data:
+                        signature_raw = mlmodel_data["signature"]
+
+                        # Signature inputs and outputs are stored as JSON strings in the YAML
+                        # Parse them into proper dict/list format
+                        signature_data = {}
+                        if "inputs" in signature_raw:
+                            try:
+                                signature_data["inputs"] = json.loads(
+                                    signature_raw["inputs"]
+                                )
+                            except (json.JSONDecodeError, TypeError) as e:
+                                logger.debug(f"Failed to parse inputs JSON: {e}")
+
+                        if "outputs" in signature_raw:
+                            try:
+                                signature_data["outputs"] = json.loads(
+                                    signature_raw["outputs"]
+                                )
+                            except (json.JSONDecodeError, TypeError) as e:
+                                logger.debug(f"Failed to parse outputs JSON: {e}")
+
+                        if "parameters" in signature_raw:
+                            try:
+                                signature_data["parameters"] = json.loads(
+                                    signature_raw["parameters"]
+                                )
+                            except (json.JSONDecodeError, TypeError) as e:
+                                logger.debug(f"Failed to parse parameters JSON: {e}")
+
+                        return ModelSignature(
+                            inputs=signature_data["inputs"]
+                            if "inputs" in signature_raw
+                            else None,
+                            outputs=signature_data["outputs"]
+                            if "outputs" in signature_raw
+                            else None,
+                            parameters=signature_data["parameters"]
+                            if "parameters" in signature_raw
+                            else None,
+                        )
+                    else:
+                        logger.debug(
+                            f"No signature found in MLmodel data from {file_path}"
+                        )
+                        return None
+
+            return None
+
+        except Exception as e:
+            model_name = getattr(model_version, "model_name", "unknown")
+            version_num = getattr(model_version, "version", "unknown")
+            self.report.report_warning(
+                title="Unable to extract signature from MLmodel file",
+                message="Error while extracting signature from MLmodel file",
+                context=f"model-name: {model_name}, model-version: {version_num}",
+                exc=e,
+            )
+            logger.warning(
+                f"Unable to extract signature from MLmodel file, model-name: {model_name}, model-version: {version_num}",
+                exc_info=True,
+            )
+            return None
 
     def check_basic_connectivity(self) -> bool:
         return bool(
@@ -1019,6 +1209,17 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             for alias in obj.aliases:
                 if alias.alias_name:
                     aliases.append(alias.alias_name)
+
+        run_details: Optional[ModelRunDetails] = None
+        # Fetch run details if run_id exists
+        if obj.run_id:
+            run_details = self.get_run_details(obj.run_id)
+
+        # Extract signature separately from Files API
+        signature: Optional[ModelSignature] = self._extract_signature_from_files_api(
+            obj
+        )
+
         return ModelVersion(
             id=f"{model.id}_{obj.version}",
             name=f"{model.name}_{obj.version}",
@@ -1029,6 +1230,8 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             created_at=parse_ts_millis(obj.created_at),
             updated_at=parse_ts_millis(obj.updated_at),
             created_by=obj.created_by,
+            run_details=run_details,
+            signature=signature,
         )
 
     def _create_service_principal(
```
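The signature extraction above hinges on a quirk of the MLmodel format: the file itself is YAML, but its `signature` block stores `inputs`, `outputs`, and `parameters` as JSON-encoded strings, which is why `yaml.safe_load` is followed by a `json.loads` per field. A minimal sketch of just that parsing step, using a made-up MLmodel payload instead of one downloaded through `FilesAPI`:

```python
# Standalone sketch of the MLmodel parsing done in _extract_signature_from_files_api.
# The MLmodel text below is a hypothetical example, not a real downloaded file.
import json

import yaml

mlmodel_content = """
artifact_path: model
signature:
  inputs: '[{"name": "age", "type": "long"}, {"name": "fare", "type": "double"}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
"""

mlmodel_data = yaml.safe_load(mlmodel_content)
signature_raw = mlmodel_data["signature"]

# Each signature field is a JSON string embedded in the YAML document.
inputs = json.loads(signature_raw["inputs"])
outputs = json.loads(signature_raw["outputs"])

print(inputs[0]["name"])   # -> age
print(outputs[0]["type"])  # -> tensor
```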
datahub/ingestion/source/unity/proxy_types.py

```diff
@@ -339,8 +339,75 @@ class Notebook:
     )
 
 
+@dataclass
+class ModelSignature:
+    """
+    Represents the model signature with input and output schemas extracted from MLflow.
+
+    In Unity Catalog, model signatures define the expected input/output formats for ML models.
+    Model signature is stored in the MLmodel YAML file.
+
+    Attributes:
+        inputs: List of input schema specifications, each containing name, type, dtype, shape
+        outputs: List of output schema specifications, each containing name, type, dtype, shape
+        parameters: List of model parameters
+    """
+
+    inputs: Optional[List[Dict[str, str]]]
+    outputs: Optional[List[Dict[str, str]]]
+    parameters: Optional[List[Dict[str, str]]]
+
+
+@dataclass
+class ModelRunDetails:
+    """
+    Represents comprehensive details from an MLflow run associated with a Unity Catalog model version.
+
+    In Unity Catalog, each model version is linked to an MLflow run via run_id. This dataclass
+    contains all the metadata extracted from that MLflow run, including metrics, parameters,
+    and tags.
+
+    Attributes:
+        run_id: MLflow run ID
+        experiment_id: MLflow experiment ID
+        status: Run status (e.g., "FINISHED", "RUNNING")
+        start_time: Run start timestamp (milliseconds since epoch)
+        end_time: Run end timestamp (milliseconds since epoch)
+        user_id: User who initiated the run
+        metrics: Training metrics (e.g., accuracy, loss)
+        parameters: Hyperparameters used for training
+        tags: Run tags/metadata
+    """
+
+    run_id: str
+    experiment_id: str
+    status: Optional[str]
+    start_time: Optional[datetime]
+    end_time: Optional[datetime]
+    user_id: Optional[str]
+    metrics: Optional[Dict[str, str]]
+    parameters: Optional[Dict[str, str]]
+    tags: Optional[Dict[str, str]]
+
+
 @dataclass
 class Model:
+    """
+    Represents a Unity Catalog registered ML model (model group).
+
+    In Unity Catalog, a registered model is a collection of model versions.
+    This dataclass corresponds to a Unity Catalog RegisteredModelInfo.
+
+    Attributes:
+        id: Full qualified name (e.g., "catalog.schema.model_name")
+        name: Model name without catalog/schema prefix
+        schema_name: Schema name containing the model
+        catalog_name: Catalog name containing the model
+        description: Model description/comment
+        created_at: Model creation timestamp
+        updated_at: Last update timestamp
+    """
+
     id: str
     name: str
     schema_name: str
@@ -352,6 +419,28 @@ class Model:
 
 @dataclass
 class ModelVersion:
+    """
+    Represents a specific version of a Unity Catalog registered ML model.
+
+    In Unity Catalog, each model version is linked to an MLflow run (via run_id).
+    This dataclass corresponds to a Unity Catalog ModelVersionInfo.
+
+    Attributes:
+        id: Unique identifier combining model ID and version (e.g., "catalog.schema.model_1")
+        name: Versioned model name
+        model: Reference to the parent Model (model group)
+        version: Version number as string
+        aliases: List of aliases (e.g., ["prod", "latest"])
+        description: Version description/comment
+        created_at: Version creation timestamp
+        updated_at: Last update timestamp
+        created_by: User who created this version
+        run_details: Comprehensive MLflow run details (metrics, parameters, tags)
+            extracted from the MLflow run linked to this model version.
+        signature: Model signature extracted from the MLmodel file via Files API.
+            Contains input/output schema specifications and parameters.
+    """
+
     id: str
     name: str
     model: Model
@@ -361,3 +450,5 @@ class ModelVersion:
     created_at: Optional[datetime]
     updated_at: Optional[datetime]
     created_by: Optional[str]
+    run_details: Optional["ModelRunDetails"]
+    signature: Optional["ModelSignature"]
```
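For orientation, here is a hedged sketch of how the two new dataclasses could be instantiated; the values are invented, only the field names come from the diff above, and the import assumes acryl-datahub is installed with its Databricks/Unity Catalog extra.

```python
# Illustrative only: constructing the dataclasses added to proxy_types.py.
from datetime import datetime, timezone

from datahub.ingestion.source.unity.proxy_types import ModelRunDetails, ModelSignature

signature = ModelSignature(
    inputs=[{"name": "age", "type": "long"}],
    outputs=[{"type": "double"}],
    parameters=None,
)

run_details = ModelRunDetails(
    run_id="abc123",  # made-up MLflow run id
    experiment_id="42",
    status="FINISHED",
    start_time=datetime(2025, 1, 1, tzinfo=timezone.utc),
    end_time=datetime(2025, 1, 1, 0, 5, tzinfo=timezone.utc),
    user_id="someone@example.com",
    metrics={"rmse": "0.12"},
    parameters={"max_depth": "6"},
    tags={"mlflow.source.type": "NOTEBOOK"},
)
```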
datahub/ingestion/source/unity/source.py

```diff
@@ -1,7 +1,9 @@
+import dataclasses
+import json
 import logging
 import re
 import time
-from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
+from typing import Dict, Iterable, List, Optional, Set, Tuple, Union, cast
 from urllib.parse import urljoin
 
 from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
@@ -317,7 +319,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = UnityCatalogSourceConfig.
+        config = UnityCatalogSourceConfig.model_validate(config_dict)
         return cls(ctx=ctx, config=config)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -741,6 +743,17 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
                 created=TimeStampClass(time=created_time, actor=created_actor),
             )
         )
+        custom_properties = {}
+        if ml_model_version.signature:
+            for key, value in dataclasses.asdict(ml_model_version.signature).items():
+                if value:
+                    custom_properties[f"signature.{key}"] = json.dumps(value)
+
+        if ml_model_version.run_details:
+            if ml_model_version.run_details.tags:
+                for key, value in ml_model_version.run_details.tags.items():
+                    if value:
+                        custom_properties[key] = json.dumps(value)
 
         ml_model = MLModel(
             id=ml_model_version.id,
@@ -751,6 +764,18 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             model_group=ml_model_urn,
             platform=self.platform,
             last_modified=ml_model_version.updated_at,
+            training_metrics=cast(
+                Optional[Dict[str, Optional[str]]], ml_model_version.run_details.metrics
+            )
+            if ml_model_version.run_details and ml_model_version.run_details.metrics
+            else None,
+            hyper_params=cast(
+                Optional[Dict[str, Optional[str]]],
+                ml_model_version.run_details.parameters,
+            )
+            if ml_model_version.run_details and ml_model_version.run_details.parameters
+            else None,
+            custom_properties=custom_properties if custom_properties else None,
             extra_aspects=extra_aspects,
         )
 
```
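The custom-properties hunk above is a flatten-and-serialize step: `dataclasses.asdict` turns the signature dataclass into a plain dict, and every non-empty field is JSON-encoded under a `signature.<field>` key. A self-contained sketch of that logic, using a local stand-in for `ModelSignature`:

```python
# Sketch of how source.py builds MLModel custom properties from a model signature.
import dataclasses
import json
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class ModelSignature:  # local stand-in mirroring the dataclass added in proxy_types.py
    inputs: Optional[List[Dict[str, str]]]
    outputs: Optional[List[Dict[str, str]]]
    parameters: Optional[List[Dict[str, str]]]


signature = ModelSignature(
    inputs=[{"name": "age", "type": "long"}], outputs=None, parameters=None
)

custom_properties = {}
for key, value in dataclasses.asdict(signature).items():
    if value:  # skip None/empty fields, as in the diff
        custom_properties[f"signature.{key}"] = json.dumps(value)

print(custom_properties)
# {'signature.inputs': '[{"name": "age", "type": "long"}]'}
```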
datahub/ingestion/source/usage/clickhouse_usage.py

```diff
@@ -115,7 +115,7 @@ class ClickHouseUsageSource(Source):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = ClickHouseUsageConfig.
+        config = ClickHouseUsageConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
```
datahub/ingestion/source/usage/starburst_trino_usage.py

```diff
@@ -133,7 +133,7 @@ class TrinoUsageSource(Source):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = TrinoUsageConfig.
+        config = TrinoUsageConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
```
datahub/ingestion/source/usage/usage_common.py

```diff
@@ -15,6 +15,7 @@ from typing import (
 )
 
 import pydantic
+from pydantic import ValidationInfo, field_validator
 from pydantic.fields import Field
 
 import datahub.emitter.mce_builder as builder
@@ -226,10 +227,11 @@ class BaseUsageConfig(BaseTimeWindowConfig):
         default=True, description="Whether to ingest the top_n_queries."
     )
 
-    @
-
+    @field_validator("top_n_queries", mode="after")
+    @classmethod
+    def ensure_top_n_queries_is_not_too_big(cls, v: int, info: ValidationInfo) -> int:
         minimum_query_size = 20
-
+        values = info.data
         max_queries = int(values["queries_character_limit"] / minimum_query_size)
         if v > max_queries:
             raise ValueError(
```
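The validator change above is the standard pydantic v2 migration: `@field_validator(..., mode="after")` on a classmethod, with previously validated fields read from `ValidationInfo.data` rather than the v1 `values` argument. A minimal sketch on an illustrative model (only the field names mirror `BaseUsageConfig`):

```python
# Hedged sketch of the pydantic v2 field_validator + ValidationInfo pattern.
from pydantic import BaseModel, ValidationInfo, field_validator


class UsageLikeConfig(BaseModel):
    queries_character_limit: int = 24000  # validated before top_n_queries, so visible via info.data
    top_n_queries: int = 10

    @field_validator("top_n_queries", mode="after")
    @classmethod
    def ensure_top_n_queries_is_not_too_big(cls, v: int, info: ValidationInfo) -> int:
        minimum_query_size = 20
        max_queries = int(info.data["queries_character_limit"] / minimum_query_size)
        if v > max_queries:
            raise ValueError(f"top_n_queries is limited to {max_queries}")
        return v


UsageLikeConfig(top_n_queries=50)      # ok: 50 <= 24000 / 20
# UsageLikeConfig(top_n_queries=5000)  # would raise a ValidationError
```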
datahub/ingestion/source_config/csv_enricher.py

```diff
@@ -1,6 +1,5 @@
-from typing import Any, Dict
-
 import pydantic
+from pydantic import field_validator
 
 from datahub.configuration.common import ConfigModel
 
@@ -21,7 +20,8 @@ class CSVEnricherConfig(ConfigModel):
         description="Delimiter to use when parsing array fields (tags, terms and owners)",
     )
 
-    @
+    @field_validator("write_semantics", mode="after")
+    @classmethod
     def validate_write_semantics(cls, write_semantics: str) -> str:
         if write_semantics.lower() not in {"patch", "override"}:
             raise ValueError(
@@ -31,9 +31,10 @@
         )
         return write_semantics
 
-    @
-
-
+    @field_validator("array_delimiter", mode="after")
+    @classmethod
+    def validator_diff(cls, array_delimiter: str, info: pydantic.ValidationInfo) -> str:
+        if array_delimiter == info.data["delimiter"]:
             raise ValueError(
                 "array_delimiter and delimiter are the same. Please choose different delimiters."
             )
```
datahub/ingestion/source_config/operation_config.py

```diff
@@ -3,7 +3,7 @@ import logging
 from typing import Any, Dict, Optional
 
 import cachetools
-import
+from pydantic import field_validator, model_validator
 from pydantic.fields import Field
 
 from datahub.configuration.common import ConfigModel
@@ -26,7 +26,8 @@ class OperationConfig(ConfigModel):
         description="Number between 1 to 31 for date of month (both inclusive). If not specified, defaults to Nothing and this field does not take affect.",
     )
 
-    @
+    @model_validator(mode="before")
+    @classmethod
     def lower_freq_configs_are_set(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         lower_freq_profile_enabled = values.get("lower_freq_profile_enabled")
         profile_day_of_week = values.get("profile_day_of_week")
@@ -41,7 +42,8 @@
             )
         return values
 
-    @
+    @field_validator("profile_day_of_week", mode="after")
+    @classmethod
     def validate_profile_day_of_week(cls, v: Optional[int]) -> Optional[int]:
         profile_day_of_week = v
         if profile_day_of_week is None:
@@ -52,7 +54,8 @@
             )
         return profile_day_of_week
 
-    @
+    @field_validator("profile_date_of_month", mode="after")
+    @classmethod
     def validate_profile_date_of_month(cls, v: Optional[int]) -> Optional[int]:
         profile_date_of_month = v
         if profile_date_of_month is None:
```
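Here the multi-field check keeps its v1 shape but moves to `@model_validator(mode="before")`, which receives the raw input mapping before any field parsing. A hedged, self-contained sketch with an illustrative model reusing the `OperationConfig` field names:

```python
# Sketch of the model_validator(mode="before") pattern; the model itself is made up.
from typing import Any, Dict, Optional

from pydantic import BaseModel, model_validator


class ProfilingSchedule(BaseModel):
    lower_freq_profile_enabled: bool = False
    profile_day_of_week: Optional[int] = None
    profile_date_of_month: Optional[int] = None

    @model_validator(mode="before")
    @classmethod
    def lower_freq_configs_are_set(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Cross-field check on the raw input dict, before individual field validation.
        if values.get("lower_freq_profile_enabled") and (
            values.get("profile_day_of_week") is None
            and values.get("profile_date_of_month") is None
        ):
            raise ValueError(
                "Lower-frequency profiling is enabled but no day of week or date of month is set."
            )
        return values


ProfilingSchedule(lower_freq_profile_enabled=True, profile_day_of_week=1)  # ok
```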
datahub/ingestion/source_config/pulsar.py

```diff
@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Union
 from urllib.parse import urlparse
 
 import pydantic
-from pydantic import Field,
+from pydantic import Field, model_validator
 
 from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.source_common import (
@@ -100,27 +100,23 @@ class PulsarSourceConfig(
         default_factory=dict, description="Placeholder for OpenId discovery document"
     )
 
-    @
-    def ensure_only_issuer_or_token(
-
-    ) -> Optional[str]:
-        if token is not None and values.get("issuer_url") is not None:
+    @model_validator(mode="after")
+    def ensure_only_issuer_or_token(self) -> "PulsarSourceConfig":
+        if self.token is not None and self.issuer_url is not None:
             raise ValueError(
                 "Expected only one authentication method, either issuer_url or token."
             )
-        return
-
-    @
-    def ensure_client_id_and_secret_for_issuer_url(
-
-
-        if values.get("issuer_url") is not None and (
-            client_secret is None or values.get("client_id") is None
+        return self
+
+    @model_validator(mode="after")
+    def ensure_client_id_and_secret_for_issuer_url(self) -> "PulsarSourceConfig":
+        if self.issuer_url is not None and (
+            self.client_secret is None or self.client_id is None
         ):
             raise ValueError(
                 "Missing configuration: client_id and client_secret are mandatory when issuer_url is set."
             )
-        return
+        return self
 
     @pydantic.field_validator("web_service_url", mode="after")
     @classmethod
```
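The Pulsar config swaps its v1 root validators for `@model_validator(mode="after")`, which runs on the constructed model, checks cross-field constraints on `self`, and must return `self`. A minimal sketch with an illustrative model reusing the same field names:

```python
# Sketch of the model_validator(mode="after") pattern adopted in PulsarSourceConfig.
from typing import Optional

from pydantic import BaseModel, model_validator


class AuthLikeConfig(BaseModel):
    issuer_url: Optional[str] = None
    token: Optional[str] = None

    @model_validator(mode="after")
    def ensure_only_issuer_or_token(self) -> "AuthLikeConfig":
        if self.token is not None and self.issuer_url is not None:
            raise ValueError(
                "Expected only one authentication method, either issuer_url or token."
            )
        return self


AuthLikeConfig(token="t0ken")                              # ok
# AuthLikeConfig(token="t0ken", issuer_url="https://idp")  # would raise a ValidationError
```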
datahub/ingestion/transformer/add_dataset_browse_path.py

```diff
@@ -32,7 +32,7 @@ class AddDatasetBrowsePathTransformer(DatasetBrowsePathsTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "AddDatasetBrowsePathTransformer":
-        config = AddDatasetBrowsePathConfig.
+        config = AddDatasetBrowsePathConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     @staticmethod
```
datahub/ingestion/transformer/add_dataset_dataproduct.py

```diff
@@ -1,7 +1,7 @@
 import logging
 from typing import Callable, Dict, List, Optional, Union
 
-import
+from pydantic import model_validator
 
 from datahub.configuration.common import ConfigModel, KeyValuePattern
 from datahub.configuration.import_resolver import pydantic_resolve_key
@@ -39,7 +39,7 @@ class AddDatasetDataProduct(DatasetDataproductTransformer):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetDataProduct":
-        config = AddDatasetDataProductConfig.
+        config = AddDatasetDataProductConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def transform_aspect(
@@ -116,7 +116,7 @@ class SimpleAddDatasetDataProduct(AddDatasetDataProduct):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "SimpleAddDatasetDataProduct":
-        config = SimpleDatasetDataProductConfig.
+        config = SimpleDatasetDataProductConfig.model_validate(config_dict)
         return cls(config, ctx)
 
 
@@ -124,7 +124,8 @@ class PatternDatasetDataProductConfig(ConfigModel):
     dataset_to_data_product_urns_pattern: KeyValuePattern = KeyValuePattern.all()
     is_container: bool = False
 
-    @
+    @model_validator(mode="before")
+    @classmethod
     def validate_pattern_value(cls, values: Dict) -> Dict:
         rules = values["dataset_to_data_product_urns_pattern"]["rules"]
         for key, value in rules.items():
@@ -156,5 +157,5 @@ class PatternAddDatasetDataProduct(AddDatasetDataProduct):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternAddDatasetDataProduct":
-        config = PatternDatasetDataProductConfig.
+        config = PatternDatasetDataProductConfig.model_validate(config_dict)
         return cls(config, ctx)
```
datahub/ingestion/transformer/add_dataset_ownership.py

```diff
@@ -55,7 +55,7 @@ class AddDatasetOwnership(OwnershipTransformer):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetOwnership":
-        config = AddDatasetOwnershipConfig.
+        config = AddDatasetOwnershipConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     @staticmethod
@@ -209,7 +209,7 @@ class SimpleAddDatasetOwnership(AddDatasetOwnership):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "SimpleAddDatasetOwnership":
-        config = SimpleDatasetOwnershipConfig.
+        config = SimpleDatasetOwnershipConfig.model_validate(config_dict)
         return cls(config, ctx)
 
 
@@ -247,5 +247,5 @@ class PatternAddDatasetOwnership(AddDatasetOwnership):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternAddDatasetOwnership":
-        config = PatternDatasetOwnershipConfig.
+        config = PatternDatasetOwnershipConfig.model_validate(config_dict)
         return cls(config, ctx)
```
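All of the transformer `create` hunks apply the same one-line migration: the config object is now built with pydantic v2's `model_validate` on the raw config dict. A tiny illustration with a made-up config class:

```python
# Illustrative only: the model_validate call pattern used throughout the transformer hunks.
from pydantic import BaseModel


class ExampleTransformerConfig(BaseModel):
    path: str
    replace_existing: bool = False


config_dict = {"path": "/prod/warehouse", "replace_existing": True}
config = ExampleTransformerConfig.model_validate(config_dict)
print(config.replace_existing)  # True
```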