acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/METADATA +2566 -2514
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/RECORD +159 -149
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +3 -3
- datahub/api/entities/datajob/datajob.py +7 -4
- datahub/api/entities/dataset/dataset.py +9 -11
- datahub/api/entities/forms/forms.py +34 -34
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/operation.py +4 -4
- datahub/cli/check_cli.py +3 -2
- datahub/cli/config_utils.py +2 -2
- datahub/cli/delete_cli.py +6 -5
- datahub/cli/docker_cli.py +2 -2
- datahub/cli/exists_cli.py +2 -1
- datahub/cli/get_cli.py +2 -1
- datahub/cli/iceberg_cli.py +6 -5
- datahub/cli/ingest_cli.py +9 -6
- datahub/cli/migrate.py +4 -3
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +3 -2
- datahub/cli/specific/assertions_cli.py +2 -1
- datahub/cli/specific/datacontract_cli.py +3 -2
- datahub/cli/specific/dataproduct_cli.py +10 -9
- datahub/cli/specific/dataset_cli.py +4 -3
- datahub/cli/specific/forms_cli.py +2 -1
- datahub/cli/specific/group_cli.py +2 -1
- datahub/cli/specific/structuredproperties_cli.py +4 -3
- datahub/cli/specific/user_cli.py +2 -1
- datahub/cli/state_cli.py +2 -1
- datahub/cli/timeline_cli.py +2 -1
- datahub/configuration/common.py +5 -0
- datahub/configuration/source_common.py +1 -1
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/request_helper.py +116 -3
- datahub/emitter/rest_emitter.py +163 -93
- datahub/entrypoints.py +2 -1
- datahub/errors.py +4 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
- datahub/ingestion/api/source.py +2 -5
- datahub/ingestion/api/source_helpers.py +1 -0
- datahub/ingestion/glossary/classification_mixin.py +4 -2
- datahub/ingestion/graph/client.py +33 -8
- datahub/ingestion/graph/config.py +14 -0
- datahub/ingestion/graph/filters.py +1 -1
- datahub/ingestion/graph/links.py +53 -0
- datahub/ingestion/run/pipeline.py +9 -6
- datahub/ingestion/run/pipeline_config.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +5 -6
- datahub/ingestion/source/apply/datahub_apply.py +2 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
- datahub/ingestion/source/common/subtypes.py +3 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
- datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
- datahub/ingestion/source/dbt/dbt_common.py +10 -2
- datahub/ingestion/source/dbt/dbt_core.py +82 -42
- datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
- datahub/ingestion/source/feast.py +4 -4
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
- datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
- datahub/ingestion/source/ge_data_profiler.py +27 -1
- datahub/ingestion/source/hex/api.py +1 -20
- datahub/ingestion/source/hex/query_fetcher.py +4 -1
- datahub/ingestion/source/iceberg/iceberg.py +20 -4
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +17 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
- datahub/ingestion/source/looker/looker_source.py +34 -5
- datahub/ingestion/source/looker/lookml_source.py +7 -1
- datahub/ingestion/source/metadata/lineage.py +2 -1
- datahub/ingestion/source/mlflow.py +19 -6
- datahub/ingestion/source/mode.py +74 -28
- datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
- datahub/ingestion/source/powerbi/config.py +13 -1
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/redshift/usage.py +10 -9
- datahub/ingestion/source/sigma/config.py +74 -6
- datahub/ingestion/source/sigma/sigma.py +16 -1
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +4 -52
- datahub/ingestion/source/snowflake/snowflake_config.py +2 -12
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -18
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
- datahub/ingestion/source/snowflake/snowflake_query.py +9 -63
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +5 -1
- datahub/ingestion/source/sql/druid.py +7 -2
- datahub/ingestion/source/sql/hive.py +7 -2
- datahub/ingestion/source/sql/hive_metastore.py +5 -5
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/oracle.py +6 -2
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +31 -6
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +2 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
- datahub/ingestion/source/vertexai/vertexai.py +316 -4
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
- datahub/integrations/assertion/common.py +3 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +538 -493
- datahub/metadata/_urns/urn_defs.py +1819 -1763
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +17296 -16883
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
- datahub/metadata/schemas/QueryProperties.avsc +4 -2
- datahub/metadata/schemas/SystemMetadata.avsc +86 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/_all_entities.py +4 -0
- datahub/sdk/_shared.py +142 -4
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/dataset.py +2 -2
- datahub/sdk/entity_client.py +8 -0
- datahub/sdk/lineage_client.py +235 -0
- datahub/sdk/main_client.py +6 -3
- datahub/sdk/mlmodel.py +301 -0
- datahub/sdk/mlmodelgroup.py +233 -0
- datahub/secret/datahub_secret_store.py +2 -1
- datahub/specific/dataset.py +12 -0
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +48 -34
- datahub/sql_parsing/sqlglot_utils.py +18 -14
- datahub/telemetry/telemetry.py +2 -2
- datahub/testing/check_imports.py +1 -1
- datahub/testing/mcp_diff.py +15 -2
- datahub/upgrade/upgrade.py +10 -12
- datahub/utilities/logging_manager.py +8 -1
- datahub/utilities/server_config_util.py +350 -10
- datahub/utilities/sqlalchemy_query_combiner.py +4 -5
- datahub/utilities/urn_encoder.py +1 -1
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -20,16 +20,16 @@ from datahub.utilities._custom_package_loader import get_custom_models_package
|
|
|
20
20
|
_custom_package_path = get_custom_models_package()
|
|
21
21
|
|
|
22
22
|
if TYPE_CHECKING or not _custom_package_path:
|
|
23
|
-
from ._schema_classes import *
|
|
23
|
+
from ._internal_schema_classes import *
|
|
24
24
|
|
|
25
25
|
# Required explicitly because __all__ doesn't include _ prefixed names.
|
|
26
|
-
from ._schema_classes import __SCHEMA_TYPES
|
|
26
|
+
from ._internal_schema_classes import __SCHEMA_TYPES
|
|
27
27
|
|
|
28
28
|
if IS_SPHINX_BUILD:
|
|
29
29
|
# Set __module__ to the current module so that Sphinx will document the
|
|
30
30
|
# classes as belonging to this module instead of the custom package.
|
|
31
31
|
for _cls in list(globals().values()):
|
|
32
|
-
if hasattr(_cls, "__module__") and "datahub.metadata._schema_classes" in _cls.__module__:
|
|
32
|
+
if hasattr(_cls, "__module__") and "datahub.metadata._internal_schema_classes" in _cls.__module__:
|
|
33
33
|
_cls.__module__ = __name__
|
|
34
34
|
else:
|
|
35
35
|
_custom_package = importlib.import_module(_custom_package_path)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataHubOpenAPISchemaKey",
|
|
5
|
+
"keyForEntity": "dataHubOpenAPISchema",
|
|
6
|
+
"entityCategory": "internal",
|
|
7
|
+
"entityAspects": [
|
|
8
|
+
"systemMetadata"
|
|
9
|
+
],
|
|
10
|
+
"entityDoc": "Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model."
|
|
11
|
+
},
|
|
12
|
+
"name": "DataHubOpenAPISchemaKey",
|
|
13
|
+
"namespace": "com.linkedin.pegasus2avro.metadata.key",
|
|
14
|
+
"fields": [
|
|
15
|
+
{
|
|
16
|
+
"type": "string",
|
|
17
|
+
"name": "id",
|
|
18
|
+
"doc": "A unique id for the DataHub OpenAPI schema."
|
|
19
|
+
}
|
|
20
|
+
],
|
|
21
|
+
"doc": "Key for a Query"
|
|
22
|
+
}
|
|
@@ -31,12 +31,14 @@
|
|
|
31
31
|
"type": {
|
|
32
32
|
"type": "enum",
|
|
33
33
|
"symbolDocs": {
|
|
34
|
-
"SQL": "A SQL Query"
|
|
34
|
+
"SQL": "A SQL Query",
|
|
35
|
+
"UNKNOWN": "Unknown query language"
|
|
35
36
|
},
|
|
36
37
|
"name": "QueryLanguage",
|
|
37
38
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
38
39
|
"symbols": [
|
|
39
|
-
"SQL"
|
|
40
|
+
"SQL",
|
|
41
|
+
"UNKNOWN"
|
|
40
42
|
]
|
|
41
43
|
},
|
|
42
44
|
"name": "language",
|
|
@@ -53,6 +53,11 @@
|
|
|
53
53
|
"namespace": "com.linkedin.pegasus2avro.form",
|
|
54
54
|
"fields": [
|
|
55
55
|
{
|
|
56
|
+
"Searchable": {
|
|
57
|
+
"fieldName": "promptId",
|
|
58
|
+
"fieldType": "KEYWORD",
|
|
59
|
+
"queryByDefault": false
|
|
60
|
+
},
|
|
56
61
|
"type": "string",
|
|
57
62
|
"name": "id",
|
|
58
63
|
"doc": "The unique id for this prompt. This must be GLOBALLY unique."
|
|
@@ -6755,6 +6755,9 @@
|
|
|
6755
6755
|
"doc": "Version of the MLModelDeployment"
|
|
6756
6756
|
},
|
|
6757
6757
|
{
|
|
6758
|
+
"Searchable": {
|
|
6759
|
+
"fieldName": "deploymentStatus"
|
|
6760
|
+
},
|
|
6758
6761
|
"type": [
|
|
6759
6762
|
"null",
|
|
6760
6763
|
{
|
|
@@ -8116,6 +8119,9 @@
|
|
|
8116
8119
|
"null",
|
|
8117
8120
|
{
|
|
8118
8121
|
"type": "record",
|
|
8122
|
+
"Aspect": {
|
|
8123
|
+
"name": "systemMetadata"
|
|
8124
|
+
},
|
|
8119
8125
|
"name": "SystemMetadata",
|
|
8120
8126
|
"namespace": "com.linkedin.pegasus2avro.mxe",
|
|
8121
8127
|
"fields": [
|
|
@@ -36,12 +36,14 @@
|
|
|
36
36
|
"type": {
|
|
37
37
|
"type": "enum",
|
|
38
38
|
"symbolDocs": {
|
|
39
|
-
"SQL": "A SQL Query"
|
|
39
|
+
"SQL": "A SQL Query",
|
|
40
|
+
"UNKNOWN": "Unknown query language"
|
|
40
41
|
},
|
|
41
42
|
"name": "QueryLanguage",
|
|
42
43
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
43
44
|
"symbols": [
|
|
44
|
-
"SQL"
|
|
45
|
+
"SQL",
|
|
46
|
+
"UNKNOWN"
|
|
45
47
|
]
|
|
46
48
|
},
|
|
47
49
|
"name": "language",
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "systemMetadata"
|
|
5
|
+
},
|
|
6
|
+
"name": "SystemMetadata",
|
|
7
|
+
"namespace": "com.linkedin.pegasus2avro.mxe",
|
|
8
|
+
"fields": [
|
|
9
|
+
{
|
|
10
|
+
"type": [
|
|
11
|
+
"long",
|
|
12
|
+
"null"
|
|
13
|
+
],
|
|
14
|
+
"name": "lastObserved",
|
|
15
|
+
"default": 0,
|
|
16
|
+
"doc": "The timestamp the metadata was observed at"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"type": [
|
|
20
|
+
"string",
|
|
21
|
+
"null"
|
|
22
|
+
],
|
|
23
|
+
"name": "runId",
|
|
24
|
+
"default": "no-run-id-provided",
|
|
25
|
+
"doc": "The original run id that produced the metadata. Populated in case of batch-ingestion."
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"type": [
|
|
29
|
+
"string",
|
|
30
|
+
"null"
|
|
31
|
+
],
|
|
32
|
+
"name": "lastRunId",
|
|
33
|
+
"default": "no-run-id-provided",
|
|
34
|
+
"doc": "The last run id that produced the metadata. Populated in case of batch-ingestion."
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"type": [
|
|
38
|
+
"null",
|
|
39
|
+
"string"
|
|
40
|
+
],
|
|
41
|
+
"name": "pipelineName",
|
|
42
|
+
"default": null,
|
|
43
|
+
"doc": "The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion."
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"type": [
|
|
47
|
+
"null",
|
|
48
|
+
"string"
|
|
49
|
+
],
|
|
50
|
+
"name": "registryName",
|
|
51
|
+
"default": null,
|
|
52
|
+
"doc": "The model registry name that was used to process this event"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": [
|
|
56
|
+
"null",
|
|
57
|
+
"string"
|
|
58
|
+
],
|
|
59
|
+
"name": "registryVersion",
|
|
60
|
+
"default": null,
|
|
61
|
+
"doc": "The model registry version that was used to process this event"
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"type": [
|
|
65
|
+
"null",
|
|
66
|
+
{
|
|
67
|
+
"type": "map",
|
|
68
|
+
"values": "string"
|
|
69
|
+
}
|
|
70
|
+
],
|
|
71
|
+
"name": "properties",
|
|
72
|
+
"default": null,
|
|
73
|
+
"doc": "Additional properties"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"type": [
|
|
77
|
+
"null",
|
|
78
|
+
"string"
|
|
79
|
+
],
|
|
80
|
+
"name": "version",
|
|
81
|
+
"default": null,
|
|
82
|
+
"doc": "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted."
|
|
83
|
+
}
|
|
84
|
+
],
|
|
85
|
+
"doc": "Metadata associated with each metadata change that is processed by the system"
|
|
86
|
+
}
|
|
@@ -15,10 +15,10 @@ import pathlib
|
|
|
15
15
|
def _load_schema(schema_name: str) -> str:
|
|
16
16
|
return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text()
|
|
17
17
|
|
|
18
|
-
def getMetadataChangeProposalSchema() -> str:
|
|
19
|
-
return _load_schema("MetadataChangeProposal")
|
|
20
|
-
|
|
21
18
|
def getMetadataChangeEventSchema() -> str:
|
|
22
19
|
return _load_schema("MetadataChangeEvent")
|
|
23
20
|
|
|
21
|
+
def getMetadataChangeProposalSchema() -> str:
|
|
22
|
+
return _load_schema("MetadataChangeProposal")
|
|
23
|
+
|
|
24
24
|
# fmt: on
|
datahub/sdk/_all_entities.py
CHANGED
|
@@ -3,11 +3,15 @@ from typing import Dict, List, Type
|
|
|
3
3
|
from datahub.sdk.container import Container
|
|
4
4
|
from datahub.sdk.dataset import Dataset
|
|
5
5
|
from datahub.sdk.entity import Entity
|
|
6
|
+
from datahub.sdk.mlmodel import MLModel
|
|
7
|
+
from datahub.sdk.mlmodelgroup import MLModelGroup
|
|
6
8
|
|
|
7
9
|
# TODO: Is there a better way to declare this?
|
|
8
10
|
ENTITY_CLASSES_LIST: List[Type[Entity]] = [
|
|
9
11
|
Container,
|
|
10
12
|
Dataset,
|
|
13
|
+
MLModel,
|
|
14
|
+
MLModelGroup,
|
|
11
15
|
]
|
|
12
16
|
|
|
13
17
|
ENTITY_CLASSES: Dict[str, Type[Entity]] = {
|
datahub/sdk/_shared.py
CHANGED
|
@@ -5,6 +5,7 @@ from datetime import datetime
|
|
|
5
5
|
from typing import (
|
|
6
6
|
TYPE_CHECKING,
|
|
7
7
|
Callable,
|
|
8
|
+
Dict,
|
|
8
9
|
List,
|
|
9
10
|
Optional,
|
|
10
11
|
Sequence,
|
|
@@ -14,6 +15,7 @@ from typing import (
|
|
|
14
15
|
|
|
15
16
|
from typing_extensions import TypeAlias, assert_never
|
|
16
17
|
|
|
18
|
+
import datahub.emitter.mce_builder as builder
|
|
17
19
|
import datahub.metadata.schema_classes as models
|
|
18
20
|
from datahub.emitter.mce_builder import (
|
|
19
21
|
make_ts_millis,
|
|
@@ -30,14 +32,16 @@ from datahub.metadata.urns import (
|
|
|
30
32
|
DataJobUrn,
|
|
31
33
|
DataPlatformInstanceUrn,
|
|
32
34
|
DataPlatformUrn,
|
|
35
|
+
DataProcessInstanceUrn,
|
|
33
36
|
DatasetUrn,
|
|
34
37
|
DomainUrn,
|
|
35
38
|
GlossaryTermUrn,
|
|
36
39
|
OwnershipTypeUrn,
|
|
37
40
|
TagUrn,
|
|
38
41
|
Urn,
|
|
42
|
+
VersionSetUrn,
|
|
39
43
|
)
|
|
40
|
-
from datahub.sdk._utils import add_list_unique, remove_list_unique
|
|
44
|
+
from datahub.sdk._utils import DEFAULT_ACTOR_URN, add_list_unique, remove_list_unique
|
|
41
45
|
from datahub.sdk.entity import Entity
|
|
42
46
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
43
47
|
|
|
@@ -50,7 +54,35 @@ DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
|
|
|
50
54
|
|
|
51
55
|
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
52
56
|
|
|
53
|
-
|
|
57
|
+
TrainingMetricsInputType: TypeAlias = Union[
|
|
58
|
+
List[models.MLMetricClass], Dict[str, Optional[str]]
|
|
59
|
+
]
|
|
60
|
+
HyperParamsInputType: TypeAlias = Union[
|
|
61
|
+
List[models.MLHyperParamClass], Dict[str, Optional[str]]
|
|
62
|
+
]
|
|
63
|
+
MLTrainingJobInputType: TypeAlias = Union[Sequence[Union[str, DataProcessInstanceUrn]]]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def convert_training_metrics(
|
|
67
|
+
metrics: TrainingMetricsInputType,
|
|
68
|
+
) -> List[models.MLMetricClass]:
|
|
69
|
+
if isinstance(metrics, dict):
|
|
70
|
+
return [
|
|
71
|
+
models.MLMetricClass(name=name, value=str(value))
|
|
72
|
+
for name, value in metrics.items()
|
|
73
|
+
]
|
|
74
|
+
return metrics
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def convert_hyper_params(
|
|
78
|
+
params: HyperParamsInputType,
|
|
79
|
+
) -> List[models.MLHyperParamClass]:
|
|
80
|
+
if isinstance(params, dict):
|
|
81
|
+
return [
|
|
82
|
+
models.MLHyperParamClass(name=name, value=str(value))
|
|
83
|
+
for name, value in params.items()
|
|
84
|
+
]
|
|
85
|
+
return params
|
|
54
86
|
|
|
55
87
|
|
|
56
88
|
def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
|
|
@@ -441,7 +473,7 @@ class HasTerms(Entity):
|
|
|
441
473
|
def _terms_audit_stamp(self) -> models.AuditStampClass:
|
|
442
474
|
return models.AuditStampClass(
|
|
443
475
|
time=0,
|
|
444
|
-
actor=
|
|
476
|
+
actor=DEFAULT_ACTOR_URN,
|
|
445
477
|
)
|
|
446
478
|
|
|
447
479
|
def set_terms(self, terms: TermsInputType) -> None:
|
|
@@ -529,7 +561,7 @@ class HasInstitutionalMemory(Entity):
|
|
|
529
561
|
def _institutional_memory_audit_stamp(self) -> models.AuditStampClass:
|
|
530
562
|
return models.AuditStampClass(
|
|
531
563
|
time=0,
|
|
532
|
-
actor=
|
|
564
|
+
actor=DEFAULT_ACTOR_URN,
|
|
533
565
|
)
|
|
534
566
|
|
|
535
567
|
@classmethod
|
|
@@ -578,3 +610,109 @@ class HasInstitutionalMemory(Entity):
|
|
|
578
610
|
self._link_key,
|
|
579
611
|
self._parse_link_association_class(link),
|
|
580
612
|
)
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
class HasVersion(Entity):
|
|
616
|
+
"""Mixin for entities that have version properties."""
|
|
617
|
+
|
|
618
|
+
def _get_version_props(self) -> Optional[models.VersionPropertiesClass]:
|
|
619
|
+
return self._get_aspect(models.VersionPropertiesClass)
|
|
620
|
+
|
|
621
|
+
def _ensure_version_props(self) -> models.VersionPropertiesClass:
|
|
622
|
+
version_props = self._get_version_props()
|
|
623
|
+
if version_props is None:
|
|
624
|
+
guid_dict = {"urn": str(self.urn)}
|
|
625
|
+
version_set_urn = VersionSetUrn(
|
|
626
|
+
id=builder.datahub_guid(guid_dict), entity_type=self.urn.ENTITY_TYPE
|
|
627
|
+
)
|
|
628
|
+
|
|
629
|
+
version_props = models.VersionPropertiesClass(
|
|
630
|
+
versionSet=str(version_set_urn),
|
|
631
|
+
version=models.VersionTagClass(versionTag="0.1.0"),
|
|
632
|
+
sortId="0000000.1.0",
|
|
633
|
+
)
|
|
634
|
+
self._set_aspect(version_props)
|
|
635
|
+
return version_props
|
|
636
|
+
|
|
637
|
+
@property
|
|
638
|
+
def version(self) -> Optional[str]:
|
|
639
|
+
version_props = self._get_version_props()
|
|
640
|
+
if version_props and version_props.version:
|
|
641
|
+
return version_props.version.versionTag
|
|
642
|
+
return None
|
|
643
|
+
|
|
644
|
+
def set_version(self, version: str) -> None:
|
|
645
|
+
"""Set the version of the entity."""
|
|
646
|
+
guid_dict = {"urn": str(self.urn)}
|
|
647
|
+
version_set_urn = VersionSetUrn(
|
|
648
|
+
id=builder.datahub_guid(guid_dict), entity_type=self.urn.ENTITY_TYPE
|
|
649
|
+
)
|
|
650
|
+
|
|
651
|
+
version_props = self._get_version_props()
|
|
652
|
+
if version_props is None:
|
|
653
|
+
# If no version properties exist, create a new one
|
|
654
|
+
version_props = models.VersionPropertiesClass(
|
|
655
|
+
version=models.VersionTagClass(versionTag=version),
|
|
656
|
+
versionSet=str(version_set_urn),
|
|
657
|
+
sortId=version.zfill(10), # Pad with zeros for sorting
|
|
658
|
+
)
|
|
659
|
+
else:
|
|
660
|
+
# Update existing version properties
|
|
661
|
+
version_props.version = models.VersionTagClass(versionTag=version)
|
|
662
|
+
version_props.versionSet = str(version_set_urn)
|
|
663
|
+
version_props.sortId = version.zfill(10)
|
|
664
|
+
|
|
665
|
+
self._set_aspect(version_props)
|
|
666
|
+
|
|
667
|
+
@property
|
|
668
|
+
def version_aliases(self) -> List[str]:
|
|
669
|
+
version_props = self._get_version_props()
|
|
670
|
+
if version_props and version_props.aliases:
|
|
671
|
+
return [
|
|
672
|
+
alias.versionTag
|
|
673
|
+
for alias in version_props.aliases
|
|
674
|
+
if alias.versionTag is not None
|
|
675
|
+
]
|
|
676
|
+
return [] # Return empty list instead of None
|
|
677
|
+
|
|
678
|
+
def set_version_aliases(self, aliases: List[str]) -> None:
|
|
679
|
+
version_props = self._get_aspect(models.VersionPropertiesClass)
|
|
680
|
+
if version_props:
|
|
681
|
+
version_props.aliases = [
|
|
682
|
+
models.VersionTagClass(versionTag=alias) for alias in aliases
|
|
683
|
+
]
|
|
684
|
+
else:
|
|
685
|
+
# If no version properties exist, we need to create one with a default version
|
|
686
|
+
guid_dict = {"urn": str(self.urn)}
|
|
687
|
+
version_set_urn = VersionSetUrn(
|
|
688
|
+
id=builder.datahub_guid(guid_dict), entity_type=self.urn.ENTITY_TYPE
|
|
689
|
+
)
|
|
690
|
+
self._set_aspect(
|
|
691
|
+
models.VersionPropertiesClass(
|
|
692
|
+
version=models.VersionTagClass(
|
|
693
|
+
versionTag="0.1.0"
|
|
694
|
+
), # Default version
|
|
695
|
+
versionSet=str(version_set_urn),
|
|
696
|
+
sortId="0000000.1.0",
|
|
697
|
+
aliases=[
|
|
698
|
+
models.VersionTagClass(versionTag=alias) for alias in aliases
|
|
699
|
+
],
|
|
700
|
+
)
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
def add_version_alias(self, alias: str) -> None:
|
|
704
|
+
if not alias:
|
|
705
|
+
raise ValueError("Alias cannot be empty")
|
|
706
|
+
version_props = self._ensure_version_props()
|
|
707
|
+
if version_props.aliases is None:
|
|
708
|
+
version_props.aliases = []
|
|
709
|
+
version_props.aliases.append(models.VersionTagClass(versionTag=alias))
|
|
710
|
+
self._set_aspect(version_props)
|
|
711
|
+
|
|
712
|
+
def remove_version_alias(self, alias: str) -> None:
|
|
713
|
+
version_props = self._get_version_props()
|
|
714
|
+
if version_props and version_props.aliases:
|
|
715
|
+
version_props.aliases = [
|
|
716
|
+
a for a in version_props.aliases if a.versionTag != alias
|
|
717
|
+
]
|
|
718
|
+
self._set_aspect(version_props)
|
datahub/sdk/_utils.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
from typing import Any, Callable, List, Protocol, TypeVar
|
|
2
2
|
|
|
3
3
|
from datahub.errors import ItemNotFoundError
|
|
4
|
+
from datahub.metadata.urns import CorpUserUrn
|
|
5
|
+
|
|
6
|
+
# TODO: Change __ingestion to _ingestion.
|
|
7
|
+
DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
|
|
4
8
|
|
|
5
9
|
|
|
6
10
|
class _SupportsEq(Protocol):
|
datahub/sdk/dataset.py
CHANGED
|
@@ -87,7 +87,7 @@ def _parse_upstream_input(
|
|
|
87
87
|
assert_never(upstream_input)
|
|
88
88
|
|
|
89
89
|
|
|
90
|
-
def
|
|
90
|
+
def parse_cll_mapping(
|
|
91
91
|
*,
|
|
92
92
|
upstream: DatasetUrnOrStr,
|
|
93
93
|
downstream: DatasetUrnOrStr,
|
|
@@ -142,7 +142,7 @@ def _parse_upstream_lineage_input(
|
|
|
142
142
|
)
|
|
143
143
|
)
|
|
144
144
|
cll.extend(
|
|
145
|
-
|
|
145
|
+
parse_cll_mapping(
|
|
146
146
|
upstream=dataset_urn,
|
|
147
147
|
downstream=downstream_urn,
|
|
148
148
|
cll_mapping=column_lineage,
|
datahub/sdk/entity_client.py
CHANGED
|
@@ -11,6 +11,8 @@ from datahub.ingestion.graph.client import DataHubGraph
|
|
|
11
11
|
from datahub.metadata.urns import (
|
|
12
12
|
ContainerUrn,
|
|
13
13
|
DatasetUrn,
|
|
14
|
+
MlModelGroupUrn,
|
|
15
|
+
MlModelUrn,
|
|
14
16
|
Urn,
|
|
15
17
|
)
|
|
16
18
|
from datahub.sdk._all_entities import ENTITY_CLASSES
|
|
@@ -18,6 +20,8 @@ from datahub.sdk._shared import UrnOrStr
|
|
|
18
20
|
from datahub.sdk.container import Container
|
|
19
21
|
from datahub.sdk.dataset import Dataset
|
|
20
22
|
from datahub.sdk.entity import Entity
|
|
23
|
+
from datahub.sdk.mlmodel import MLModel
|
|
24
|
+
from datahub.sdk.mlmodelgroup import MLModelGroup
|
|
21
25
|
|
|
22
26
|
if TYPE_CHECKING:
|
|
23
27
|
from datahub.sdk.main_client import DataHubClient
|
|
@@ -49,6 +53,10 @@ class EntityClient:
|
|
|
49
53
|
@overload
|
|
50
54
|
def get(self, urn: DatasetUrn) -> Dataset: ...
|
|
51
55
|
@overload
|
|
56
|
+
def get(self, urn: MlModelUrn) -> MLModel: ...
|
|
57
|
+
@overload
|
|
58
|
+
def get(self, urn: MlModelGroupUrn) -> MLModelGroup: ...
|
|
59
|
+
@overload
|
|
52
60
|
def get(self, urn: Union[Urn, str]) -> Entity: ...
|
|
53
61
|
def get(self, urn: UrnOrStr) -> Entity:
|
|
54
62
|
"""Retrieve an entity by its urn.
|