acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the package registry's advisory page for more details.

Files changed (159)
  1. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/METADATA +2566 -2514
  2. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/RECORD +159 -149
  3. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  6. datahub/api/entities/datacontract/datacontract.py +35 -3
  7. datahub/api/entities/datajob/dataflow.py +3 -3
  8. datahub/api/entities/datajob/datajob.py +7 -4
  9. datahub/api/entities/dataset/dataset.py +9 -11
  10. datahub/api/entities/forms/forms.py +34 -34
  11. datahub/api/graphql/assertion.py +1 -1
  12. datahub/api/graphql/operation.py +4 -4
  13. datahub/cli/check_cli.py +3 -2
  14. datahub/cli/config_utils.py +2 -2
  15. datahub/cli/delete_cli.py +6 -5
  16. datahub/cli/docker_cli.py +2 -2
  17. datahub/cli/exists_cli.py +2 -1
  18. datahub/cli/get_cli.py +2 -1
  19. datahub/cli/iceberg_cli.py +6 -5
  20. datahub/cli/ingest_cli.py +9 -6
  21. datahub/cli/migrate.py +4 -3
  22. datahub/cli/migration_utils.py +4 -3
  23. datahub/cli/put_cli.py +3 -2
  24. datahub/cli/specific/assertions_cli.py +2 -1
  25. datahub/cli/specific/datacontract_cli.py +3 -2
  26. datahub/cli/specific/dataproduct_cli.py +10 -9
  27. datahub/cli/specific/dataset_cli.py +4 -3
  28. datahub/cli/specific/forms_cli.py +2 -1
  29. datahub/cli/specific/group_cli.py +2 -1
  30. datahub/cli/specific/structuredproperties_cli.py +4 -3
  31. datahub/cli/specific/user_cli.py +2 -1
  32. datahub/cli/state_cli.py +2 -1
  33. datahub/cli/timeline_cli.py +2 -1
  34. datahub/configuration/common.py +5 -0
  35. datahub/configuration/source_common.py +1 -1
  36. datahub/emitter/mcp.py +20 -5
  37. datahub/emitter/request_helper.py +116 -3
  38. datahub/emitter/rest_emitter.py +163 -93
  39. datahub/entrypoints.py +2 -1
  40. datahub/errors.py +4 -0
  41. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  42. datahub/ingestion/api/source.py +2 -5
  43. datahub/ingestion/api/source_helpers.py +1 -0
  44. datahub/ingestion/glossary/classification_mixin.py +4 -2
  45. datahub/ingestion/graph/client.py +33 -8
  46. datahub/ingestion/graph/config.py +14 -0
  47. datahub/ingestion/graph/filters.py +1 -1
  48. datahub/ingestion/graph/links.py +53 -0
  49. datahub/ingestion/run/pipeline.py +9 -6
  50. datahub/ingestion/run/pipeline_config.py +1 -1
  51. datahub/ingestion/sink/datahub_rest.py +5 -6
  52. datahub/ingestion/source/apply/datahub_apply.py +2 -1
  53. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  54. datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
  55. datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
  56. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
  57. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
  58. datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
  59. datahub/ingestion/source/common/subtypes.py +3 -0
  60. datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
  61. datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
  62. datahub/ingestion/source/dbt/dbt_common.py +10 -2
  63. datahub/ingestion/source/dbt/dbt_core.py +82 -42
  64. datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
  65. datahub/ingestion/source/feast.py +4 -4
  66. datahub/ingestion/source/fivetran/config.py +1 -1
  67. datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
  68. datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
  69. datahub/ingestion/source/ge_data_profiler.py +27 -1
  70. datahub/ingestion/source/hex/api.py +1 -20
  71. datahub/ingestion/source/hex/query_fetcher.py +4 -1
  72. datahub/ingestion/source/iceberg/iceberg.py +20 -4
  73. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  74. datahub/ingestion/source/ldap.py +1 -1
  75. datahub/ingestion/source/looker/looker_common.py +17 -2
  76. datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
  77. datahub/ingestion/source/looker/looker_source.py +34 -5
  78. datahub/ingestion/source/looker/lookml_source.py +7 -1
  79. datahub/ingestion/source/metadata/lineage.py +2 -1
  80. datahub/ingestion/source/mlflow.py +19 -6
  81. datahub/ingestion/source/mode.py +74 -28
  82. datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
  83. datahub/ingestion/source/powerbi/config.py +13 -1
  84. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  85. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  86. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
  87. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  88. datahub/ingestion/source/redshift/usage.py +10 -9
  89. datahub/ingestion/source/sigma/config.py +74 -6
  90. datahub/ingestion/source/sigma/sigma.py +16 -1
  91. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  92. datahub/ingestion/source/slack/slack.py +4 -52
  93. datahub/ingestion/source/snowflake/snowflake_config.py +2 -12
  94. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -18
  95. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  96. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  97. datahub/ingestion/source/snowflake/snowflake_query.py +9 -63
  98. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  99. datahub/ingestion/source/sql/athena.py +2 -1
  100. datahub/ingestion/source/sql/clickhouse.py +5 -1
  101. datahub/ingestion/source/sql/druid.py +7 -2
  102. datahub/ingestion/source/sql/hive.py +7 -2
  103. datahub/ingestion/source/sql/hive_metastore.py +5 -5
  104. datahub/ingestion/source/sql/mssql/source.py +1 -1
  105. datahub/ingestion/source/sql/oracle.py +6 -2
  106. datahub/ingestion/source/sql/sql_config.py +1 -34
  107. datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
  108. datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
  109. datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
  110. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  111. datahub/ingestion/source/tableau/tableau.py +31 -6
  112. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  113. datahub/ingestion/source/unity/config.py +2 -1
  114. datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
  115. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
  116. datahub/ingestion/source/vertexai/vertexai.py +316 -4
  117. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
  118. datahub/integrations/assertion/common.py +3 -2
  119. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +538 -493
  120. datahub/metadata/_urns/urn_defs.py +1819 -1763
  121. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  122. datahub/metadata/schema.avsc +17296 -16883
  123. datahub/metadata/schema_classes.py +3 -3
  124. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  125. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  126. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  127. datahub/metadata/schemas/FormInfo.avsc +5 -0
  128. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  129. datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
  130. datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
  131. datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
  132. datahub/metadata/schemas/QueryProperties.avsc +4 -2
  133. datahub/metadata/schemas/SystemMetadata.avsc +86 -0
  134. datahub/metadata/schemas/__init__.py +3 -3
  135. datahub/sdk/_all_entities.py +4 -0
  136. datahub/sdk/_shared.py +142 -4
  137. datahub/sdk/_utils.py +4 -0
  138. datahub/sdk/dataset.py +2 -2
  139. datahub/sdk/entity_client.py +8 -0
  140. datahub/sdk/lineage_client.py +235 -0
  141. datahub/sdk/main_client.py +6 -3
  142. datahub/sdk/mlmodel.py +301 -0
  143. datahub/sdk/mlmodelgroup.py +233 -0
  144. datahub/secret/datahub_secret_store.py +2 -1
  145. datahub/specific/dataset.py +12 -0
  146. datahub/sql_parsing/fingerprint_utils.py +6 -0
  147. datahub/sql_parsing/sql_parsing_aggregator.py +48 -34
  148. datahub/sql_parsing/sqlglot_utils.py +18 -14
  149. datahub/telemetry/telemetry.py +2 -2
  150. datahub/testing/check_imports.py +1 -1
  151. datahub/testing/mcp_diff.py +15 -2
  152. datahub/upgrade/upgrade.py +10 -12
  153. datahub/utilities/logging_manager.py +8 -1
  154. datahub/utilities/server_config_util.py +350 -10
  155. datahub/utilities/sqlalchemy_query_combiner.py +4 -5
  156. datahub/utilities/urn_encoder.py +1 -1
  157. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/entry_points.txt +0 -0
  158. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/licenses/LICENSE +0 -0
  159. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/top_level.txt +0 -0
@@ -20,16 +20,16 @@ from datahub.utilities._custom_package_loader import get_custom_models_package
20
20
  _custom_package_path = get_custom_models_package()
21
21
 
22
22
  if TYPE_CHECKING or not _custom_package_path:
23
- from ._schema_classes import *
23
+ from ._internal_schema_classes import *
24
24
 
25
25
  # Required explicitly because __all__ doesn't include _ prefixed names.
26
- from ._schema_classes import __SCHEMA_TYPES
26
+ from ._internal_schema_classes import __SCHEMA_TYPES
27
27
 
28
28
  if IS_SPHINX_BUILD:
29
29
  # Set __module__ to the current module so that Sphinx will document the
30
30
  # classes as belonging to this module instead of the custom package.
31
31
  for _cls in list(globals().values()):
32
- if hasattr(_cls, "__module__") and "datahub.metadata._schema_classes" in _cls.__module__:
32
+ if hasattr(_cls, "__module__") and "datahub.metadata._internal_schema_classes" in _cls.__module__:
33
33
  _cls.__module__ = __name__
34
34
  else:
35
35
  _custom_package = importlib.import_module(_custom_package_path)
@@ -7,7 +7,8 @@
7
7
  "entityAspects": [
8
8
  "dataContractProperties",
9
9
  "dataContractStatus",
10
- "status"
10
+ "status",
11
+ "structuredProperties"
11
12
  ]
12
13
  },
13
14
  "name": "DataContractKey",
@@ -0,0 +1,22 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "dataHubOpenAPISchemaKey",
5
+ "keyForEntity": "dataHubOpenAPISchema",
6
+ "entityCategory": "internal",
7
+ "entityAspects": [
8
+ "systemMetadata"
9
+ ],
10
+ "entityDoc": "Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model."
11
+ },
12
+ "name": "DataHubOpenAPISchemaKey",
13
+ "namespace": "com.linkedin.pegasus2avro.metadata.key",
14
+ "fields": [
15
+ {
16
+ "type": "string",
17
+ "name": "id",
18
+ "doc": "A unique id for the DataHub OpenAPI schema."
19
+ }
20
+ ],
21
+ "doc": "Key for a Query"
22
+ }
@@ -31,12 +31,14 @@
31
31
  "type": {
32
32
  "type": "enum",
33
33
  "symbolDocs": {
34
- "SQL": "A SQL Query"
34
+ "SQL": "A SQL Query",
35
+ "UNKNOWN": "Unknown query language"
35
36
  },
36
37
  "name": "QueryLanguage",
37
38
  "namespace": "com.linkedin.pegasus2avro.query",
38
39
  "symbols": [
39
- "SQL"
40
+ "SQL",
41
+ "UNKNOWN"
40
42
  ]
41
43
  },
42
44
  "name": "language",
@@ -53,6 +53,11 @@
53
53
  "namespace": "com.linkedin.pegasus2avro.form",
54
54
  "fields": [
55
55
  {
56
+ "Searchable": {
57
+ "fieldName": "promptId",
58
+ "fieldType": "KEYWORD",
59
+ "queryByDefault": false
60
+ },
56
61
  "type": "string",
57
62
  "name": "id",
58
63
  "doc": "The unique id for this prompt. This must be GLOBALLY unique."
@@ -135,6 +135,9 @@
135
135
  "doc": "Version of the MLModelDeployment"
136
136
  },
137
137
  {
138
+ "Searchable": {
139
+ "fieldName": "deploymentStatus"
140
+ },
138
141
  "type": [
139
142
  "null",
140
143
  {
@@ -6755,6 +6755,9 @@
6755
6755
  "doc": "Version of the MLModelDeployment"
6756
6756
  },
6757
6757
  {
6758
+ "Searchable": {
6759
+ "fieldName": "deploymentStatus"
6760
+ },
6758
6761
  "type": [
6759
6762
  "null",
6760
6763
  {
@@ -8116,6 +8119,9 @@
8116
8119
  "null",
8117
8120
  {
8118
8121
  "type": "record",
8122
+ "Aspect": {
8123
+ "name": "systemMetadata"
8124
+ },
8119
8125
  "name": "SystemMetadata",
8120
8126
  "namespace": "com.linkedin.pegasus2avro.mxe",
8121
8127
  "fields": [
@@ -187,6 +187,9 @@
187
187
  "null",
188
188
  {
189
189
  "type": "record",
190
+ "Aspect": {
191
+ "name": "systemMetadata"
192
+ },
190
193
  "name": "SystemMetadata",
191
194
  "namespace": "com.linkedin.pegasus2avro.mxe",
192
195
  "fields": [
@@ -187,6 +187,9 @@
187
187
  "null",
188
188
  {
189
189
  "type": "record",
190
+ "Aspect": {
191
+ "name": "systemMetadata"
192
+ },
190
193
  "name": "SystemMetadata",
191
194
  "namespace": "com.linkedin.pegasus2avro.mxe",
192
195
  "fields": [
@@ -36,12 +36,14 @@
36
36
  "type": {
37
37
  "type": "enum",
38
38
  "symbolDocs": {
39
- "SQL": "A SQL Query"
39
+ "SQL": "A SQL Query",
40
+ "UNKNOWN": "Unknown query language"
40
41
  },
41
42
  "name": "QueryLanguage",
42
43
  "namespace": "com.linkedin.pegasus2avro.query",
43
44
  "symbols": [
44
- "SQL"
45
+ "SQL",
46
+ "UNKNOWN"
45
47
  ]
46
48
  },
47
49
  "name": "language",
@@ -0,0 +1,86 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "systemMetadata"
5
+ },
6
+ "name": "SystemMetadata",
7
+ "namespace": "com.linkedin.pegasus2avro.mxe",
8
+ "fields": [
9
+ {
10
+ "type": [
11
+ "long",
12
+ "null"
13
+ ],
14
+ "name": "lastObserved",
15
+ "default": 0,
16
+ "doc": "The timestamp the metadata was observed at"
17
+ },
18
+ {
19
+ "type": [
20
+ "string",
21
+ "null"
22
+ ],
23
+ "name": "runId",
24
+ "default": "no-run-id-provided",
25
+ "doc": "The original run id that produced the metadata. Populated in case of batch-ingestion."
26
+ },
27
+ {
28
+ "type": [
29
+ "string",
30
+ "null"
31
+ ],
32
+ "name": "lastRunId",
33
+ "default": "no-run-id-provided",
34
+ "doc": "The last run id that produced the metadata. Populated in case of batch-ingestion."
35
+ },
36
+ {
37
+ "type": [
38
+ "null",
39
+ "string"
40
+ ],
41
+ "name": "pipelineName",
42
+ "default": null,
43
+ "doc": "The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion."
44
+ },
45
+ {
46
+ "type": [
47
+ "null",
48
+ "string"
49
+ ],
50
+ "name": "registryName",
51
+ "default": null,
52
+ "doc": "The model registry name that was used to process this event"
53
+ },
54
+ {
55
+ "type": [
56
+ "null",
57
+ "string"
58
+ ],
59
+ "name": "registryVersion",
60
+ "default": null,
61
+ "doc": "The model registry version that was used to process this event"
62
+ },
63
+ {
64
+ "type": [
65
+ "null",
66
+ {
67
+ "type": "map",
68
+ "values": "string"
69
+ }
70
+ ],
71
+ "name": "properties",
72
+ "default": null,
73
+ "doc": "Additional properties"
74
+ },
75
+ {
76
+ "type": [
77
+ "null",
78
+ "string"
79
+ ],
80
+ "name": "version",
81
+ "default": null,
82
+ "doc": "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted."
83
+ }
84
+ ],
85
+ "doc": "Metadata associated with each metadata change that is processed by the system"
86
+ }
@@ -15,10 +15,10 @@ import pathlib
15
15
  def _load_schema(schema_name: str) -> str:
16
16
  return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text()
17
17
 
18
- def getMetadataChangeProposalSchema() -> str:
19
- return _load_schema("MetadataChangeProposal")
20
-
21
18
  def getMetadataChangeEventSchema() -> str:
22
19
  return _load_schema("MetadataChangeEvent")
23
20
 
21
+ def getMetadataChangeProposalSchema() -> str:
22
+ return _load_schema("MetadataChangeProposal")
23
+
24
24
  # fmt: on
@@ -3,11 +3,15 @@ from typing import Dict, List, Type
3
3
  from datahub.sdk.container import Container
4
4
  from datahub.sdk.dataset import Dataset
5
5
  from datahub.sdk.entity import Entity
6
+ from datahub.sdk.mlmodel import MLModel
7
+ from datahub.sdk.mlmodelgroup import MLModelGroup
6
8
 
7
9
  # TODO: Is there a better way to declare this?
8
10
  ENTITY_CLASSES_LIST: List[Type[Entity]] = [
9
11
  Container,
10
12
  Dataset,
13
+ MLModel,
14
+ MLModelGroup,
11
15
  ]
12
16
 
13
17
  ENTITY_CLASSES: Dict[str, Type[Entity]] = {
datahub/sdk/_shared.py CHANGED
@@ -5,6 +5,7 @@ from datetime import datetime
5
5
  from typing import (
6
6
  TYPE_CHECKING,
7
7
  Callable,
8
+ Dict,
8
9
  List,
9
10
  Optional,
10
11
  Sequence,
@@ -14,6 +15,7 @@ from typing import (
14
15
 
15
16
  from typing_extensions import TypeAlias, assert_never
16
17
 
18
+ import datahub.emitter.mce_builder as builder
17
19
  import datahub.metadata.schema_classes as models
18
20
  from datahub.emitter.mce_builder import (
19
21
  make_ts_millis,
@@ -30,14 +32,16 @@ from datahub.metadata.urns import (
30
32
  DataJobUrn,
31
33
  DataPlatformInstanceUrn,
32
34
  DataPlatformUrn,
35
+ DataProcessInstanceUrn,
33
36
  DatasetUrn,
34
37
  DomainUrn,
35
38
  GlossaryTermUrn,
36
39
  OwnershipTypeUrn,
37
40
  TagUrn,
38
41
  Urn,
42
+ VersionSetUrn,
39
43
  )
40
- from datahub.sdk._utils import add_list_unique, remove_list_unique
44
+ from datahub.sdk._utils import DEFAULT_ACTOR_URN, add_list_unique, remove_list_unique
41
45
  from datahub.sdk.entity import Entity
42
46
  from datahub.utilities.urns.error import InvalidUrnError
43
47
 
@@ -50,7 +54,35 @@ DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
50
54
 
51
55
  ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
52
56
 
53
- _DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
57
+ TrainingMetricsInputType: TypeAlias = Union[
58
+ List[models.MLMetricClass], Dict[str, Optional[str]]
59
+ ]
60
+ HyperParamsInputType: TypeAlias = Union[
61
+ List[models.MLHyperParamClass], Dict[str, Optional[str]]
62
+ ]
63
+ MLTrainingJobInputType: TypeAlias = Union[Sequence[Union[str, DataProcessInstanceUrn]]]
64
+
65
+
66
+ def convert_training_metrics(
67
+ metrics: TrainingMetricsInputType,
68
+ ) -> List[models.MLMetricClass]:
69
+ if isinstance(metrics, dict):
70
+ return [
71
+ models.MLMetricClass(name=name, value=str(value))
72
+ for name, value in metrics.items()
73
+ ]
74
+ return metrics
75
+
76
+
77
+ def convert_hyper_params(
78
+ params: HyperParamsInputType,
79
+ ) -> List[models.MLHyperParamClass]:
80
+ if isinstance(params, dict):
81
+ return [
82
+ models.MLHyperParamClass(name=name, value=str(value))
83
+ for name, value in params.items()
84
+ ]
85
+ return params
54
86
 
55
87
 
56
88
  def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
@@ -441,7 +473,7 @@ class HasTerms(Entity):
441
473
  def _terms_audit_stamp(self) -> models.AuditStampClass:
442
474
  return models.AuditStampClass(
443
475
  time=0,
444
- actor=_DEFAULT_ACTOR_URN,
476
+ actor=DEFAULT_ACTOR_URN,
445
477
  )
446
478
 
447
479
  def set_terms(self, terms: TermsInputType) -> None:
@@ -529,7 +561,7 @@ class HasInstitutionalMemory(Entity):
529
561
  def _institutional_memory_audit_stamp(self) -> models.AuditStampClass:
530
562
  return models.AuditStampClass(
531
563
  time=0,
532
- actor=_DEFAULT_ACTOR_URN,
564
+ actor=DEFAULT_ACTOR_URN,
533
565
  )
534
566
 
535
567
  @classmethod
@@ -578,3 +610,109 @@ class HasInstitutionalMemory(Entity):
578
610
  self._link_key,
579
611
  self._parse_link_association_class(link),
580
612
  )
613
+
614
+
615
+ class HasVersion(Entity):
616
+ """Mixin for entities that have version properties."""
617
+
618
+ def _get_version_props(self) -> Optional[models.VersionPropertiesClass]:
619
+ return self._get_aspect(models.VersionPropertiesClass)
620
+
621
+ def _ensure_version_props(self) -> models.VersionPropertiesClass:
622
+ version_props = self._get_version_props()
623
+ if version_props is None:
624
+ guid_dict = {"urn": str(self.urn)}
625
+ version_set_urn = VersionSetUrn(
626
+ id=builder.datahub_guid(guid_dict), entity_type=self.urn.ENTITY_TYPE
627
+ )
628
+
629
+ version_props = models.VersionPropertiesClass(
630
+ versionSet=str(version_set_urn),
631
+ version=models.VersionTagClass(versionTag="0.1.0"),
632
+ sortId="0000000.1.0",
633
+ )
634
+ self._set_aspect(version_props)
635
+ return version_props
636
+
637
+ @property
638
+ def version(self) -> Optional[str]:
639
+ version_props = self._get_version_props()
640
+ if version_props and version_props.version:
641
+ return version_props.version.versionTag
642
+ return None
643
+
644
+ def set_version(self, version: str) -> None:
645
+ """Set the version of the entity."""
646
+ guid_dict = {"urn": str(self.urn)}
647
+ version_set_urn = VersionSetUrn(
648
+ id=builder.datahub_guid(guid_dict), entity_type=self.urn.ENTITY_TYPE
649
+ )
650
+
651
+ version_props = self._get_version_props()
652
+ if version_props is None:
653
+ # If no version properties exist, create a new one
654
+ version_props = models.VersionPropertiesClass(
655
+ version=models.VersionTagClass(versionTag=version),
656
+ versionSet=str(version_set_urn),
657
+ sortId=version.zfill(10), # Pad with zeros for sorting
658
+ )
659
+ else:
660
+ # Update existing version properties
661
+ version_props.version = models.VersionTagClass(versionTag=version)
662
+ version_props.versionSet = str(version_set_urn)
663
+ version_props.sortId = version.zfill(10)
664
+
665
+ self._set_aspect(version_props)
666
+
667
+ @property
668
+ def version_aliases(self) -> List[str]:
669
+ version_props = self._get_version_props()
670
+ if version_props and version_props.aliases:
671
+ return [
672
+ alias.versionTag
673
+ for alias in version_props.aliases
674
+ if alias.versionTag is not None
675
+ ]
676
+ return [] # Return empty list instead of None
677
+
678
+ def set_version_aliases(self, aliases: List[str]) -> None:
679
+ version_props = self._get_aspect(models.VersionPropertiesClass)
680
+ if version_props:
681
+ version_props.aliases = [
682
+ models.VersionTagClass(versionTag=alias) for alias in aliases
683
+ ]
684
+ else:
685
+ # If no version properties exist, we need to create one with a default version
686
+ guid_dict = {"urn": str(self.urn)}
687
+ version_set_urn = VersionSetUrn(
688
+ id=builder.datahub_guid(guid_dict), entity_type=self.urn.ENTITY_TYPE
689
+ )
690
+ self._set_aspect(
691
+ models.VersionPropertiesClass(
692
+ version=models.VersionTagClass(
693
+ versionTag="0.1.0"
694
+ ), # Default version
695
+ versionSet=str(version_set_urn),
696
+ sortId="0000000.1.0",
697
+ aliases=[
698
+ models.VersionTagClass(versionTag=alias) for alias in aliases
699
+ ],
700
+ )
701
+ )
702
+
703
+ def add_version_alias(self, alias: str) -> None:
704
+ if not alias:
705
+ raise ValueError("Alias cannot be empty")
706
+ version_props = self._ensure_version_props()
707
+ if version_props.aliases is None:
708
+ version_props.aliases = []
709
+ version_props.aliases.append(models.VersionTagClass(versionTag=alias))
710
+ self._set_aspect(version_props)
711
+
712
+ def remove_version_alias(self, alias: str) -> None:
713
+ version_props = self._get_version_props()
714
+ if version_props and version_props.aliases:
715
+ version_props.aliases = [
716
+ a for a in version_props.aliases if a.versionTag != alias
717
+ ]
718
+ self._set_aspect(version_props)
datahub/sdk/_utils.py CHANGED
@@ -1,6 +1,10 @@
1
1
  from typing import Any, Callable, List, Protocol, TypeVar
2
2
 
3
3
  from datahub.errors import ItemNotFoundError
4
+ from datahub.metadata.urns import CorpUserUrn
5
+
6
+ # TODO: Change __ingestion to _ingestion.
7
+ DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
4
8
 
5
9
 
6
10
  class _SupportsEq(Protocol):
datahub/sdk/dataset.py CHANGED
@@ -87,7 +87,7 @@ def _parse_upstream_input(
87
87
  assert_never(upstream_input)
88
88
 
89
89
 
90
- def _parse_cll_mapping(
90
+ def parse_cll_mapping(
91
91
  *,
92
92
  upstream: DatasetUrnOrStr,
93
93
  downstream: DatasetUrnOrStr,
@@ -142,7 +142,7 @@ def _parse_upstream_lineage_input(
142
142
  )
143
143
  )
144
144
  cll.extend(
145
- _parse_cll_mapping(
145
+ parse_cll_mapping(
146
146
  upstream=dataset_urn,
147
147
  downstream=downstream_urn,
148
148
  cll_mapping=column_lineage,
@@ -11,6 +11,8 @@ from datahub.ingestion.graph.client import DataHubGraph
11
11
  from datahub.metadata.urns import (
12
12
  ContainerUrn,
13
13
  DatasetUrn,
14
+ MlModelGroupUrn,
15
+ MlModelUrn,
14
16
  Urn,
15
17
  )
16
18
  from datahub.sdk._all_entities import ENTITY_CLASSES
@@ -18,6 +20,8 @@ from datahub.sdk._shared import UrnOrStr
18
20
  from datahub.sdk.container import Container
19
21
  from datahub.sdk.dataset import Dataset
20
22
  from datahub.sdk.entity import Entity
23
+ from datahub.sdk.mlmodel import MLModel
24
+ from datahub.sdk.mlmodelgroup import MLModelGroup
21
25
 
22
26
  if TYPE_CHECKING:
23
27
  from datahub.sdk.main_client import DataHubClient
@@ -49,6 +53,10 @@ class EntityClient:
49
53
  @overload
50
54
  def get(self, urn: DatasetUrn) -> Dataset: ...
51
55
  @overload
56
+ def get(self, urn: MlModelUrn) -> MLModel: ...
57
+ @overload
58
+ def get(self, urn: MlModelGroupUrn) -> MLModelGroup: ...
59
+ @overload
52
60
  def get(self, urn: Union[Urn, str]) -> Entity: ...
53
61
  def get(self, urn: UrnOrStr) -> Entity:
54
62
  """Retrieve an entity by its urn.