acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release has been flagged as potentially problematic. Consult the registry's advisory page for details on why this version of acryl-datahub may be problematic.

Files changed (159) hide show
  1. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/METADATA +2566 -2514
  2. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/RECORD +159 -149
  3. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  6. datahub/api/entities/datacontract/datacontract.py +35 -3
  7. datahub/api/entities/datajob/dataflow.py +3 -3
  8. datahub/api/entities/datajob/datajob.py +7 -4
  9. datahub/api/entities/dataset/dataset.py +9 -11
  10. datahub/api/entities/forms/forms.py +34 -34
  11. datahub/api/graphql/assertion.py +1 -1
  12. datahub/api/graphql/operation.py +4 -4
  13. datahub/cli/check_cli.py +3 -2
  14. datahub/cli/config_utils.py +2 -2
  15. datahub/cli/delete_cli.py +6 -5
  16. datahub/cli/docker_cli.py +2 -2
  17. datahub/cli/exists_cli.py +2 -1
  18. datahub/cli/get_cli.py +2 -1
  19. datahub/cli/iceberg_cli.py +6 -5
  20. datahub/cli/ingest_cli.py +9 -6
  21. datahub/cli/migrate.py +4 -3
  22. datahub/cli/migration_utils.py +4 -3
  23. datahub/cli/put_cli.py +3 -2
  24. datahub/cli/specific/assertions_cli.py +2 -1
  25. datahub/cli/specific/datacontract_cli.py +3 -2
  26. datahub/cli/specific/dataproduct_cli.py +10 -9
  27. datahub/cli/specific/dataset_cli.py +4 -3
  28. datahub/cli/specific/forms_cli.py +2 -1
  29. datahub/cli/specific/group_cli.py +2 -1
  30. datahub/cli/specific/structuredproperties_cli.py +4 -3
  31. datahub/cli/specific/user_cli.py +2 -1
  32. datahub/cli/state_cli.py +2 -1
  33. datahub/cli/timeline_cli.py +2 -1
  34. datahub/configuration/common.py +5 -0
  35. datahub/configuration/source_common.py +1 -1
  36. datahub/emitter/mcp.py +20 -5
  37. datahub/emitter/request_helper.py +116 -3
  38. datahub/emitter/rest_emitter.py +163 -93
  39. datahub/entrypoints.py +2 -1
  40. datahub/errors.py +4 -0
  41. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  42. datahub/ingestion/api/source.py +2 -5
  43. datahub/ingestion/api/source_helpers.py +1 -0
  44. datahub/ingestion/glossary/classification_mixin.py +4 -2
  45. datahub/ingestion/graph/client.py +33 -8
  46. datahub/ingestion/graph/config.py +14 -0
  47. datahub/ingestion/graph/filters.py +1 -1
  48. datahub/ingestion/graph/links.py +53 -0
  49. datahub/ingestion/run/pipeline.py +9 -6
  50. datahub/ingestion/run/pipeline_config.py +1 -1
  51. datahub/ingestion/sink/datahub_rest.py +5 -6
  52. datahub/ingestion/source/apply/datahub_apply.py +2 -1
  53. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  54. datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
  55. datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
  56. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
  57. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
  58. datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
  59. datahub/ingestion/source/common/subtypes.py +3 -0
  60. datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
  61. datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
  62. datahub/ingestion/source/dbt/dbt_common.py +10 -2
  63. datahub/ingestion/source/dbt/dbt_core.py +82 -42
  64. datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
  65. datahub/ingestion/source/feast.py +4 -4
  66. datahub/ingestion/source/fivetran/config.py +1 -1
  67. datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
  68. datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
  69. datahub/ingestion/source/ge_data_profiler.py +27 -1
  70. datahub/ingestion/source/hex/api.py +1 -20
  71. datahub/ingestion/source/hex/query_fetcher.py +4 -1
  72. datahub/ingestion/source/iceberg/iceberg.py +20 -4
  73. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  74. datahub/ingestion/source/ldap.py +1 -1
  75. datahub/ingestion/source/looker/looker_common.py +17 -2
  76. datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
  77. datahub/ingestion/source/looker/looker_source.py +34 -5
  78. datahub/ingestion/source/looker/lookml_source.py +7 -1
  79. datahub/ingestion/source/metadata/lineage.py +2 -1
  80. datahub/ingestion/source/mlflow.py +19 -6
  81. datahub/ingestion/source/mode.py +74 -28
  82. datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
  83. datahub/ingestion/source/powerbi/config.py +13 -1
  84. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  85. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  86. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
  87. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  88. datahub/ingestion/source/redshift/usage.py +10 -9
  89. datahub/ingestion/source/sigma/config.py +74 -6
  90. datahub/ingestion/source/sigma/sigma.py +16 -1
  91. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  92. datahub/ingestion/source/slack/slack.py +4 -52
  93. datahub/ingestion/source/snowflake/snowflake_config.py +2 -12
  94. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -18
  95. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  96. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  97. datahub/ingestion/source/snowflake/snowflake_query.py +9 -63
  98. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  99. datahub/ingestion/source/sql/athena.py +2 -1
  100. datahub/ingestion/source/sql/clickhouse.py +5 -1
  101. datahub/ingestion/source/sql/druid.py +7 -2
  102. datahub/ingestion/source/sql/hive.py +7 -2
  103. datahub/ingestion/source/sql/hive_metastore.py +5 -5
  104. datahub/ingestion/source/sql/mssql/source.py +1 -1
  105. datahub/ingestion/source/sql/oracle.py +6 -2
  106. datahub/ingestion/source/sql/sql_config.py +1 -34
  107. datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
  108. datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
  109. datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
  110. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  111. datahub/ingestion/source/tableau/tableau.py +31 -6
  112. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  113. datahub/ingestion/source/unity/config.py +2 -1
  114. datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
  115. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
  116. datahub/ingestion/source/vertexai/vertexai.py +316 -4
  117. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
  118. datahub/integrations/assertion/common.py +3 -2
  119. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +538 -493
  120. datahub/metadata/_urns/urn_defs.py +1819 -1763
  121. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  122. datahub/metadata/schema.avsc +17296 -16883
  123. datahub/metadata/schema_classes.py +3 -3
  124. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  125. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  126. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  127. datahub/metadata/schemas/FormInfo.avsc +5 -0
  128. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  129. datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
  130. datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
  131. datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
  132. datahub/metadata/schemas/QueryProperties.avsc +4 -2
  133. datahub/metadata/schemas/SystemMetadata.avsc +86 -0
  134. datahub/metadata/schemas/__init__.py +3 -3
  135. datahub/sdk/_all_entities.py +4 -0
  136. datahub/sdk/_shared.py +142 -4
  137. datahub/sdk/_utils.py +4 -0
  138. datahub/sdk/dataset.py +2 -2
  139. datahub/sdk/entity_client.py +8 -0
  140. datahub/sdk/lineage_client.py +235 -0
  141. datahub/sdk/main_client.py +6 -3
  142. datahub/sdk/mlmodel.py +301 -0
  143. datahub/sdk/mlmodelgroup.py +233 -0
  144. datahub/secret/datahub_secret_store.py +2 -1
  145. datahub/specific/dataset.py +12 -0
  146. datahub/sql_parsing/fingerprint_utils.py +6 -0
  147. datahub/sql_parsing/sql_parsing_aggregator.py +48 -34
  148. datahub/sql_parsing/sqlglot_utils.py +18 -14
  149. datahub/telemetry/telemetry.py +2 -2
  150. datahub/testing/check_imports.py +1 -1
  151. datahub/testing/mcp_diff.py +15 -2
  152. datahub/upgrade/upgrade.py +10 -12
  153. datahub/utilities/logging_manager.py +8 -1
  154. datahub/utilities/server_config_util.py +350 -10
  155. datahub/utilities/sqlalchemy_query_combiner.py +4 -5
  156. datahub/utilities/urn_encoder.py +1 -1
  157. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/entry_points.txt +0 -0
  158. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/licenses/LICENSE +0 -0
  159. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,235 @@
1
+ from __future__ import annotations
2
+
3
+ import difflib
4
+ import logging
5
+ from typing import TYPE_CHECKING, List, Literal, Optional, Set, Union
6
+
7
+ import datahub.metadata.schema_classes as models
8
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
9
+ from datahub.errors import SdkUsageError
10
+ from datahub.metadata.schema_classes import SchemaMetadataClass
11
+ from datahub.metadata.urns import DatasetUrn, QueryUrn
12
+ from datahub.sdk._shared import DatasetUrnOrStr
13
+ from datahub.sdk._utils import DEFAULT_ACTOR_URN
14
+ from datahub.sdk.dataset import ColumnLineageMapping, parse_cll_mapping
15
+ from datahub.specific.dataset import DatasetPatchBuilder
16
+ from datahub.sql_parsing.fingerprint_utils import generate_hash
17
+ from datahub.utilities.ordered_set import OrderedSet
18
+
19
+ if TYPE_CHECKING:
20
+ from datahub.sdk.main_client import DataHubClient
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ _empty_audit_stamp = models.AuditStampClass(
25
+ time=0,
26
+ actor=DEFAULT_ACTOR_URN,
27
+ )
28
+
29
+
30
+ class LineageClient:
31
+ def __init__(self, client: DataHubClient):
32
+ self._client = client
33
+
34
+ def _get_fields_from_dataset_urn(self, dataset_urn: DatasetUrn) -> Set[str]:
35
+ schema_metadata = self._client._graph.get_aspect(
36
+ str(dataset_urn), SchemaMetadataClass
37
+ )
38
+ if schema_metadata is None:
39
+ return Set()
40
+
41
+ return {field.fieldPath for field in schema_metadata.fields}
42
+
43
+ @classmethod
44
+ def _get_strict_column_lineage(
45
+ cls,
46
+ upstream_fields: Set[str],
47
+ downstream_fields: Set[str],
48
+ ) -> ColumnLineageMapping:
49
+ """Find matches between upstream and downstream fields with case-insensitive matching."""
50
+ strict_column_lineage: ColumnLineageMapping = {}
51
+
52
+ # Create case-insensitive mapping of upstream fields
53
+ case_insensitive_map = {field.lower(): field for field in upstream_fields}
54
+
55
+ # Match downstream fields using case-insensitive comparison
56
+ for downstream_field in downstream_fields:
57
+ lower_field = downstream_field.lower()
58
+ if lower_field in case_insensitive_map:
59
+ # Use the original case of the upstream field
60
+ strict_column_lineage[downstream_field] = [
61
+ case_insensitive_map[lower_field]
62
+ ]
63
+
64
+ return strict_column_lineage
65
+
66
+ @classmethod
67
+ def _get_fuzzy_column_lineage(
68
+ cls,
69
+ upstream_fields: Set[str],
70
+ downstream_fields: Set[str],
71
+ ) -> ColumnLineageMapping:
72
+ """Generate fuzzy matches between upstream and downstream fields."""
73
+
74
+ # Simple normalization function for better matching
75
+ def normalize(s: str) -> str:
76
+ return s.lower().replace("_", "")
77
+
78
+ # Create normalized lookup for upstream fields
79
+ normalized_upstream = {normalize(field): field for field in upstream_fields}
80
+
81
+ fuzzy_column_lineage = {}
82
+ for downstream_field in downstream_fields:
83
+ # Try exact match first
84
+ if downstream_field in upstream_fields:
85
+ fuzzy_column_lineage[downstream_field] = [downstream_field]
86
+ continue
87
+
88
+ # Try normalized match
89
+ norm_downstream = normalize(downstream_field)
90
+ if norm_downstream in normalized_upstream:
91
+ fuzzy_column_lineage[downstream_field] = [
92
+ normalized_upstream[norm_downstream]
93
+ ]
94
+ continue
95
+
96
+ # If no direct match, find closest match using similarity
97
+ matches = difflib.get_close_matches(
98
+ norm_downstream,
99
+ normalized_upstream.keys(),
100
+ n=1, # Return only the best match
101
+ cutoff=0.8, # Adjust cutoff for sensitivity
102
+ )
103
+
104
+ if matches:
105
+ fuzzy_column_lineage[downstream_field] = [
106
+ normalized_upstream[matches[0]]
107
+ ]
108
+
109
+ return fuzzy_column_lineage
110
+
111
+ def add_dataset_copy_lineage(
112
+ self,
113
+ *,
114
+ upstream: DatasetUrnOrStr,
115
+ downstream: DatasetUrnOrStr,
116
+ column_lineage: Union[
117
+ None, ColumnLineageMapping, Literal["auto_fuzzy", "auto_strict"]
118
+ ] = "auto_fuzzy",
119
+ ) -> None:
120
+ upstream = DatasetUrn.from_string(upstream)
121
+ downstream = DatasetUrn.from_string(downstream)
122
+
123
+ if column_lineage is None:
124
+ cll = None
125
+ elif column_lineage in ["auto_fuzzy", "auto_strict"]:
126
+ upstream_schema = self._get_fields_from_dataset_urn(upstream)
127
+ downstream_schema = self._get_fields_from_dataset_urn(downstream)
128
+ if column_lineage == "auto_fuzzy":
129
+ mapping = self._get_fuzzy_column_lineage(
130
+ upstream_schema, downstream_schema
131
+ )
132
+ else:
133
+ mapping = self._get_strict_column_lineage(
134
+ upstream_schema, downstream_schema
135
+ )
136
+ cll = parse_cll_mapping(
137
+ upstream=upstream,
138
+ downstream=downstream,
139
+ cll_mapping=mapping,
140
+ )
141
+ elif isinstance(column_lineage, dict):
142
+ cll = parse_cll_mapping(
143
+ upstream=upstream,
144
+ downstream=downstream,
145
+ cll_mapping=column_lineage,
146
+ )
147
+
148
+ updater = DatasetPatchBuilder(str(downstream))
149
+ updater.add_upstream_lineage(
150
+ models.UpstreamClass(
151
+ dataset=str(upstream),
152
+ type=models.DatasetLineageTypeClass.COPY,
153
+ )
154
+ )
155
+ for cl in cll or []:
156
+ updater.add_fine_grained_upstream_lineage(cl)
157
+
158
+ self._client.entities.update(updater)
159
+
160
+ def add_dataset_transform_lineage(
161
+ self,
162
+ *,
163
+ upstream: DatasetUrnOrStr,
164
+ downstream: DatasetUrnOrStr,
165
+ column_lineage: Optional[ColumnLineageMapping] = None,
166
+ query_text: Optional[str] = None,
167
+ ) -> None:
168
+ upstream = DatasetUrn.from_string(upstream)
169
+ downstream = DatasetUrn.from_string(downstream)
170
+
171
+ cll = None
172
+ if column_lineage is not None:
173
+ cll = parse_cll_mapping(
174
+ upstream=upstream,
175
+ downstream=downstream,
176
+ cll_mapping=column_lineage,
177
+ )
178
+
179
+ fields_involved = OrderedSet([str(upstream), str(downstream)])
180
+ if cll is not None:
181
+ for c in cll:
182
+ for field in c.upstreams or []:
183
+ fields_involved.add(field)
184
+ for field in c.downstreams or []:
185
+ fields_involved.add(field)
186
+
187
+ query_urn = None
188
+ query_entity = None
189
+ if query_text:
190
+ # Eventually we might want to use our regex-based fingerprinting instead.
191
+ fingerprint = generate_hash(query_text)
192
+ query_urn = QueryUrn(fingerprint).urn()
193
+
194
+ from datahub.sql_parsing.sql_parsing_aggregator import make_query_subjects
195
+
196
+ query_entity = MetadataChangeProposalWrapper.construct_many(
197
+ query_urn,
198
+ aspects=[
199
+ models.QueryPropertiesClass(
200
+ statement=models.QueryStatementClass(
201
+ value=query_text, language=models.QueryLanguageClass.SQL
202
+ ),
203
+ source=models.QuerySourceClass.SYSTEM,
204
+ created=_empty_audit_stamp,
205
+ lastModified=_empty_audit_stamp,
206
+ ),
207
+ make_query_subjects(list(fields_involved)),
208
+ ],
209
+ )
210
+
211
+ updater = DatasetPatchBuilder(str(downstream))
212
+ updater.add_upstream_lineage(
213
+ models.UpstreamClass(
214
+ dataset=str(upstream),
215
+ type=models.DatasetLineageTypeClass.TRANSFORMED,
216
+ query=query_urn,
217
+ )
218
+ )
219
+ for cl in cll or []:
220
+ cl.query = query_urn
221
+ updater.add_fine_grained_upstream_lineage(cl)
222
+
223
+ # Throw if the dataset does not exist.
224
+ # We need to manually call .build() instead of reusing client.update()
225
+ # so that we make just one emit_mcps call.
226
+ if not self._client._graph.exists(updater.urn):
227
+ raise SdkUsageError(
228
+ f"Dataset {updater.urn} does not exist, and hence cannot be updated."
229
+ )
230
+ mcps: List[
231
+ Union[MetadataChangeProposalWrapper, models.MetadataChangeProposalClass]
232
+ ] = list(updater.build())
233
+ if query_entity:
234
+ mcps.extend(query_entity)
235
+ self._client._graph.emit_mcps(mcps)
@@ -4,8 +4,9 @@ from typing import Optional, overload
4
4
 
5
5
  from datahub.errors import SdkUsageError
6
6
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
7
- from datahub.ingestion.graph.config import DatahubClientConfig
7
+ from datahub.ingestion.graph.config import ClientMode, DatahubClientConfig
8
8
  from datahub.sdk.entity_client import EntityClient
9
+ from datahub.sdk.lineage_client import LineageClient
9
10
  from datahub.sdk.resolver_client import ResolverClient
10
11
  from datahub.sdk.search_client import SearchClient
11
12
 
@@ -83,7 +84,7 @@ class DataHubClient:
83
84
  # Inspired by the DockerClient.from_env() method.
84
85
  # TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
85
86
  # That file is part of the "environment", but is not a traditional "env variable".
86
- graph = get_default_graph()
87
+ graph = get_default_graph(ClientMode.SDK)
87
88
 
88
89
  return cls(graph=graph)
89
90
 
@@ -99,4 +100,6 @@ class DataHubClient:
99
100
  def search(self) -> SearchClient:
100
101
  return SearchClient(self)
101
102
 
102
- # TODO: lineage client
103
+ @property
104
+ def lineage(self) -> LineageClient:
105
+ return LineageClient(self)
datahub/sdk/mlmodel.py ADDED
@@ -0,0 +1,301 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Dict, List, Optional, Sequence, Type, Union
5
+
6
+ from typing_extensions import Self
7
+
8
+ from datahub.emitter.mce_builder import DEFAULT_ENV
9
+ from datahub.metadata.schema_classes import (
10
+ AspectBag,
11
+ MLHyperParamClass,
12
+ MLMetricClass,
13
+ MLModelPropertiesClass,
14
+ )
15
+ from datahub.metadata.urns import (
16
+ DataProcessInstanceUrn,
17
+ MlModelGroupUrn,
18
+ MlModelUrn,
19
+ Urn,
20
+ )
21
+ from datahub.sdk._shared import (
22
+ DomainInputType,
23
+ HasDomain,
24
+ HasInstitutionalMemory,
25
+ HasOwnership,
26
+ HasPlatformInstance,
27
+ HasTags,
28
+ HasTerms,
29
+ HasVersion,
30
+ HyperParamsInputType,
31
+ LinksInputType,
32
+ MLTrainingJobInputType,
33
+ OwnersInputType,
34
+ TagsInputType,
35
+ TermsInputType,
36
+ TrainingMetricsInputType,
37
+ convert_hyper_params,
38
+ convert_training_metrics,
39
+ make_time_stamp,
40
+ parse_time_stamp,
41
+ )
42
+ from datahub.sdk.entity import Entity, ExtraAspectsType
43
+
44
+
45
+ class MLModel(
46
+ HasPlatformInstance,
47
+ HasOwnership,
48
+ HasInstitutionalMemory,
49
+ HasTags,
50
+ HasTerms,
51
+ HasDomain,
52
+ HasVersion,
53
+ Entity,
54
+ ):
55
+ __slots__ = ()
56
+
57
+ @classmethod
58
+ def get_urn_type(cls) -> Type[MlModelUrn]:
59
+ return MlModelUrn
60
+
61
+ def __init__(
62
+ self,
63
+ id: str,
64
+ platform: str,
65
+ version: Optional[str] = None,
66
+ aliases: Optional[List[str]] = None,
67
+ platform_instance: Optional[str] = None,
68
+ env: str = DEFAULT_ENV,
69
+ name: Optional[str] = None,
70
+ description: Optional[str] = None,
71
+ training_metrics: Optional[TrainingMetricsInputType] = None,
72
+ hyper_params: Optional[HyperParamsInputType] = None,
73
+ external_url: Optional[str] = None,
74
+ custom_properties: Optional[Dict[str, str]] = None,
75
+ created: Optional[datetime] = None,
76
+ last_modified: Optional[datetime] = None,
77
+ owners: Optional[OwnersInputType] = None,
78
+ links: Optional[LinksInputType] = None,
79
+ tags: Optional[TagsInputType] = None,
80
+ terms: Optional[TermsInputType] = None,
81
+ domain: Optional[DomainInputType] = None,
82
+ model_group: Optional[Union[str, MlModelGroupUrn]] = None,
83
+ training_jobs: Optional[MLTrainingJobInputType] = None,
84
+ downstream_jobs: Optional[MLTrainingJobInputType] = None,
85
+ extra_aspects: ExtraAspectsType = None,
86
+ ):
87
+ urn = MlModelUrn(platform=platform, name=id, env=env)
88
+ super().__init__(urn)
89
+ self._set_extra_aspects(extra_aspects)
90
+
91
+ self._set_platform_instance(urn.platform, platform_instance)
92
+
93
+ self._ensure_model_props()
94
+
95
+ if version is not None:
96
+ self.set_version(version)
97
+ if name is not None:
98
+ self.set_name(name)
99
+ if aliases is not None:
100
+ self.set_version_aliases(aliases)
101
+ if description is not None:
102
+ self.set_description(description)
103
+ if training_metrics is not None:
104
+ self.set_training_metrics(training_metrics)
105
+ if hyper_params is not None:
106
+ self.set_hyper_params(hyper_params)
107
+ if external_url is not None:
108
+ self.set_external_url(external_url)
109
+ if custom_properties is not None:
110
+ self.set_custom_properties(custom_properties)
111
+ if created is not None:
112
+ self.set_created(created)
113
+ if last_modified is not None:
114
+ self.set_last_modified(last_modified)
115
+
116
+ if owners is not None:
117
+ self.set_owners(owners)
118
+ if links is not None:
119
+ self.set_links(links)
120
+ if tags is not None:
121
+ self.set_tags(tags)
122
+ if terms is not None:
123
+ self.set_terms(terms)
124
+ if domain is not None:
125
+ self.set_domain(domain)
126
+ if model_group is not None:
127
+ self.set_model_group(model_group)
128
+ if training_jobs is not None:
129
+ self.set_training_jobs(training_jobs)
130
+ if downstream_jobs is not None:
131
+ self.set_downstream_jobs(downstream_jobs)
132
+
133
+ @classmethod
134
+ def _new_from_graph(cls, urn: Urn, current_aspects: AspectBag) -> Self:
135
+ assert isinstance(urn, MlModelUrn)
136
+ entity = cls(
137
+ id=urn.name,
138
+ platform=urn.platform,
139
+ env=urn.env,
140
+ )
141
+ return entity._init_from_graph(current_aspects)
142
+
143
+ @property
144
+ def urn(self) -> MlModelUrn:
145
+ return self._urn # type: ignore
146
+
147
+ def _ensure_model_props(
148
+ self,
149
+ ) -> MLModelPropertiesClass:
150
+ return self._setdefault_aspect(MLModelPropertiesClass())
151
+
152
+ @property
153
+ def name(self) -> Optional[str]:
154
+ return self._ensure_model_props().name
155
+
156
+ def set_name(self, name: str) -> None:
157
+ self._ensure_model_props().name = name
158
+
159
+ @property
160
+ def description(self) -> Optional[str]:
161
+ return self._ensure_model_props().description
162
+
163
+ def set_description(self, description: str) -> None:
164
+ self._ensure_model_props().description = description
165
+
166
+ @property
167
+ def external_url(self) -> Optional[str]:
168
+ return self._ensure_model_props().externalUrl
169
+
170
+ def set_external_url(self, external_url: str) -> None:
171
+ self._ensure_model_props().externalUrl = external_url
172
+
173
+ @property
174
+ def custom_properties(self) -> Optional[Dict[str, str]]:
175
+ return self._ensure_model_props().customProperties
176
+
177
+ def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
178
+ self._ensure_model_props().customProperties = custom_properties
179
+
180
+ @property
181
+ def created(self) -> Optional[datetime]:
182
+ return parse_time_stamp(self._ensure_model_props().created)
183
+
184
+ def set_created(self, created: datetime) -> None:
185
+ self._ensure_model_props().created = make_time_stamp(created)
186
+
187
+ @property
188
+ def last_modified(self) -> Optional[datetime]:
189
+ return parse_time_stamp(self._ensure_model_props().lastModified)
190
+
191
+ def set_last_modified(self, last_modified: datetime) -> None:
192
+ self._ensure_model_props().lastModified = make_time_stamp(last_modified)
193
+
194
+ @property
195
+ def training_metrics(self) -> Optional[List[MLMetricClass]]:
196
+ return self._ensure_model_props().trainingMetrics
197
+
198
+ def set_training_metrics(self, metrics: TrainingMetricsInputType) -> None:
199
+ self._ensure_model_props().trainingMetrics = convert_training_metrics(metrics)
200
+
201
+ def add_training_metrics(self, metrics: TrainingMetricsInputType) -> None:
202
+ props = self._ensure_model_props()
203
+ if props.trainingMetrics is None:
204
+ props.trainingMetrics = []
205
+ if isinstance(metrics, list):
206
+ props.trainingMetrics.extend(
207
+ [
208
+ MLMetricClass(name=metric.name, value=metric.value)
209
+ for metric in metrics
210
+ ]
211
+ )
212
+ else:
213
+ # For dictionary case, use the key as name and value as value
214
+ for name, value in metrics.items():
215
+ props.trainingMetrics.append(MLMetricClass(name=name, value=value))
216
+
217
+ @property
218
+ def hyper_params(self) -> Optional[List[MLHyperParamClass]]:
219
+ return self._ensure_model_props().hyperParams
220
+
221
+ def set_hyper_params(self, params: HyperParamsInputType) -> None:
222
+ self._ensure_model_props().hyperParams = convert_hyper_params(params)
223
+
224
+ def add_hyper_params(self, params: HyperParamsInputType) -> None:
225
+ props = self._ensure_model_props()
226
+ if props.hyperParams is None:
227
+ props.hyperParams = []
228
+ if isinstance(params, list):
229
+ props.hyperParams.extend(
230
+ [
231
+ MLHyperParamClass(name=param.name, value=param.value)
232
+ for param in params
233
+ ]
234
+ )
235
+ else:
236
+ # For dictionary case, iterate through key-value pairs
237
+ for name, value in params.items():
238
+ props.hyperParams.append(MLHyperParamClass(name=name, value=value))
239
+
240
+ @property
241
+ def model_group(self) -> Optional[str]:
242
+ props = self._ensure_model_props()
243
+ groups = props.groups
244
+ if groups is None or len(groups) == 0:
245
+ return None
246
+ return groups[0]
247
+
248
+ def set_model_group(self, group: Union[str, MlModelGroupUrn]) -> None:
249
+ self._ensure_model_props().groups = [str(group)]
250
+
251
+ @property
252
+ def training_jobs(self) -> Optional[List[str]]:
253
+ return self._ensure_model_props().trainingJobs
254
+
255
+ def set_training_jobs(self, training_jobs: MLTrainingJobInputType) -> None:
256
+ self._ensure_model_props().trainingJobs = [str(job) for job in training_jobs]
257
+
258
+ def add_training_job(
259
+ self, training_job: Union[str, DataProcessInstanceUrn]
260
+ ) -> None:
261
+ props = self._ensure_model_props()
262
+ if props.trainingJobs is None:
263
+ props.trainingJobs = []
264
+ props.trainingJobs.append(str(training_job))
265
+
266
+ def remove_training_job(
267
+ self, training_job: Union[str, DataProcessInstanceUrn]
268
+ ) -> None:
269
+ props = self._ensure_model_props()
270
+ if props.trainingJobs is not None:
271
+ job_str = str(training_job)
272
+ props.trainingJobs = [job for job in props.trainingJobs if job != job_str]
273
+
274
+ @property
275
+ def downstream_jobs(self) -> Optional[List[str]]:
276
+ return self._ensure_model_props().downstreamJobs
277
+
278
+ def set_downstream_jobs(
279
+ self, downstream_jobs: Sequence[Union[str, DataProcessInstanceUrn]]
280
+ ) -> None:
281
+ self._ensure_model_props().downstreamJobs = [
282
+ str(job) for job in downstream_jobs
283
+ ]
284
+
285
+ def add_downstream_job(
286
+ self, downstream_job: Union[str, DataProcessInstanceUrn]
287
+ ) -> None:
288
+ props = self._ensure_model_props()
289
+ if props.downstreamJobs is None:
290
+ props.downstreamJobs = []
291
+ props.downstreamJobs.append(str(downstream_job))
292
+
293
+ def remove_downstream_job(
294
+ self, downstream_job: Union[str, DataProcessInstanceUrn]
295
+ ) -> None:
296
+ props = self._ensure_model_props()
297
+ if props.downstreamJobs is not None:
298
+ job_str = str(downstream_job)
299
+ props.downstreamJobs = [
300
+ job for job in props.downstreamJobs if job != job_str
301
+ ]