acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the advisory details accompanying this release for more information.

Files changed (211)
  1. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
  2. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
  3. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
  4. datahub/__init__.py +1 -1
  5. datahub/api/entities/assertion/assertion_operator.py +3 -5
  6. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  7. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  8. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  9. datahub/api/entities/dataset/dataset.py +2 -1
  10. datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
  11. datahub/cli/cli_utils.py +13 -2
  12. datahub/cli/delete_cli.py +3 -3
  13. datahub/cli/docker_cli.py +6 -6
  14. datahub/cli/ingest_cli.py +25 -15
  15. datahub/cli/lite_cli.py +2 -2
  16. datahub/cli/migrate.py +5 -5
  17. datahub/cli/specific/assertions_cli.py +3 -3
  18. datahub/cli/specific/structuredproperties_cli.py +84 -0
  19. datahub/cli/timeline_cli.py +1 -1
  20. datahub/configuration/common.py +1 -2
  21. datahub/configuration/config_loader.py +73 -50
  22. datahub/configuration/git.py +2 -2
  23. datahub/configuration/time_window_config.py +10 -5
  24. datahub/emitter/mce_builder.py +4 -8
  25. datahub/emitter/mcp_builder.py +27 -0
  26. datahub/emitter/mcp_patch_builder.py +1 -2
  27. datahub/emitter/rest_emitter.py +126 -85
  28. datahub/entrypoints.py +6 -0
  29. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  30. datahub/ingestion/api/report.py +1 -2
  31. datahub/ingestion/api/source.py +4 -2
  32. datahub/ingestion/api/source_helpers.py +1 -1
  33. datahub/ingestion/extractor/json_schema_util.py +3 -3
  34. datahub/ingestion/extractor/schema_util.py +3 -5
  35. datahub/ingestion/fs/s3_fs.py +3 -3
  36. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  37. datahub/ingestion/graph/client.py +22 -19
  38. datahub/ingestion/graph/config.py +1 -1
  39. datahub/ingestion/run/pipeline.py +8 -7
  40. datahub/ingestion/run/pipeline_config.py +3 -3
  41. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  42. datahub/ingestion/source/abs/source.py +19 -8
  43. datahub/ingestion/source/aws/glue.py +77 -47
  44. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  45. datahub/ingestion/source/aws/s3_util.py +24 -1
  46. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  47. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  48. datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
  49. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  50. datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
  51. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  52. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
  53. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
  54. datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
  55. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  56. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  57. datahub/ingestion/source/bigquery_v2/usage.py +60 -60
  58. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  59. datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
  60. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
  61. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  62. datahub/ingestion/source/csv_enricher.py +29 -29
  63. datahub/ingestion/source/datahub/config.py +10 -0
  64. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  65. datahub/ingestion/source/datahub/datahub_source.py +12 -2
  66. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  67. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  68. datahub/ingestion/source/delta_lake/source.py +0 -5
  69. datahub/ingestion/source/demo_data.py +1 -1
  70. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  71. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  72. datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
  73. datahub/ingestion/source/dremio/dremio_source.py +2 -2
  74. datahub/ingestion/source/elastic_search.py +4 -4
  75. datahub/ingestion/source/fivetran/fivetran.py +1 -6
  76. datahub/ingestion/source/gc/datahub_gc.py +11 -14
  77. datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
  78. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
  79. datahub/ingestion/source/gcs/gcs_source.py +3 -2
  80. datahub/ingestion/source/ge_data_profiler.py +2 -5
  81. datahub/ingestion/source/ge_profiling_config.py +3 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +13 -6
  83. datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
  84. datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
  85. datahub/ingestion/source/identity/azure_ad.py +3 -3
  86. datahub/ingestion/source/identity/okta.py +3 -3
  87. datahub/ingestion/source/kafka/kafka.py +11 -9
  88. datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
  89. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  90. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  91. datahub/ingestion/source/looker/looker_common.py +19 -19
  92. datahub/ingestion/source/looker/looker_config.py +11 -6
  93. datahub/ingestion/source/looker/looker_source.py +25 -25
  94. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  95. datahub/ingestion/source/looker/looker_usage.py +5 -7
  96. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  97. datahub/ingestion/source/looker/lookml_source.py +13 -15
  98. datahub/ingestion/source/looker/view_upstream.py +5 -5
  99. datahub/ingestion/source/metabase.py +1 -6
  100. datahub/ingestion/source/mlflow.py +4 -9
  101. datahub/ingestion/source/mode.py +5 -5
  102. datahub/ingestion/source/mongodb.py +6 -4
  103. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  104. datahub/ingestion/source/nifi.py +24 -31
  105. datahub/ingestion/source/openapi.py +9 -9
  106. datahub/ingestion/source/powerbi/config.py +12 -12
  107. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  108. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  109. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  110. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  111. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  112. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
  113. datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
  114. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  115. datahub/ingestion/source/redash.py +0 -5
  116. datahub/ingestion/source/redshift/config.py +3 -3
  117. datahub/ingestion/source/redshift/redshift.py +45 -46
  118. datahub/ingestion/source/redshift/usage.py +33 -33
  119. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  120. datahub/ingestion/source/s3/source.py +11 -15
  121. datahub/ingestion/source/salesforce.py +26 -25
  122. datahub/ingestion/source/schema/json_schema.py +1 -1
  123. datahub/ingestion/source/sigma/sigma.py +3 -3
  124. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  125. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  127. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  128. datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
  129. datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
  130. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
  131. datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
  132. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
  133. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  134. datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
  135. datahub/ingestion/source/sql/athena.py +1 -3
  136. datahub/ingestion/source/sql/clickhouse.py +8 -14
  137. datahub/ingestion/source/sql/oracle.py +1 -3
  138. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  139. datahub/ingestion/source/sql/sql_types.py +1 -2
  140. datahub/ingestion/source/sql/sql_utils.py +5 -0
  141. datahub/ingestion/source/sql/teradata.py +18 -5
  142. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  143. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  144. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  145. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  146. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  147. datahub/ingestion/source/superset.py +1 -6
  148. datahub/ingestion/source/tableau/tableau.py +343 -117
  149. datahub/ingestion/source/tableau/tableau_common.py +5 -2
  150. datahub/ingestion/source/unity/config.py +3 -1
  151. datahub/ingestion/source/unity/proxy.py +1 -1
  152. datahub/ingestion/source/unity/source.py +74 -74
  153. datahub/ingestion/source/unity/usage.py +3 -1
  154. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  155. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  156. datahub/ingestion/source/usage/usage_common.py +1 -1
  157. datahub/ingestion/source_report/ingestion_stage.py +24 -20
  158. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  159. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  160. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  161. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  162. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  163. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  164. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  165. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  166. datahub/lite/duckdb_lite.py +12 -10
  167. datahub/metadata/_schema_classes.py +317 -44
  168. datahub/metadata/_urns/urn_defs.py +69 -15
  169. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  170. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  171. datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  172. datahub/metadata/schema.avsc +302 -89
  173. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  174. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  175. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  176. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  177. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  178. datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
  179. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  180. datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
  181. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  182. datahub/metadata/schemas/MLModelProperties.avsc +96 -48
  183. datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  184. datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
  185. datahub/metadata/schemas/VersionProperties.avsc +216 -0
  186. datahub/metadata/schemas/VersionSetKey.avsc +26 -0
  187. datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
  188. datahub/secret/datahub_secrets_client.py +12 -21
  189. datahub/secret/secret_common.py +14 -8
  190. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  191. datahub/sql_parsing/schema_resolver.py +5 -10
  192. datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
  193. datahub/sql_parsing/sqlglot_lineage.py +3 -3
  194. datahub/sql_parsing/sqlglot_utils.py +1 -1
  195. datahub/telemetry/stats.py +1 -2
  196. datahub/testing/mcp_diff.py +1 -1
  197. datahub/utilities/file_backed_collections.py +11 -11
  198. datahub/utilities/hive_schema_to_avro.py +2 -2
  199. datahub/utilities/logging_manager.py +2 -2
  200. datahub/utilities/lossy_collections.py +3 -3
  201. datahub/utilities/mapping.py +3 -3
  202. datahub/utilities/memory_footprint.py +3 -2
  203. datahub/utilities/perf_timer.py +11 -6
  204. datahub/utilities/serialized_lru_cache.py +3 -1
  205. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  206. datahub/utilities/sqllineage_patch.py +1 -1
  207. datahub/utilities/stats_collections.py +3 -1
  208. datahub/utilities/urns/_urn_base.py +28 -5
  209. datahub/utilities/urns/urn_iter.py +2 -2
  210. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
  211. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
@@ -10,7 +10,7 @@
10
10
 
11
11
  # This file contains classes corresponding to entity URNs.
12
12
 
13
- from typing import ClassVar, List, Optional, Type, TYPE_CHECKING
13
+ from typing import ClassVar, List, Optional, Type, TYPE_CHECKING, Union
14
14
 
15
15
  import functools
16
16
  from deprecated.sphinx import deprecated as _sphinx_deprecated
@@ -213,7 +213,7 @@ class SchemaFieldUrn(_SpecificUrn):
213
213
  ENTITY_TYPE: ClassVar[str] = "schemaField"
214
214
  _URN_PARTS: ClassVar[int] = 2
215
215
 
216
- def __init__(self, parent: str, field_path: str, *, _allow_coercion: bool = True) -> None:
216
+ def __init__(self, parent: Union["Urn", str], field_path: str, *, _allow_coercion: bool = True) -> None:
217
217
  if _allow_coercion:
218
218
  # Field coercion logic (if any is required).
219
219
  field_path = UrnEncoder.encode_string(field_path)
@@ -604,7 +604,7 @@ class DataJobUrn(_SpecificUrn):
604
604
  ENTITY_TYPE: ClassVar[str] = "dataJob"
605
605
  _URN_PARTS: ClassVar[int] = 2
606
606
 
607
- def __init__(self, flow: str, job_id: str, *, _allow_coercion: bool = True) -> None:
607
+ def __init__(self, flow: Union["DataFlowUrn", str], job_id: str, *, _allow_coercion: bool = True) -> None:
608
608
  if _allow_coercion:
609
609
  # Field coercion logic (if any is required).
610
610
  job_id = UrnEncoder.encode_string(job_id)
@@ -1435,10 +1435,10 @@ class DataPlatformInstanceUrn(_SpecificUrn):
1435
1435
  ENTITY_TYPE: ClassVar[str] = "dataPlatformInstance"
1436
1436
  _URN_PARTS: ClassVar[int] = 2
1437
1437
 
1438
- def __init__(self, platform: str, instance: str, *, _allow_coercion: bool = True) -> None:
1438
+ def __init__(self, platform: Union["DataPlatformUrn", str], instance: str, *, _allow_coercion: bool = True) -> None:
1439
1439
  if _allow_coercion:
1440
1440
  # Field coercion logic (if any is required).
1441
- platform = DataPlatformUrn(platform).urn()
1441
+ platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()
1442
1442
  instance = UrnEncoder.encode_string(instance)
1443
1443
 
1444
1444
  # Validation logic.
@@ -1678,10 +1678,10 @@ class DatasetUrn(_SpecificUrn):
1678
1678
  ENTITY_TYPE: ClassVar[str] = "dataset"
1679
1679
  _URN_PARTS: ClassVar[int] = 3
1680
1680
 
1681
- def __init__(self, platform: str, name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
1681
+ def __init__(self, platform: Union["DataPlatformUrn", str], name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
1682
1682
  if _allow_coercion:
1683
1683
  # Field coercion logic (if any is required).
1684
- platform = DataPlatformUrn(platform).urn()
1684
+ platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()
1685
1685
  name = UrnEncoder.encode_string(name)
1686
1686
  env = env.upper()
1687
1687
  env = UrnEncoder.encode_string(env)
@@ -1771,10 +1771,10 @@ class MlModelUrn(_SpecificUrn):
1771
1771
  ENTITY_TYPE: ClassVar[str] = "mlModel"
1772
1772
  _URN_PARTS: ClassVar[int] = 3
1773
1773
 
1774
- def __init__(self, platform: str, name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
1774
+ def __init__(self, platform: Union["DataPlatformUrn", str], name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
1775
1775
  if _allow_coercion:
1776
1776
  # Field coercion logic (if any is required).
1777
- platform = DataPlatformUrn(platform).urn()
1777
+ platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()
1778
1778
  name = UrnEncoder.encode_string(name)
1779
1779
  env = env.upper()
1780
1780
  env = UrnEncoder.encode_string(env)
@@ -1889,10 +1889,10 @@ class MlModelDeploymentUrn(_SpecificUrn):
1889
1889
  ENTITY_TYPE: ClassVar[str] = "mlModelDeployment"
1890
1890
  _URN_PARTS: ClassVar[int] = 3
1891
1891
 
1892
- def __init__(self, platform: str, name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
1892
+ def __init__(self, platform: Union["DataPlatformUrn", str], name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
1893
1893
  if _allow_coercion:
1894
1894
  # Field coercion logic (if any is required).
1895
- platform = DataPlatformUrn(platform).urn()
1895
+ platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()
1896
1896
  name = UrnEncoder.encode_string(name)
1897
1897
  env = env.upper()
1898
1898
  env = UrnEncoder.encode_string(env)
@@ -1953,10 +1953,10 @@ class MlFeatureTableUrn(_SpecificUrn):
1953
1953
  ENTITY_TYPE: ClassVar[str] = "mlFeatureTable"
1954
1954
  _URN_PARTS: ClassVar[int] = 2
1955
1955
 
1956
- def __init__(self, platform: str, name: str, *, _allow_coercion: bool = True) -> None:
1956
+ def __init__(self, platform: Union["DataPlatformUrn", str], name: str, *, _allow_coercion: bool = True) -> None:
1957
1957
  if _allow_coercion:
1958
1958
  # Field coercion logic (if any is required).
1959
- platform = DataPlatformUrn(platform).urn()
1959
+ platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()
1960
1960
  name = UrnEncoder.encode_string(name)
1961
1961
 
1962
1962
  # Validation logic.
@@ -2225,6 +2225,60 @@ class DataHubIngestionSourceUrn(_SpecificUrn):
2225
2225
  def id(self) -> str:
2226
2226
  return self.entity_ids[0]
2227
2227
 
2228
+ if TYPE_CHECKING:
2229
+ from datahub.metadata.schema_classes import VersionSetKeyClass
2230
+
2231
+ class VersionSetUrn(_SpecificUrn):
2232
+ ENTITY_TYPE: ClassVar[str] = "versionSet"
2233
+ _URN_PARTS: ClassVar[int] = 2
2234
+
2235
+ def __init__(self, id: str, entity_type: str, *, _allow_coercion: bool = True) -> None:
2236
+ if _allow_coercion:
2237
+ # Field coercion logic (if any is required).
2238
+ id = UrnEncoder.encode_string(id)
2239
+ entity_type = UrnEncoder.encode_string(entity_type)
2240
+
2241
+ # Validation logic.
2242
+ if not id:
2243
+ raise InvalidUrnError("VersionSetUrn id cannot be empty")
2244
+ if UrnEncoder.contains_reserved_char(id):
2245
+ raise InvalidUrnError(f'VersionSetUrn id contains reserved characters')
2246
+ if not entity_type:
2247
+ raise InvalidUrnError("VersionSetUrn entity_type cannot be empty")
2248
+ if UrnEncoder.contains_reserved_char(entity_type):
2249
+ raise InvalidUrnError(f'VersionSetUrn entity_type contains reserved characters')
2250
+
2251
+ super().__init__(self.ENTITY_TYPE, [id, entity_type])
2252
+
2253
+ @classmethod
2254
+ def _parse_ids(cls, entity_ids: List[str]) -> "VersionSetUrn":
2255
+ if len(entity_ids) != cls._URN_PARTS:
2256
+ raise InvalidUrnError(f"VersionSetUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
2257
+ return cls(id=entity_ids[0], entity_type=entity_ids[1], _allow_coercion=False)
2258
+
2259
+ @classmethod
2260
+ def underlying_key_aspect_type(cls) -> Type["VersionSetKeyClass"]:
2261
+ from datahub.metadata.schema_classes import VersionSetKeyClass
2262
+
2263
+ return VersionSetKeyClass
2264
+
2265
+ def to_key_aspect(self) -> "VersionSetKeyClass":
2266
+ from datahub.metadata.schema_classes import VersionSetKeyClass
2267
+
2268
+ return VersionSetKeyClass(id=self.id, entityType=self.entity_type)
2269
+
2270
+ @classmethod
2271
+ def from_key_aspect(cls, key_aspect: "VersionSetKeyClass") -> "VersionSetUrn":
2272
+ return cls(id=key_aspect.id, entity_type=key_aspect.entityType)
2273
+
2274
+ @property
2275
+ def id(self) -> str:
2276
+ return self.entity_ids[0]
2277
+
2278
+ @property
2279
+ def entity_type(self) -> str:
2280
+ return self.entity_ids[1]
2281
+
2228
2282
  if TYPE_CHECKING:
2229
2283
  from datahub.metadata.schema_classes import DataHubRetentionKeyClass
2230
2284
 
@@ -2385,10 +2439,10 @@ class MlModelGroupUrn(_SpecificUrn):
2385
2439
  ENTITY_TYPE: ClassVar[str] = "mlModelGroup"
2386
2440
  _URN_PARTS: ClassVar[int] = 3
2387
2441
 
2388
- def __init__(self, platform: str, name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
2442
+ def __init__(self, platform: Union["DataPlatformUrn", str], name: str, env: str = "PROD", *, _allow_coercion: bool = True) -> None:
2389
2443
  if _allow_coercion:
2390
2444
  # Field coercion logic (if any is required).
2391
- platform = DataPlatformUrn(platform).urn()
2445
+ platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()
2392
2446
  name = UrnEncoder.encode_string(name)
2393
2447
  env = env.upper()
2394
2448
  env = UrnEncoder.encode_string(env)
@@ -65,6 +65,7 @@ from .....schema_classes import StatusClass
65
65
  from .....schema_classes import SubTypesClass
66
66
  from .....schema_classes import TagAssociationClass
67
67
  from .....schema_classes import TimeStampClass
68
+ from .....schema_classes import VersionPropertiesClass
68
69
  from .....schema_classes import VersionTagClass
69
70
  from .....schema_classes import WindowDurationClass
70
71
 
@@ -127,6 +128,7 @@ Status = StatusClass
127
128
  SubTypes = SubTypesClass
128
129
  TagAssociation = TagAssociationClass
129
130
  TimeStamp = TimeStampClass
131
+ VersionProperties = VersionPropertiesClass
130
132
  VersionTag = VersionTagClass
131
133
  WindowDuration = WindowDurationClass
132
134
 
@@ -57,6 +57,7 @@ from ......schema_classes import SchemaFieldKeyClass
57
57
  from ......schema_classes import TagKeyClass
58
58
  from ......schema_classes import TelemetryKeyClass
59
59
  from ......schema_classes import TestKeyClass
60
+ from ......schema_classes import VersionSetKeyClass
60
61
 
61
62
 
62
63
  AssertionKey = AssertionKeyClass
@@ -109,5 +110,6 @@ SchemaFieldKey = SchemaFieldKeyClass
109
110
  TagKey = TagKeyClass
110
111
  TelemetryKey = TelemetryKeyClass
111
112
  TestKey = TestKeyClass
113
+ VersionSetKey = VersionSetKeyClass
112
114
 
113
115
  # fmt: on
@@ -0,0 +1,17 @@
1
+ # mypy: ignore-errors
2
+ # flake8: noqa
3
+
4
+ # This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
5
+ # Do not modify manually!
6
+
7
+ # pylint: skip-file
8
+ # fmt: off
9
+ # isort: skip_file
10
+ from .....schema_classes import VersionSetPropertiesClass
11
+ from .....schema_classes import VersioningSchemeClass
12
+
13
+
14
+ VersionSetProperties = VersionSetPropertiesClass
15
+ VersioningScheme = VersioningSchemeClass
16
+
17
+ # fmt: on
@@ -2021,6 +2021,136 @@
2021
2021
  ],
2022
2022
  "doc": "Aspect used for storing all applicable documentations on assets.\nThis aspect supports multiple documentations from different sources.\nThere is an implicit assumption that there is only one documentation per\n source.\nFor example, if there are two documentations from the same source, the\n latest one will overwrite the previous one.\nIf there are two documentations from different sources, both will be\n stored.\nFuture evolution considerations:\nThe first entity that uses this aspect is Schema Field. We will expand this\n aspect to other entities eventually.\nThe values of the documentation are not currently searchable. This will be\n changed once this aspect develops opinion on which documentation entry is\n the authoritative one.\nEnsuring that there is only one documentation per source is a business\n rule that is not enforced by the aspect yet. This will currently be enforced by the\n application that uses this aspect. We will eventually enforce this rule in\n the aspect using AspectMutators."
2023
2023
  },
2024
+ {
2025
+ "type": "record",
2026
+ "Aspect": {
2027
+ "name": "versionProperties"
2028
+ },
2029
+ "name": "VersionProperties",
2030
+ "namespace": "com.linkedin.pegasus2avro.common",
2031
+ "fields": [
2032
+ {
2033
+ "Relationship": {
2034
+ "entityTypes": [
2035
+ "versionSet"
2036
+ ],
2037
+ "name": "VersionOf"
2038
+ },
2039
+ "Searchable": {
2040
+ "queryByDefault": false
2041
+ },
2042
+ "java": {
2043
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
2044
+ },
2045
+ "Urn": "Urn",
2046
+ "entityTypes": [
2047
+ "versionSet"
2048
+ ],
2049
+ "type": "string",
2050
+ "name": "versionSet",
2051
+ "doc": "The linked Version Set entity that ties multiple versioned assets together"
2052
+ },
2053
+ {
2054
+ "Searchable": {
2055
+ "/versionTag": {
2056
+ "fieldName": "version",
2057
+ "queryByDefault": false
2058
+ }
2059
+ },
2060
+ "type": {
2061
+ "type": "record",
2062
+ "name": "VersionTag",
2063
+ "namespace": "com.linkedin.pegasus2avro.common",
2064
+ "fields": [
2065
+ {
2066
+ "type": [
2067
+ "null",
2068
+ "string"
2069
+ ],
2070
+ "name": "versionTag",
2071
+ "default": null
2072
+ },
2073
+ {
2074
+ "type": [
2075
+ "null",
2076
+ "com.linkedin.pegasus2avro.common.MetadataAttribution"
2077
+ ],
2078
+ "name": "metadataAttribution",
2079
+ "default": null
2080
+ }
2081
+ ],
2082
+ "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
2083
+ },
2084
+ "name": "version",
2085
+ "doc": "Label for this versioned asset, is unique within a version set"
2086
+ },
2087
+ {
2088
+ "Searchable": {
2089
+ "/*/versionTag": {
2090
+ "fieldName": "aliases",
2091
+ "queryByDefault": false
2092
+ }
2093
+ },
2094
+ "type": {
2095
+ "type": "array",
2096
+ "items": "com.linkedin.pegasus2avro.common.VersionTag"
2097
+ },
2098
+ "name": "aliases",
2099
+ "default": [],
2100
+ "doc": "Associated aliases for this versioned asset"
2101
+ },
2102
+ {
2103
+ "type": [
2104
+ "null",
2105
+ "string"
2106
+ ],
2107
+ "name": "comment",
2108
+ "default": null,
2109
+ "doc": "Comment documenting what this version was created for, changes, or represents"
2110
+ },
2111
+ {
2112
+ "Searchable": {
2113
+ "fieldName": "versionSortId",
2114
+ "queryByDefault": false
2115
+ },
2116
+ "type": "string",
2117
+ "name": "sortId",
2118
+ "doc": "Sort identifier that determines where a version lives in the order of the Version Set.\nWhat this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation."
2119
+ },
2120
+ {
2121
+ "type": [
2122
+ "null",
2123
+ "com.linkedin.pegasus2avro.common.AuditStamp"
2124
+ ],
2125
+ "name": "sourceCreatedTimestamp",
2126
+ "default": null,
2127
+ "doc": "Timestamp reflecting when this asset version was created in the source system."
2128
+ },
2129
+ {
2130
+ "type": [
2131
+ "null",
2132
+ "com.linkedin.pegasus2avro.common.AuditStamp"
2133
+ ],
2134
+ "name": "metadataCreatedTimestamp",
2135
+ "default": null,
2136
+ "doc": "Timestamp reflecting when the metadata for this version was created in DataHub"
2137
+ },
2138
+ {
2139
+ "Searchable": {
2140
+ "fieldType": "BOOLEAN",
2141
+ "queryByDefault": false
2142
+ },
2143
+ "type": [
2144
+ "null",
2145
+ "boolean"
2146
+ ],
2147
+ "name": "isLatest",
2148
+ "default": null,
2149
+ "doc": "Marks whether this version is currently the latest. Set by a side effect and should not be modified by API."
2150
+ }
2151
+ ],
2152
+ "doc": "Properties about a versioned asset i.e. dataset, ML Model, etc."
2153
+ },
2024
2154
  {
2025
2155
  "type": "record",
2026
2156
  "Aspect": {
@@ -6890,6 +7020,51 @@
6890
7020
  "default": null,
6891
7021
  "doc": "URL where the reference exist"
6892
7022
  },
7023
+ {
7024
+ "Relationship": {
7025
+ "/*": {
7026
+ "entityTypes": [
7027
+ "dataJob",
7028
+ "dataProcessInstance"
7029
+ ],
7030
+ "isLineage": true,
7031
+ "name": "TrainedBy"
7032
+ }
7033
+ },
7034
+ "type": [
7035
+ "null",
7036
+ {
7037
+ "type": "array",
7038
+ "items": "string"
7039
+ }
7040
+ ],
7041
+ "name": "trainingJobs",
7042
+ "default": null,
7043
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
7044
+ },
7045
+ {
7046
+ "Relationship": {
7047
+ "/*": {
7048
+ "entityTypes": [
7049
+ "dataJob",
7050
+ "dataProcessInstance"
7051
+ ],
7052
+ "isLineage": true,
7053
+ "isUpstream": false,
7054
+ "name": "UsedBy"
7055
+ }
7056
+ },
7057
+ "type": [
7058
+ "null",
7059
+ {
7060
+ "type": "array",
7061
+ "items": "string"
7062
+ }
7063
+ ],
7064
+ "name": "downstreamJobs",
7065
+ "default": null,
7066
+ "doc": "List of jobs or process instances (if any) that use the model or group."
7067
+ },
6893
7068
  {
6894
7069
  "Searchable": {
6895
7070
  "boostScore": 10.0,
@@ -6949,22 +7124,7 @@
6949
7124
  {
6950
7125
  "type": [
6951
7126
  "null",
6952
- {
6953
- "type": "record",
6954
- "name": "VersionTag",
6955
- "namespace": "com.linkedin.pegasus2avro.common",
6956
- "fields": [
6957
- {
6958
- "type": [
6959
- "null",
6960
- "string"
6961
- ],
6962
- "name": "versionTag",
6963
- "default": null
6964
- }
6965
- ],
6966
- "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
6967
- }
7127
+ "com.linkedin.pegasus2avro.common.VersionTag"
6968
7128
  ],
6969
7129
  "name": "version",
6970
7130
  "default": null,
@@ -7174,54 +7334,6 @@
7174
7334
  "default": null,
7175
7335
  "doc": "Deployments for the MLModel"
7176
7336
  },
7177
- {
7178
- "Relationship": {
7179
- "/*": {
7180
- "entityTypes": [
7181
- "dataJob",
7182
- "dataProcessInstance"
7183
- ],
7184
- "isLineage": true,
7185
- "name": "TrainedBy"
7186
- }
7187
- },
7188
- "Urn": "Urn",
7189
- "urn_is_array": true,
7190
- "type": [
7191
- "null",
7192
- {
7193
- "type": "array",
7194
- "items": "string"
7195
- }
7196
- ],
7197
- "name": "trainingJobs",
7198
- "default": null,
7199
- "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
7200
- },
7201
- {
7202
- "Relationship": {
7203
- "/*": {
7204
- "entityTypes": [
7205
- "dataJob"
7206
- ],
7207
- "isLineage": true,
7208
- "isUpstream": false,
7209
- "name": "UsedBy"
7210
- }
7211
- },
7212
- "Urn": "Urn",
7213
- "urn_is_array": true,
7214
- "type": [
7215
- "null",
7216
- {
7217
- "type": "array",
7218
- "items": "string"
7219
- }
7220
- ],
7221
- "name": "downstreamJobs",
7222
- "default": null,
7223
- "doc": "List of jobs (if any) that use the model"
7224
- },
7225
7337
  {
7226
7338
  "Relationship": {
7227
7339
  "/*": {
@@ -8513,6 +8625,51 @@
8513
8625
  "default": {},
8514
8626
  "doc": "Custom property bag."
8515
8627
  },
8628
+ {
8629
+ "Relationship": {
8630
+ "/*": {
8631
+ "entityTypes": [
8632
+ "dataJob",
8633
+ "dataProcessInstance"
8634
+ ],
8635
+ "isLineage": true,
8636
+ "name": "TrainedBy"
8637
+ }
8638
+ },
8639
+ "type": [
8640
+ "null",
8641
+ {
8642
+ "type": "array",
8643
+ "items": "string"
8644
+ }
8645
+ ],
8646
+ "name": "trainingJobs",
8647
+ "default": null,
8648
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
8649
+ },
8650
+ {
8651
+ "Relationship": {
8652
+ "/*": {
8653
+ "entityTypes": [
8654
+ "dataJob",
8655
+ "dataProcessInstance"
8656
+ ],
8657
+ "isLineage": true,
8658
+ "isUpstream": false,
8659
+ "name": "UsedBy"
8660
+ }
8661
+ },
8662
+ "type": [
8663
+ "null",
8664
+ {
8665
+ "type": "array",
8666
+ "items": "string"
8667
+ }
8668
+ ],
8669
+ "name": "downstreamJobs",
8670
+ "default": null,
8671
+ "doc": "List of jobs or process instances (if any) that use the model or group."
8672
+ },
8516
8673
  {
8517
8674
  "Searchable": {
8518
8675
  "boostScore": 10.0,
@@ -8569,29 +8726,6 @@
8569
8726
  "default": null,
8570
8727
  "doc": "Date when the MLModelGroup was last modified"
8571
8728
  },
8572
- {
8573
- "Relationship": {
8574
- "/*": {
8575
- "entityTypes": [
8576
- "dataJob"
8577
- ],
8578
- "isLineage": true,
8579
- "name": "TrainedBy"
8580
- }
8581
- },
8582
- "Urn": "Urn",
8583
- "urn_is_array": true,
8584
- "type": [
8585
- "null",
8586
- {
8587
- "type": "array",
8588
- "items": "string"
8589
- }
8590
- ],
8591
- "name": "trainingJobs",
8592
- "default": null,
8593
- "doc": "List of jobs (if any) used to train the model group. Visible in Lineage."
8594
- },
8595
8729
  {
8596
8730
  "type": [
8597
8731
  "null",
@@ -12565,8 +12699,10 @@
12565
12699
  "Relationship": {
12566
12700
  "/*": {
12567
12701
  "entityTypes": [
12568
- "dataset"
12702
+ "dataset",
12703
+ "mlModel"
12569
12704
  ],
12705
+ "isLineage": true,
12570
12706
  "name": "Consumes"
12571
12707
  }
12572
12708
  },
@@ -12586,7 +12722,7 @@
12586
12722
  "items": "string"
12587
12723
  },
12588
12724
  "name": "inputs",
12589
- "doc": "Input datasets to be consumed"
12725
+ "doc": "Input assets consumed"
12590
12726
  }
12591
12727
  ],
12592
12728
  "doc": "Information about the inputs datasets of a Data process"
@@ -12749,6 +12885,8 @@
12749
12885
  "dataset",
12750
12886
  "mlModel"
12751
12887
  ],
12888
+ "isLineage": true,
12889
+ "isUpstream": false,
12752
12890
  "name": "Produces"
12753
12891
  }
12754
12892
  },
@@ -15527,6 +15665,55 @@
15527
15665
  ],
15528
15666
  "doc": "A simple wrapper around a String to persist the client ID for telemetry in DataHub's backend DB"
15529
15667
  },
15668
+ {
15669
+ "type": "record",
15670
+ "Aspect": {
15671
+ "name": "versionSetProperties"
15672
+ },
15673
+ "name": "VersionSetProperties",
15674
+ "namespace": "com.linkedin.pegasus2avro.versionset",
15675
+ "fields": [
15676
+ {
15677
+ "Searchable": {
15678
+ "/*": {
15679
+ "fieldType": "TEXT",
15680
+ "queryByDefault": true
15681
+ }
15682
+ },
15683
+ "type": {
15684
+ "type": "map",
15685
+ "values": "string"
15686
+ },
15687
+ "name": "customProperties",
15688
+ "default": {},
15689
+ "doc": "Custom property bag."
15690
+ },
15691
+ {
15692
+ "Searchable": {
15693
+ "queryByDefault": "false"
15694
+ },
15695
+ "java": {
15696
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
15697
+ },
15698
+ "Urn": "Urn",
15699
+ "type": "string",
15700
+ "name": "latest",
15701
+ "doc": "The latest versioned entity linked to in this version set"
15702
+ },
15703
+ {
15704
+ "type": {
15705
+ "type": "enum",
15706
+ "name": "VersioningScheme",
15707
+ "namespace": "com.linkedin.pegasus2avro.versionset",
15708
+ "symbols": [
15709
+ "ALPHANUMERIC_GENERATED_BY_DATAHUB"
15710
+ ]
15711
+ },
15712
+ "name": "versioningScheme",
15713
+ "doc": "What versioning scheme is being utilized for the versioned entities sort criterion. Static once set"
15714
+ }
15715
+ ]
15716
+ },
15530
15717
  {
15531
15718
  "type": "record",
15532
15719
  "Aspect": {
@@ -16263,6 +16450,32 @@
16263
16450
  ],
16264
16451
  "doc": "Key for a DataHub ingestion source"
16265
16452
  },
16453
+ {
16454
+ "type": "record",
16455
+ "Aspect": {
16456
+ "name": "versionSetKey",
16457
+ "keyForEntity": "versionSet",
16458
+ "entityCategory": "core",
16459
+ "entityAspects": [
16460
+ "versionSetProperties"
16461
+ ]
16462
+ },
16463
+ "name": "VersionSetKey",
16464
+ "namespace": "com.linkedin.pegasus2avro.metadata.key",
16465
+ "fields": [
16466
+ {
16467
+ "type": "string",
16468
+ "name": "id",
16469
+ "doc": "ID of the Version Set, generated from platform + asset id / name"
16470
+ },
16471
+ {
16472
+ "type": "string",
16473
+ "name": "entityType",
16474
+ "doc": "Type of entities included in version set, limits to a single entity type between linked versioned entities"
16475
+ }
16476
+ ],
16477
+ "doc": "Key for a Version Set entity"
16478
+ },
16266
16479
  "com.linkedin.pegasus2avro.metadata.key.DataHubRetentionKey",
16267
16480
  {
16268
16481
  "type": "record",