acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (223)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/METADATA +2617 -2590
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/RECORD +223 -189
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/entry_points.txt +2 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +1 -1
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/lake_formation_external_entites.py +161 -0
  10. datahub/api/entities/external/restricted_text.py +247 -0
  11. datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
  12. datahub/cli/check_cli.py +88 -7
  13. datahub/cli/cli_utils.py +63 -0
  14. datahub/cli/container_cli.py +5 -0
  15. datahub/cli/delete_cli.py +124 -27
  16. datahub/cli/docker_check.py +107 -12
  17. datahub/cli/docker_cli.py +149 -227
  18. datahub/cli/exists_cli.py +0 -2
  19. datahub/cli/get_cli.py +0 -2
  20. datahub/cli/iceberg_cli.py +5 -0
  21. datahub/cli/ingest_cli.py +3 -15
  22. datahub/cli/migrate.py +2 -0
  23. datahub/cli/put_cli.py +1 -4
  24. datahub/cli/quickstart_versioning.py +50 -7
  25. datahub/cli/specific/assertions_cli.py +0 -4
  26. datahub/cli/specific/datacontract_cli.py +0 -3
  27. datahub/cli/specific/dataproduct_cli.py +0 -11
  28. datahub/cli/specific/dataset_cli.py +1 -8
  29. datahub/cli/specific/forms_cli.py +0 -4
  30. datahub/cli/specific/group_cli.py +0 -2
  31. datahub/cli/specific/structuredproperties_cli.py +1 -4
  32. datahub/cli/specific/user_cli.py +0 -2
  33. datahub/cli/state_cli.py +0 -2
  34. datahub/cli/timeline_cli.py +0 -2
  35. datahub/configuration/pydantic_migration_helpers.py +7 -5
  36. datahub/emitter/rest_emitter.py +70 -12
  37. datahub/entrypoints.py +4 -3
  38. datahub/ingestion/api/decorators.py +15 -3
  39. datahub/ingestion/api/report.py +332 -3
  40. datahub/ingestion/api/sink.py +3 -0
  41. datahub/ingestion/api/source.py +48 -44
  42. datahub/ingestion/autogenerated/__init__.py +0 -0
  43. datahub/ingestion/autogenerated/capability_summary.json +3449 -0
  44. datahub/ingestion/autogenerated/lineage.json +401 -0
  45. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  46. datahub/ingestion/extractor/schema_util.py +13 -4
  47. datahub/ingestion/glossary/classification_mixin.py +5 -0
  48. datahub/ingestion/graph/client.py +100 -15
  49. datahub/ingestion/graph/config.py +1 -0
  50. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
  51. datahub/ingestion/run/pipeline.py +54 -2
  52. datahub/ingestion/sink/datahub_rest.py +13 -0
  53. datahub/ingestion/source/abs/source.py +1 -1
  54. datahub/ingestion/source/aws/aws_common.py +4 -0
  55. datahub/ingestion/source/aws/glue.py +489 -244
  56. datahub/ingestion/source/aws/tag_entities.py +292 -0
  57. datahub/ingestion/source/azure/azure_common.py +2 -2
  58. datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
  59. datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
  60. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
  61. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
  62. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  63. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  64. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  65. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  66. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  67. datahub/ingestion/source/common/subtypes.py +45 -0
  68. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  69. datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
  70. datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
  71. datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
  72. datahub/ingestion/source/dbt/dbt_common.py +6 -2
  73. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  74. datahub/ingestion/source/debug/__init__.py +0 -0
  75. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  76. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  77. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  78. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  79. datahub/ingestion/source/dremio/dremio_source.py +94 -81
  80. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  81. datahub/ingestion/source/file.py +3 -0
  82. datahub/ingestion/source/fivetran/fivetran.py +34 -26
  83. datahub/ingestion/source/gcs/gcs_source.py +13 -2
  84. datahub/ingestion/source/ge_data_profiler.py +76 -28
  85. datahub/ingestion/source/ge_profiling_config.py +11 -0
  86. datahub/ingestion/source/hex/api.py +26 -1
  87. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  88. datahub/ingestion/source/identity/azure_ad.py +1 -1
  89. datahub/ingestion/source/identity/okta.py +1 -14
  90. datahub/ingestion/source/kafka/kafka.py +16 -0
  91. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  92. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  93. datahub/ingestion/source/looker/looker_source.py +1 -0
  94. datahub/ingestion/source/mlflow.py +11 -1
  95. datahub/ingestion/source/mock_data/__init__.py +0 -0
  96. datahub/ingestion/source/mock_data/datahub_mock_data.py +507 -0
  97. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  98. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  99. datahub/ingestion/source/nifi.py +1 -1
  100. datahub/ingestion/source/powerbi/powerbi.py +1 -5
  101. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  102. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  103. datahub/ingestion/source/preset.py +2 -2
  104. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  105. datahub/ingestion/source/redshift/redshift.py +21 -1
  106. datahub/ingestion/source/redshift/usage.py +4 -3
  107. datahub/ingestion/source/s3/report.py +4 -2
  108. datahub/ingestion/source/s3/source.py +367 -115
  109. datahub/ingestion/source/sac/sac.py +3 -1
  110. datahub/ingestion/source/salesforce.py +6 -3
  111. datahub/ingestion/source/sigma/sigma.py +7 -1
  112. datahub/ingestion/source/slack/slack.py +2 -1
  113. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  114. datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
  115. datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
  116. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  117. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  118. datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
  119. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  120. datahub/ingestion/source/sql/athena.py +119 -11
  121. datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
  122. datahub/ingestion/source/sql/clickhouse.py +3 -1
  123. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  124. datahub/ingestion/source/sql/hana.py +3 -1
  125. datahub/ingestion/source/sql/hive_metastore.py +3 -11
  126. datahub/ingestion/source/sql/mariadb.py +0 -1
  127. datahub/ingestion/source/sql/mssql/source.py +239 -34
  128. datahub/ingestion/source/sql/mysql.py +0 -1
  129. datahub/ingestion/source/sql/oracle.py +1 -1
  130. datahub/ingestion/source/sql/postgres.py +0 -1
  131. datahub/ingestion/source/sql/sql_common.py +121 -34
  132. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  133. datahub/ingestion/source/sql/teradata.py +997 -235
  134. datahub/ingestion/source/sql/vertica.py +10 -6
  135. datahub/ingestion/source/sql_queries.py +2 -2
  136. datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
  137. datahub/ingestion/source/superset.py +58 -3
  138. datahub/ingestion/source/tableau/tableau.py +58 -37
  139. datahub/ingestion/source/tableau/tableau_common.py +4 -2
  140. datahub/ingestion/source/tableau/tableau_constant.py +0 -4
  141. datahub/ingestion/source/unity/config.py +5 -0
  142. datahub/ingestion/source/unity/proxy.py +118 -0
  143. datahub/ingestion/source/unity/source.py +195 -17
  144. datahub/ingestion/source/unity/tag_entities.py +295 -0
  145. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  146. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  147. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  148. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  149. datahub/metadata/_internal_schema_classes.py +1522 -569
  150. datahub/metadata/_urns/urn_defs.py +1826 -1658
  151. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  152. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  153. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  154. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  155. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +29 -0
  156. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  157. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
  158. datahub/metadata/schema.avsc +17758 -17097
  159. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  160. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  161. datahub/metadata/schemas/Applications.avsc +38 -0
  162. datahub/metadata/schemas/ChartKey.avsc +1 -0
  163. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  164. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  165. datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
  166. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  167. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  168. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  169. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  170. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +237 -0
  171. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  172. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
  173. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  174. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  175. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  176. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  177. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  178. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  179. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  180. datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
  181. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  182. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  183. datahub/metadata/schemas/LogicalParent.avsc +140 -0
  184. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  185. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  186. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  187. datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
  188. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  189. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  190. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
  191. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  192. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  193. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  194. datahub/metadata/schemas/__init__.py +3 -3
  195. datahub/sdk/__init__.py +2 -0
  196. datahub/sdk/_all_entities.py +7 -0
  197. datahub/sdk/_shared.py +116 -0
  198. datahub/sdk/chart.py +315 -0
  199. datahub/sdk/container.py +7 -0
  200. datahub/sdk/dashboard.py +432 -0
  201. datahub/sdk/dataflow.py +7 -0
  202. datahub/sdk/datajob.py +45 -13
  203. datahub/sdk/dataset.py +8 -2
  204. datahub/sdk/entity_client.py +82 -2
  205. datahub/sdk/lineage_client.py +683 -82
  206. datahub/sdk/main_client.py +46 -16
  207. datahub/sdk/mlmodel.py +101 -38
  208. datahub/sdk/mlmodelgroup.py +7 -0
  209. datahub/sdk/search_client.py +4 -3
  210. datahub/sdk/search_filters.py +95 -27
  211. datahub/specific/chart.py +1 -1
  212. datahub/specific/dataproduct.py +4 -0
  213. datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
  214. datahub/sql_parsing/sqlglot_lineage.py +62 -13
  215. datahub/telemetry/telemetry.py +17 -11
  216. datahub/testing/sdk_v2_helpers.py +7 -1
  217. datahub/upgrade/upgrade.py +56 -14
  218. datahub/utilities/server_config_util.py +8 -0
  219. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  220. datahub/utilities/stats_collections.py +4 -0
  221. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/WHEEL +0 -0
  222. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/licenses/LICENSE +0 -0
  223. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/top_level.txt +0 -0
datahub/sdk/container.py CHANGED
@@ -23,12 +23,14 @@ from datahub.sdk._shared import (
23
23
  HasInstitutionalMemory,
24
24
  HasOwnership,
25
25
  HasPlatformInstance,
26
+ HasStructuredProperties,
26
27
  HasSubtype,
27
28
  HasTags,
28
29
  HasTerms,
29
30
  LinksInputType,
30
31
  OwnersInputType,
31
32
  ParentContainerInputType,
33
+ StructuredPropertyInputType,
32
34
  TagsInputType,
33
35
  TermsInputType,
34
36
  make_time_stamp,
@@ -44,6 +46,7 @@ class Container(
44
46
  HasContainer,
45
47
  HasOwnership,
46
48
  HasInstitutionalMemory,
49
+ HasStructuredProperties,
47
50
  HasTags,
48
51
  HasTerms,
49
52
  HasDomain,
@@ -78,6 +81,7 @@ class Container(
78
81
  tags: Optional[TagsInputType] = None,
79
82
  terms: Optional[TermsInputType] = None,
80
83
  domain: Optional[DomainInputType] = None,
84
+ structured_properties: Optional[StructuredPropertyInputType] = None,
81
85
  extra_aspects: ExtraAspectsType = None,
82
86
  ):
83
87
  # Hack: while the type annotations say container_key is always a ContainerKey,
@@ -145,6 +149,9 @@ class Container(
145
149
  self.set_terms(terms)
146
150
  if domain is not None:
147
151
  self.set_domain(domain)
152
+ if structured_properties is not None:
153
+ for key, value in structured_properties.items():
154
+ self.set_structured_property(property_urn=key, values=value)
148
155
 
149
156
  @classmethod
150
157
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
@@ -0,0 +1,432 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Dict, List, Optional, Type, Union
5
+
6
+ from typing_extensions import Self
7
+
8
+ import datahub.metadata.schema_classes as models
9
+ from datahub.metadata.urns import ChartUrn, DashboardUrn, DatasetUrn, Urn
10
+ from datahub.sdk._shared import (
11
+ ChartUrnOrStr,
12
+ DashboardUrnOrStr,
13
+ DataPlatformInstanceUrnOrStr,
14
+ DataPlatformUrnOrStr,
15
+ DatasetUrnOrStr,
16
+ DomainInputType,
17
+ HasContainer,
18
+ HasDomain,
19
+ HasInstitutionalMemory,
20
+ HasOwnership,
21
+ HasPlatformInstance,
22
+ HasSubtype,
23
+ HasTags,
24
+ HasTerms,
25
+ LinksInputType,
26
+ OwnersInputType,
27
+ TagsInputType,
28
+ TermsInputType,
29
+ )
30
+ from datahub.sdk.chart import Chart
31
+ from datahub.sdk.dataset import Dataset
32
+ from datahub.sdk.entity import Entity, ExtraAspectsType
33
+
34
+
35
def _as_dataset_urn(ref: Union[DatasetUrnOrStr, Dataset]) -> DatasetUrn:
    """Return the dataset URN for an SDK ``Dataset`` or a dataset URN/str."""
    return ref.urn if isinstance(ref, Dataset) else DatasetUrn.from_string(ref)


def _as_chart_urn(ref: Union[ChartUrnOrStr, Chart]) -> ChartUrn:
    """Return the chart URN for an SDK ``Chart`` or a chart URN/str."""
    return ref.urn if isinstance(ref, Chart) else ChartUrn.from_string(ref)


def _as_dashboard_urn(ref: Union[DashboardUrnOrStr, Dashboard]) -> DashboardUrn:
    """Return the dashboard URN for an SDK ``Dashboard`` or a dashboard URN/str."""
    return ref.urn if isinstance(ref, Dashboard) else DashboardUrn.from_string(ref)


def _to_epoch_millis(ts: datetime) -> int:
    """Convert a datetime to integer epoch milliseconds."""
    return int(ts.timestamp() * 1000)


def _from_epoch_millis(millis: int) -> datetime:
    """Convert epoch milliseconds to a naive, local-time datetime."""
    return datetime.fromtimestamp(millis / 1000)


class Dashboard(
    HasPlatformInstance,
    HasSubtype,
    HasOwnership,
    HasContainer,
    HasInstitutionalMemory,
    HasTags,
    HasTerms,
    HasDomain,
    Entity,
):
    """Represents a dashboard in DataHub.

    All dashboard-specific state lives in a single ``DashboardInfoClass``
    aspect; the properties and setters below read and write that aspect.
    """

    __slots__ = ()

    @classmethod
    def get_urn_type(cls) -> Type[DashboardUrn]:
        """Get the URN type for dashboards.

        Returns:
            The DashboardUrn class.
        """
        return DashboardUrn

    def __init__(
        self,
        *,
        # Identity.
        name: str,
        platform: DataPlatformUrnOrStr,
        display_name: Optional[str] = None,
        platform_instance: Optional[DataPlatformInstanceUrnOrStr] = None,
        # Dashboard properties.
        description: str = "",
        external_url: Optional[str] = None,
        dashboard_url: Optional[str] = None,
        custom_properties: Optional[Dict[str, str]] = None,
        last_modified: Optional[datetime] = None,
        last_refreshed: Optional[datetime] = None,
        input_datasets: Optional[List[Union[DatasetUrnOrStr, Dataset]]] = None,
        charts: Optional[List[Union[ChartUrnOrStr, Chart]]] = None,
        dashboards: Optional[List[Union[DashboardUrnOrStr, Dashboard]]] = None,
        # Standard aspects.
        subtype: Optional[str] = None,
        owners: Optional[OwnersInputType] = None,
        links: Optional[LinksInputType] = None,
        tags: Optional[TagsInputType] = None,
        terms: Optional[TermsInputType] = None,
        domain: Optional[DomainInputType] = None,
        extra_aspects: ExtraAspectsType = None,
    ):
        """Initialize a new Dashboard instance.

        The URN is built from ``platform``, ``name`` and the optional
        ``platform_instance``. Every other argument is applied through the
        matching ``set_*`` method when it is not ``None``.
        """
        urn = DashboardUrn.create_from_ids(
            platform=str(platform),
            name=name,
            platform_instance=str(platform_instance) if platform_instance else None,
        )
        super().__init__(urn)
        self._set_extra_aspects(extra_aspects)

        self._set_platform_instance(platform, platform_instance)

        # Seed the info aspect so the default title is the caller-supplied
        # name. Previously this object was built and then discarded, so the
        # title silently fell back to the URN's dashboard id. setdefault keeps
        # any DashboardInfo that arrived through ``extra_aspects``.
        self._setdefault_aspect(
            models.DashboardInfoClass(
                title=display_name or name,
                description=description or "",
                lastModified=models.ChangeAuditStampsClass(
                    lastModified=models.AuditStampClass(
                        time=0, actor="urn:li:corpuser:unknown"
                    )
                ),
                customProperties={},
                chartEdges=[],
                datasetEdges=[],
                dashboards=[],
            )
        )

        # Set additional properties
        if description is not None:
            self.set_description(description)
        if display_name is not None:
            self.set_display_name(display_name)
        if external_url is not None:
            self.set_external_url(external_url)
        if dashboard_url is not None:
            self.set_dashboard_url(dashboard_url)
        if custom_properties is not None:
            self.set_custom_properties(custom_properties)
        if last_modified is not None:
            self.set_last_modified(last_modified)
        if last_refreshed is not None:
            self.set_last_refreshed(last_refreshed)
        if subtype is not None:
            self.set_subtype(subtype)
        if owners is not None:
            self.set_owners(owners)
        if links is not None:
            self.set_links(links)
        if tags is not None:
            self.set_tags(tags)
        if terms is not None:
            self.set_terms(terms)
        if domain is not None:
            self.set_domain(domain)
        if input_datasets is not None:
            self.set_input_datasets(input_datasets)
        if charts is not None:
            self.set_charts(charts)
        if dashboards is not None:
            self.set_dashboards(dashboards)

    @classmethod
    def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
        """Build a Dashboard from a URN and the aspects fetched for it."""
        assert isinstance(urn, DashboardUrn)
        entity = cls(
            platform=urn.dashboard_tool,
            name=urn.dashboard_id,
        )
        return entity._init_from_graph(current_aspects)

    @property
    def urn(self) -> DashboardUrn:
        """Get the dashboard's URN."""
        assert isinstance(self._urn, DashboardUrn)
        return self._urn

    def _ensure_dashboard_props(self) -> models.DashboardInfoClass:
        """Return the DashboardInfo aspect, creating an empty one if absent.

        The placeholder uses ``time=0`` as the "never modified" marker that
        ``last_modified`` translates to ``None``.
        """
        return self._setdefault_aspect(
            models.DashboardInfoClass(
                title=self.urn.dashboard_id,
                description="",
                lastModified=models.ChangeAuditStampsClass(
                    lastModified=models.AuditStampClass(
                        time=0, actor="urn:li:corpuser:unknown"
                    )
                ),
                customProperties={},
                chartEdges=[],
                datasetEdges=[],
                dashboards=[],
            )
        )

    @property
    def name(self) -> str:
        """Get the name of the dashboard (the id component of its URN)."""
        return self.urn.dashboard_id

    @property
    def title(self) -> str:
        """Get the title of the dashboard."""
        return self._ensure_dashboard_props().title

    def set_title(self, title: str) -> None:
        """Set the title of the dashboard."""
        props = self._ensure_dashboard_props()
        props.title = title
        self._set_aspect(props)

    @property
    def description(self) -> Optional[str]:
        """Get the description of the dashboard."""
        return self._ensure_dashboard_props().description

    def set_description(self, description: str) -> None:
        """Set the description of the dashboard."""
        props = self._ensure_dashboard_props()
        props.description = description
        self._set_aspect(props)

    @property
    def display_name(self) -> Optional[str]:
        """Get the display name of the dashboard (an alias for ``title``)."""
        return self.title

    def set_display_name(self, display_name: str) -> None:
        """Set the display name of the dashboard (an alias for ``set_title``)."""
        self.set_title(display_name)

    @property
    def external_url(self) -> Optional[str]:
        """Get the external URL of the dashboard."""
        return self._ensure_dashboard_props().externalUrl

    def set_external_url(self, external_url: str) -> None:
        """Set the external URL of the dashboard."""
        props = self._ensure_dashboard_props()
        props.externalUrl = external_url
        self._set_aspect(props)

    @property
    def dashboard_url(self) -> Optional[str]:
        """Get the dashboard URL."""
        return self._ensure_dashboard_props().dashboardUrl

    def set_dashboard_url(self, dashboard_url: str) -> None:
        """Set the dashboard URL."""
        props = self._ensure_dashboard_props()
        props.dashboardUrl = dashboard_url
        self._set_aspect(props)

    @property
    def custom_properties(self) -> Dict[str, str]:
        """Get the custom properties of the dashboard (empty dict if unset)."""
        return self._ensure_dashboard_props().customProperties or {}

    def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
        """Set the custom properties of the dashboard."""
        props = self._ensure_dashboard_props()
        props.customProperties = custom_properties
        self._set_aspect(props)

    @property
    def last_modified(self) -> Optional[datetime]:
        """Get the last modification timestamp of the dashboard.

        Returns:
            ``None`` when the stored stamp is the ``time=0`` placeholder,
            otherwise the stamp decoded from epoch milliseconds.
        """
        stamp_millis = self._ensure_dashboard_props().lastModified.lastModified.time
        if stamp_millis == 0:
            return None
        return _from_epoch_millis(stamp_millis)

    def set_last_modified(self, last_modified: datetime) -> None:
        """Set the last modification timestamp of the dashboard.

        The stamp is stored as epoch milliseconds, the unit DataHub audit
        stamps use, and attributed to ``urn:li:corpuser:datahub``.
        """
        props = self._ensure_dashboard_props()
        props.lastModified = models.ChangeAuditStampsClass(
            lastModified=models.AuditStampClass(
                time=_to_epoch_millis(last_modified),
                actor="urn:li:corpuser:datahub",
            ),
        )
        self._set_aspect(props)

    @property
    def last_refreshed(self) -> Optional[datetime]:
        """Get the last refresh timestamp of the dashboard, if one is set."""
        refreshed_millis = self._ensure_dashboard_props().lastRefreshed
        if refreshed_millis is None:
            return None
        return _from_epoch_millis(refreshed_millis)

    def set_last_refreshed(self, last_refreshed: datetime) -> None:
        """Set the last refresh timestamp of the dashboard (epoch milliseconds)."""
        props = self._ensure_dashboard_props()
        props.lastRefreshed = _to_epoch_millis(last_refreshed)
        self._set_aspect(props)

    @property
    def input_datasets(self) -> List[DatasetUrn]:
        """Get the input datasets of the dashboard."""
        props = self._ensure_dashboard_props()
        return [
            DatasetUrn.from_string(edge.destinationUrn)
            for edge in (props.datasetEdges or [])
        ]

    def set_input_datasets(
        self, input_datasets: List[Union[DatasetUrnOrStr, Dataset]]
    ) -> None:
        """Replace the input datasets of the dashboard with ``input_datasets``."""
        props = self._ensure_dashboard_props()
        # Build a fresh list: "set" must not accumulate onto earlier edges.
        props.datasetEdges = [
            models.EdgeClass(destinationUrn=str(_as_dataset_urn(dataset)))
            for dataset in input_datasets
        ]
        self._set_aspect(props)

    def add_input_dataset(self, input_dataset: Union[DatasetUrnOrStr, Dataset]) -> None:
        """Add an input dataset to the dashboard if it is not already present."""
        target = str(_as_dataset_urn(input_dataset))
        props = self._ensure_dashboard_props()
        edges = list(props.datasetEdges or [])
        if all(edge.destinationUrn != target for edge in edges):
            edges.append(models.EdgeClass(destinationUrn=target))
        props.datasetEdges = edges
        self._set_aspect(props)

    def remove_input_dataset(
        self, input_dataset: Union[DatasetUrnOrStr, Dataset]
    ) -> None:
        """Remove an input dataset from the dashboard."""
        target = str(_as_dataset_urn(input_dataset))
        props = self._ensure_dashboard_props()
        props.datasetEdges = [
            edge for edge in (props.datasetEdges or []) if edge.destinationUrn != target
        ]
        self._set_aspect(props)

    @property
    def charts(self) -> List[ChartUrn]:
        """Get the charts of the dashboard."""
        props = self._ensure_dashboard_props()
        return [
            ChartUrn.from_string(edge.destinationUrn)
            for edge in (props.chartEdges or [])
        ]

    def set_charts(self, charts: List[Union[ChartUrnOrStr, Chart]]) -> None:
        """Replace the charts of the dashboard with ``charts``."""
        props = self._ensure_dashboard_props()
        props.chartEdges = [
            models.EdgeClass(destinationUrn=str(_as_chart_urn(chart)))
            for chart in charts
        ]
        self._set_aspect(props)

    def add_chart(self, chart: Union[ChartUrnOrStr, Chart]) -> None:
        """Add a chart to the dashboard if it is not already present."""
        target = str(_as_chart_urn(chart))
        props = self._ensure_dashboard_props()
        edges = list(props.chartEdges or [])
        if all(edge.destinationUrn != target for edge in edges):
            edges.append(models.EdgeClass(destinationUrn=target))
        props.chartEdges = edges
        self._set_aspect(props)

    def remove_chart(self, chart: Union[ChartUrnOrStr, Chart]) -> None:
        """Remove a chart from the dashboard."""
        target = str(_as_chart_urn(chart))
        props = self._ensure_dashboard_props()
        props.chartEdges = [
            edge for edge in (props.chartEdges or []) if edge.destinationUrn != target
        ]
        self._set_aspect(props)

    @property
    def dashboards(self) -> List[DashboardUrn]:
        """Get the dashboards referenced by this dashboard."""
        props = self._ensure_dashboard_props()
        return [
            DashboardUrn.from_string(edge.destinationUrn)
            for edge in (props.dashboards or [])
        ]

    def set_dashboards(
        self, dashboards: List[Union[DashboardUrnOrStr, Dashboard]]
    ) -> None:
        """Replace the dashboards referenced by this dashboard."""
        props = self._ensure_dashboard_props()
        # Assigning a new list also covers the case where the stored field is
        # None, which the previous in-place ``.append`` could not handle.
        props.dashboards = [
            models.EdgeClass(destinationUrn=str(_as_dashboard_urn(dashboard)))
            for dashboard in dashboards
        ]
        self._set_aspect(props)

    def add_dashboard(self, dashboard: Union[DashboardUrnOrStr, Dashboard]) -> None:
        """Add a referenced dashboard if it is not already present."""
        target = str(_as_dashboard_urn(dashboard))
        props = self._ensure_dashboard_props()
        edges = list(props.dashboards or [])
        if all(edge.destinationUrn != target for edge in edges):
            edges.append(models.EdgeClass(destinationUrn=target))
        # Write the list back: ``props.dashboards or []`` yields a throwaway
        # list when the field is empty, so the append was previously lost.
        props.dashboards = edges
        self._set_aspect(props)
datahub/sdk/dataflow.py CHANGED
@@ -21,12 +21,14 @@ from datahub.sdk._shared import (
21
21
  HasInstitutionalMemory,
22
22
  HasOwnership,
23
23
  HasPlatformInstance,
24
+ HasStructuredProperties,
24
25
  HasSubtype,
25
26
  HasTags,
26
27
  HasTerms,
27
28
  LinksInputType,
28
29
  OwnersInputType,
29
30
  ParentContainerInputType,
31
+ StructuredPropertyInputType,
30
32
  TagsInputType,
31
33
  TermsInputType,
32
34
  make_time_stamp,
@@ -45,6 +47,7 @@ class DataFlow(
45
47
  HasTags,
46
48
  HasTerms,
47
49
  HasDomain,
50
+ HasStructuredProperties,
48
51
  Entity,
49
52
  ):
50
53
  """Represents a dataflow in DataHub.
@@ -86,6 +89,7 @@ class DataFlow(
86
89
  terms: Optional[TermsInputType] = None,
87
90
  domain: Optional[DomainInputType] = None,
88
91
  parent_container: ParentContainerInputType | Unset = unset,
92
+ structured_properties: Optional[StructuredPropertyInputType] = None,
89
93
  extra_aspects: ExtraAspectsType = None,
90
94
  ):
91
95
  """Initialize a new Dataflow instance.
@@ -150,6 +154,9 @@ class DataFlow(
150
154
  self.set_domain(domain)
151
155
  if parent_container is not unset:
152
156
  self._set_container(parent_container)
157
+ if structured_properties is not None:
158
+ for key, value in structured_properties.items():
159
+ self.set_structured_property(property_urn=key, values=value)
153
160
 
154
161
  @classmethod
155
162
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
datahub/sdk/datajob.py CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Type
6
6
 
7
7
  from typing_extensions import Self
8
8
 
9
+ import datahub.emitter.mce_builder as builder
9
10
  import datahub.metadata.schema_classes as models
10
11
  from datahub.cli.cli_utils import first_non_null
11
12
  from datahub.errors import IngestionAttributionWarning
@@ -25,11 +26,13 @@ from datahub.sdk._shared import (
25
26
  HasInstitutionalMemory,
26
27
  HasOwnership,
27
28
  HasPlatformInstance,
29
+ HasStructuredProperties,
28
30
  HasSubtype,
29
31
  HasTags,
30
32
  HasTerms,
31
33
  LinksInputType,
32
34
  OwnersInputType,
35
+ StructuredPropertyInputType,
33
36
  TagsInputType,
34
37
  TermsInputType,
35
38
  make_time_stamp,
@@ -48,6 +51,7 @@ class DataJob(
48
51
  HasTags,
49
52
  HasTerms,
50
53
  HasDomain,
54
+ HasStructuredProperties,
51
55
  Entity,
52
56
  ):
53
57
  """Represents a data job in DataHub.
@@ -61,7 +65,7 @@ class DataJob(
61
65
  """Get the URN type for data jobs."""
62
66
  return DataJobUrn
63
67
 
64
- def __init__(
68
+ def __init__( # noqa: C901
65
69
  self,
66
70
  *,
67
71
  name: str,
@@ -81,9 +85,11 @@ class DataJob(
81
85
  tags: Optional[TagsInputType] = None,
82
86
  terms: Optional[TermsInputType] = None,
83
87
  domain: Optional[DomainInputType] = None,
84
- extra_aspects: ExtraAspectsType = None,
85
88
  inlets: Optional[List[DatasetUrnOrStr]] = None,
86
89
  outlets: Optional[List[DatasetUrnOrStr]] = None,
90
+ fine_grained_lineages: Optional[List[models.FineGrainedLineageClass]] = None,
91
+ structured_properties: Optional[StructuredPropertyInputType] = None,
92
+ extra_aspects: ExtraAspectsType = None,
87
93
  ):
88
94
  """
89
95
  Initialize a DataJob with either a DataFlow or a DataFlowUrn with platform instance.
@@ -99,12 +105,14 @@ class DataJob(
99
105
  ValueError: If neither flow nor (flow_urn and platform_instance) are provided
100
106
  """
101
107
  if flow is None:
102
- if flow_urn is None or platform_instance is None:
108
+ if flow_urn is None:
103
109
  raise ValueError(
104
110
  "You must provide either: 1. a DataFlow object, or 2. a DataFlowUrn (and a platform_instance config if required)"
105
111
  )
106
112
  flow_urn = DataFlowUrn.from_string(flow_urn)
107
- if flow_urn.flow_id.startswith(f"{platform_instance}."):
113
+ if platform_instance and flow_urn.flow_id.startswith(
114
+ f"{platform_instance}."
115
+ ):
108
116
  flow_name = flow_urn.flow_id[len(platform_instance) + 1 :]
109
117
  else:
110
118
  flow_name = flow_urn.flow_id
@@ -129,8 +137,6 @@ class DataJob(
129
137
  )
130
138
  self._setdefault_aspect(job_info)
131
139
  self._ensure_datajob_props().flowUrn = str(flow.urn)
132
-
133
- # Set properties if provided
134
140
  if description is not None:
135
141
  self.set_description(description)
136
142
  if external_url is not None:
@@ -141,8 +147,6 @@ class DataJob(
141
147
  self.set_created(created)
142
148
  if last_modified is not None:
143
149
  self.set_last_modified(last_modified)
144
-
145
- # Set standard aspects
146
150
  if subtype is not None:
147
151
  self.set_subtype(subtype)
148
152
  if owners is not None:
@@ -155,10 +159,19 @@ class DataJob(
155
159
  self.set_terms(terms)
156
160
  if domain is not None:
157
161
  self.set_domain(domain)
162
+ if structured_properties is not None:
163
+ for key, value in structured_properties.items():
164
+ self.set_structured_property(property_urn=key, values=value)
158
165
  if inlets is not None:
159
166
  self.set_inlets(inlets)
160
167
  if outlets is not None:
161
168
  self.set_outlets(outlets)
169
+ if fine_grained_lineages is not None:
170
+ self.set_fine_grained_lineages(fine_grained_lineages)
171
+
172
+ if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
173
+ env = self.flow_urn.cluster.upper()
174
+ self._ensure_datajob_props().env = env
162
175
 
163
176
  @classmethod
164
177
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
@@ -194,9 +207,7 @@ class DataJob(
194
207
  ) -> Optional[models.DataJobInputOutputClass]:
195
208
  return self._get_aspect(models.DataJobInputOutputClass)
196
209
 
197
- def _ensure_datajob_inputoutput_props(
198
- self,
199
- ) -> models.DataJobInputOutputClass:
210
def _ensure_datajob_inputoutput_props(self) -> models.DataJobInputOutputClass:
    """Return the job's input/output aspect, supplying an empty default.

    An aspect with no input and no output datasets is handed to
    ``_setdefault_aspect`` and whatever that helper returns is passed back.
    NOTE(review): presumably the helper keeps an already-present aspect and
    only stores the default when none exists -- confirm against its definition.
    """
    empty_io = models.DataJobInputOutputClass(inputDatasets=[], outputDatasets=[])
    return self._setdefault_aspect(empty_io)
@@ -300,12 +311,11 @@ class DataJob(
300
311
  browse_path.append(
301
312
  models.BrowsePathEntryClass(id=entry.id, urn=entry.urn)
302
313
  )
303
-
304
- # Add the job itself to the path
305
314
  browse_path.append(models.BrowsePathEntryClass(id=flow.name, urn=str(flow.urn)))
306
315
  # Set the browse path aspect
307
316
  self._set_aspect(models.BrowsePathsV2Class(path=browse_path))
308
317
 
318
+ # TODO: support datajob input/output
309
319
  @property
310
320
  def inlets(self) -> List[DatasetUrn]:
311
321
  """Get the inlets of the data job."""
@@ -333,3 +343,25 @@ class DataJob(
333
343
  self._ensure_datajob_inputoutput_props().outputDatasets.append(
334
344
  str(outlet_urn)
335
345
  )
346
+
347
@property
def fine_grained_lineages(self) -> List[models.FineGrainedLineageClass]:
    """Get the fine-grained lineages recorded on the data job.

    Returns:
        The ``fineGrainedLineages`` list stored on the input/output aspect, or
        a new empty list when the aspect is absent or holds no lineages.
    """
    io_props = self._get_datajob_inputoutput_props()
    # No input/output aspect yet: nothing has been recorded.
    if not io_props:
        return []
    lineages = io_props.fineGrainedLineages
    # Both ``None`` and an empty list are reported as "no lineages".
    if not lineages:
        return []
    return lineages
355
+
356
def set_fine_grained_lineages(
    self, lineages: List[models.FineGrainedLineageClass]
) -> None:
    """Add fine-grained lineages to the data job's input/output aspect.

    The given entries are appended to any lineages already stored on the
    aspect; existing entries are not removed.

    Args:
        lineages: Fine-grained lineage entries to append.
    """
    aspect = self._ensure_datajob_inputoutput_props()
    current = aspect.fineGrainedLineages
    if current is None:
        # Initialise the field, then read it back so we extend the stored list.
        aspect.fineGrainedLineages = []
        current = aspect.fineGrainedLineages
    current.extend(lineages)
363
+
364
@property
def env(self) -> Optional[str]:
    """Get the environment of the data job.

    Returns:
        The environment stored on the job's properties aspect as a string, or
        ``None`` when no environment has been recorded. The constructor only
        records one when the flow's cluster is a recognised environment type.
    """
    # NOTE(review): ``_ensure_datajob_props`` presumably creates the aspect
    # when it is missing, so this getter may mutate the entity -- confirm.
    env_value = self._ensure_datajob_props().env
    # Do not stringify a missing value: ``str(None)`` would yield "None",
    # which callers cannot tell apart from a real environment name.
    if env_value is None:
        return None
    return str(env_value)