acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (223) hide show
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/METADATA +2617 -2590
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/RECORD +223 -189
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/entry_points.txt +2 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +1 -1
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/lake_formation_external_entites.py +161 -0
  10. datahub/api/entities/external/restricted_text.py +247 -0
  11. datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
  12. datahub/cli/check_cli.py +88 -7
  13. datahub/cli/cli_utils.py +63 -0
  14. datahub/cli/container_cli.py +5 -0
  15. datahub/cli/delete_cli.py +124 -27
  16. datahub/cli/docker_check.py +107 -12
  17. datahub/cli/docker_cli.py +149 -227
  18. datahub/cli/exists_cli.py +0 -2
  19. datahub/cli/get_cli.py +0 -2
  20. datahub/cli/iceberg_cli.py +5 -0
  21. datahub/cli/ingest_cli.py +3 -15
  22. datahub/cli/migrate.py +2 -0
  23. datahub/cli/put_cli.py +1 -4
  24. datahub/cli/quickstart_versioning.py +50 -7
  25. datahub/cli/specific/assertions_cli.py +0 -4
  26. datahub/cli/specific/datacontract_cli.py +0 -3
  27. datahub/cli/specific/dataproduct_cli.py +0 -11
  28. datahub/cli/specific/dataset_cli.py +1 -8
  29. datahub/cli/specific/forms_cli.py +0 -4
  30. datahub/cli/specific/group_cli.py +0 -2
  31. datahub/cli/specific/structuredproperties_cli.py +1 -4
  32. datahub/cli/specific/user_cli.py +0 -2
  33. datahub/cli/state_cli.py +0 -2
  34. datahub/cli/timeline_cli.py +0 -2
  35. datahub/configuration/pydantic_migration_helpers.py +7 -5
  36. datahub/emitter/rest_emitter.py +70 -12
  37. datahub/entrypoints.py +4 -3
  38. datahub/ingestion/api/decorators.py +15 -3
  39. datahub/ingestion/api/report.py +332 -3
  40. datahub/ingestion/api/sink.py +3 -0
  41. datahub/ingestion/api/source.py +48 -44
  42. datahub/ingestion/autogenerated/__init__.py +0 -0
  43. datahub/ingestion/autogenerated/capability_summary.json +3449 -0
  44. datahub/ingestion/autogenerated/lineage.json +401 -0
  45. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  46. datahub/ingestion/extractor/schema_util.py +13 -4
  47. datahub/ingestion/glossary/classification_mixin.py +5 -0
  48. datahub/ingestion/graph/client.py +100 -15
  49. datahub/ingestion/graph/config.py +1 -0
  50. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
  51. datahub/ingestion/run/pipeline.py +54 -2
  52. datahub/ingestion/sink/datahub_rest.py +13 -0
  53. datahub/ingestion/source/abs/source.py +1 -1
  54. datahub/ingestion/source/aws/aws_common.py +4 -0
  55. datahub/ingestion/source/aws/glue.py +489 -244
  56. datahub/ingestion/source/aws/tag_entities.py +292 -0
  57. datahub/ingestion/source/azure/azure_common.py +2 -2
  58. datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
  59. datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
  60. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
  61. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
  62. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  63. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  64. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  65. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  66. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  67. datahub/ingestion/source/common/subtypes.py +45 -0
  68. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  69. datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
  70. datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
  71. datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
  72. datahub/ingestion/source/dbt/dbt_common.py +6 -2
  73. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  74. datahub/ingestion/source/debug/__init__.py +0 -0
  75. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  76. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  77. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  78. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  79. datahub/ingestion/source/dremio/dremio_source.py +94 -81
  80. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  81. datahub/ingestion/source/file.py +3 -0
  82. datahub/ingestion/source/fivetran/fivetran.py +34 -26
  83. datahub/ingestion/source/gcs/gcs_source.py +13 -2
  84. datahub/ingestion/source/ge_data_profiler.py +76 -28
  85. datahub/ingestion/source/ge_profiling_config.py +11 -0
  86. datahub/ingestion/source/hex/api.py +26 -1
  87. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  88. datahub/ingestion/source/identity/azure_ad.py +1 -1
  89. datahub/ingestion/source/identity/okta.py +1 -14
  90. datahub/ingestion/source/kafka/kafka.py +16 -0
  91. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  92. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  93. datahub/ingestion/source/looker/looker_source.py +1 -0
  94. datahub/ingestion/source/mlflow.py +11 -1
  95. datahub/ingestion/source/mock_data/__init__.py +0 -0
  96. datahub/ingestion/source/mock_data/datahub_mock_data.py +507 -0
  97. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  98. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  99. datahub/ingestion/source/nifi.py +1 -1
  100. datahub/ingestion/source/powerbi/powerbi.py +1 -5
  101. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  102. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  103. datahub/ingestion/source/preset.py +2 -2
  104. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  105. datahub/ingestion/source/redshift/redshift.py +21 -1
  106. datahub/ingestion/source/redshift/usage.py +4 -3
  107. datahub/ingestion/source/s3/report.py +4 -2
  108. datahub/ingestion/source/s3/source.py +367 -115
  109. datahub/ingestion/source/sac/sac.py +3 -1
  110. datahub/ingestion/source/salesforce.py +6 -3
  111. datahub/ingestion/source/sigma/sigma.py +7 -1
  112. datahub/ingestion/source/slack/slack.py +2 -1
  113. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  114. datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
  115. datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
  116. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  117. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  118. datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
  119. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  120. datahub/ingestion/source/sql/athena.py +119 -11
  121. datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
  122. datahub/ingestion/source/sql/clickhouse.py +3 -1
  123. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  124. datahub/ingestion/source/sql/hana.py +3 -1
  125. datahub/ingestion/source/sql/hive_metastore.py +3 -11
  126. datahub/ingestion/source/sql/mariadb.py +0 -1
  127. datahub/ingestion/source/sql/mssql/source.py +239 -34
  128. datahub/ingestion/source/sql/mysql.py +0 -1
  129. datahub/ingestion/source/sql/oracle.py +1 -1
  130. datahub/ingestion/source/sql/postgres.py +0 -1
  131. datahub/ingestion/source/sql/sql_common.py +121 -34
  132. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  133. datahub/ingestion/source/sql/teradata.py +997 -235
  134. datahub/ingestion/source/sql/vertica.py +10 -6
  135. datahub/ingestion/source/sql_queries.py +2 -2
  136. datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
  137. datahub/ingestion/source/superset.py +58 -3
  138. datahub/ingestion/source/tableau/tableau.py +58 -37
  139. datahub/ingestion/source/tableau/tableau_common.py +4 -2
  140. datahub/ingestion/source/tableau/tableau_constant.py +0 -4
  141. datahub/ingestion/source/unity/config.py +5 -0
  142. datahub/ingestion/source/unity/proxy.py +118 -0
  143. datahub/ingestion/source/unity/source.py +195 -17
  144. datahub/ingestion/source/unity/tag_entities.py +295 -0
  145. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  146. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  147. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  148. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  149. datahub/metadata/_internal_schema_classes.py +1522 -569
  150. datahub/metadata/_urns/urn_defs.py +1826 -1658
  151. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  152. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  153. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  154. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  155. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +29 -0
  156. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  157. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
  158. datahub/metadata/schema.avsc +17758 -17097
  159. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  160. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  161. datahub/metadata/schemas/Applications.avsc +38 -0
  162. datahub/metadata/schemas/ChartKey.avsc +1 -0
  163. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  164. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  165. datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
  166. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  167. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  168. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  169. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  170. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +237 -0
  171. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  172. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
  173. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  174. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  175. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  176. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  177. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  178. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  179. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  180. datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
  181. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  182. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  183. datahub/metadata/schemas/LogicalParent.avsc +140 -0
  184. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  185. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  186. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  187. datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
  188. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  189. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  190. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
  191. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  192. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  193. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  194. datahub/metadata/schemas/__init__.py +3 -3
  195. datahub/sdk/__init__.py +2 -0
  196. datahub/sdk/_all_entities.py +7 -0
  197. datahub/sdk/_shared.py +116 -0
  198. datahub/sdk/chart.py +315 -0
  199. datahub/sdk/container.py +7 -0
  200. datahub/sdk/dashboard.py +432 -0
  201. datahub/sdk/dataflow.py +7 -0
  202. datahub/sdk/datajob.py +45 -13
  203. datahub/sdk/dataset.py +8 -2
  204. datahub/sdk/entity_client.py +82 -2
  205. datahub/sdk/lineage_client.py +683 -82
  206. datahub/sdk/main_client.py +46 -16
  207. datahub/sdk/mlmodel.py +101 -38
  208. datahub/sdk/mlmodelgroup.py +7 -0
  209. datahub/sdk/search_client.py +4 -3
  210. datahub/sdk/search_filters.py +95 -27
  211. datahub/specific/chart.py +1 -1
  212. datahub/specific/dataproduct.py +4 -0
  213. datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
  214. datahub/sql_parsing/sqlglot_lineage.py +62 -13
  215. datahub/telemetry/telemetry.py +17 -11
  216. datahub/testing/sdk_v2_helpers.py +7 -1
  217. datahub/upgrade/upgrade.py +56 -14
  218. datahub/utilities/server_config_util.py +8 -0
  219. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  220. datahub/utilities/stats_collections.py +4 -0
  221. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/WHEEL +0 -0
  222. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/licenses/LICENSE +0 -0
  223. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,7 @@
8
8
  "glossaryTerms",
9
9
  "editableMlModelProperties",
10
10
  "domains",
11
+ "applications",
11
12
  "ownership",
12
13
  "mlModelProperties",
13
14
  "intendedUse",
@@ -74,13 +75,17 @@
74
75
  "DEV": "Designates development fabrics",
75
76
  "EI": "Designates early-integration fabrics",
76
77
  "NON_PROD": "Designates non-production fabrics",
78
+ "PRD": "Alternative Prod spelling",
77
79
  "PRE": "Designates pre-production fabrics",
78
80
  "PROD": "Designates production fabrics",
79
81
  "QA": "Designates quality assurance fabrics",
80
82
  "RVW": "Designates review fabrics",
81
83
  "SANDBOX": "Designates sandbox fabrics",
84
+ "SBX": "Alternative spelling for sandbox",
85
+ "SIT": "System Integration Testing",
82
86
  "STG": "Designates staging fabrics",
83
87
  "TEST": "Designates testing fabrics",
88
+ "TST": "Alternative Test spelling",
84
89
  "UAT": "Designates user acceptance testing fabrics"
85
90
  },
86
91
  "name": "FabricType",
@@ -97,6 +102,10 @@
97
102
  "PROD",
98
103
  "CORP",
99
104
  "RVW",
105
+ "PRD",
106
+ "TST",
107
+ "SIT",
108
+ "SBX",
100
109
  "SANDBOX"
101
110
  ],
102
111
  "doc": "Fabric group type"
@@ -8,6 +8,7 @@
8
8
  "glossaryTerms",
9
9
  "editableMlPrimaryKeyProperties",
10
10
  "domains",
11
+ "applications",
11
12
  "mlPrimaryKeyProperties",
12
13
  "ownership",
13
14
  "institutionalMemory",
@@ -2424,13 +2424,17 @@
2424
2424
  "DEV": "Designates development fabrics",
2425
2425
  "EI": "Designates early-integration fabrics",
2426
2426
  "NON_PROD": "Designates non-production fabrics",
2427
+ "PRD": "Alternative Prod spelling",
2427
2428
  "PRE": "Designates pre-production fabrics",
2428
2429
  "PROD": "Designates production fabrics",
2429
2430
  "QA": "Designates quality assurance fabrics",
2430
2431
  "RVW": "Designates review fabrics",
2431
2432
  "SANDBOX": "Designates sandbox fabrics",
2433
+ "SBX": "Alternative spelling for sandbox",
2434
+ "SIT": "System Integration Testing",
2432
2435
  "STG": "Designates staging fabrics",
2433
2436
  "TEST": "Designates testing fabrics",
2437
+ "TST": "Alternative Test spelling",
2434
2438
  "UAT": "Designates user acceptance testing fabrics"
2435
2439
  },
2436
2440
  "name": "FabricType",
@@ -2447,6 +2451,10 @@
2447
2451
  "PROD",
2448
2452
  "CORP",
2449
2453
  "RVW",
2454
+ "PRD",
2455
+ "TST",
2456
+ "SIT",
2457
+ "SBX",
2450
2458
  "SANDBOX"
2451
2459
  ],
2452
2460
  "doc": "Fabric group type"
@@ -7733,13 +7741,15 @@
7733
7741
  "type": "enum",
7734
7742
  "symbolDocs": {
7735
7743
  "EQUALS": "Whether the field matches the value",
7744
+ "NOT_EQUALS": "Whether the field does not match the value",
7736
7745
  "STARTS_WITH": "Whether the field value starts with the value"
7737
7746
  },
7738
7747
  "name": "PolicyMatchCondition",
7739
7748
  "namespace": "com.linkedin.pegasus2avro.policy",
7740
7749
  "symbols": [
7741
7750
  "EQUALS",
7742
- "STARTS_WITH"
7751
+ "STARTS_WITH",
7752
+ "NOT_EQUALS"
7743
7753
  ],
7744
7754
  "doc": "The matching condition in a filter criterion"
7745
7755
  },
@@ -7761,6 +7771,15 @@
7761
7771
  "name": "filter",
7762
7772
  "default": null,
7763
7773
  "doc": "Filter to apply privileges to"
7774
+ },
7775
+ {
7776
+ "type": [
7777
+ "null",
7778
+ "com.linkedin.pegasus2avro.policy.PolicyMatchFilter"
7779
+ ],
7780
+ "name": "privilegeConstraints",
7781
+ "default": null,
7782
+ "doc": "Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"
7764
7783
  }
7765
7784
  ],
7766
7785
  "doc": "Information used to filter DataHub resource."
@@ -15,6 +15,7 @@
15
15
  "browsePaths",
16
16
  "institutionalMemory",
17
17
  "domains",
18
+ "applications",
18
19
  "subTypes",
19
20
  "dataPlatformInstance",
20
21
  "browsePathsV2",
@@ -15,13 +15,6 @@
15
15
  "namespace": "com.linkedin.pegasus2avro.query",
16
16
  "fields": [
17
17
  {
18
- "Relationship": {
19
- "entityTypes": [
20
- "dataset",
21
- "schemaField"
22
- ],
23
- "name": "IsAssociatedWith"
24
- },
25
18
  "Searchable": {
26
19
  "fieldName": "entities",
27
20
  "fieldType": "URN"
@@ -32,11 +25,7 @@
32
25
  "type": "string",
33
26
  "name": "entity",
34
27
  "doc": "An entity which is the subject of a query.",
35
- "Urn": "Urn",
36
- "entityTypes": [
37
- "dataset",
38
- "schemaField"
39
- ]
28
+ "Urn": "Urn"
40
29
  }
41
30
  ],
42
31
  "doc": "A single subject of a particular query.\nIn the future, we may evolve this model to include richer details\nabout the Query Subject in relation to the query."
@@ -14,7 +14,8 @@
14
14
  "documentation",
15
15
  "testResults",
16
16
  "deprecation",
17
- "subTypes"
17
+ "subTypes",
18
+ "logicalParent"
18
19
  ]
19
20
  },
20
21
  "name": "SchemaFieldKey",
@@ -15,10 +15,10 @@ import pathlib
15
15
  def _load_schema(schema_name: str) -> str:
16
16
  return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text()
17
17
 
18
- def getMetadataChangeEventSchema() -> str:
19
- return _load_schema("MetadataChangeEvent")
20
-
21
18
  def getMetadataChangeProposalSchema() -> str:
22
19
  return _load_schema("MetadataChangeProposal")
23
20
 
21
+ def getMetadataChangeEventSchema() -> str:
22
+ return _load_schema("MetadataChangeEvent")
23
+
24
24
  # fmt: on
datahub/sdk/__init__.py CHANGED
@@ -18,7 +18,9 @@ from datahub.metadata.urns import (
18
18
  SchemaFieldUrn,
19
19
  TagUrn,
20
20
  )
21
+ from datahub.sdk.chart import Chart
21
22
  from datahub.sdk.container import Container
23
+ from datahub.sdk.dashboard import Dashboard
22
24
  from datahub.sdk.dataflow import DataFlow
23
25
  from datahub.sdk.datajob import DataJob
24
26
  from datahub.sdk.dataset import Dataset
@@ -1,6 +1,8 @@
1
1
  from typing import Dict, List, Type
2
2
 
3
+ from datahub.sdk.chart import Chart
3
4
  from datahub.sdk.container import Container
5
+ from datahub.sdk.dashboard import Dashboard
4
6
  from datahub.sdk.dataflow import DataFlow
5
7
  from datahub.sdk.datajob import DataJob
6
8
  from datahub.sdk.dataset import Dataset
@@ -8,6 +10,8 @@ from datahub.sdk.entity import Entity
8
10
  from datahub.sdk.mlmodel import MLModel
9
11
  from datahub.sdk.mlmodelgroup import MLModelGroup
10
12
 
13
+ # Base entity classes that don't have circular dependencies
14
+ # Those that do are imported in the EntityClient where needed
11
15
  # TODO: Is there a better way to declare this?
12
16
  ENTITY_CLASSES_LIST: List[Type[Entity]] = [
13
17
  Container,
@@ -16,8 +20,11 @@ ENTITY_CLASSES_LIST: List[Type[Entity]] = [
16
20
  MLModelGroup,
17
21
  DataFlow,
18
22
  DataJob,
23
+ Dashboard,
24
+ Chart,
19
25
  ]
20
26
 
27
+ # Create the mapping of entity types to classes
21
28
  ENTITY_CLASSES: Dict[str, Type[Entity]] = {
22
29
  cls.get_urn_type().ENTITY_TYPE: cls for cls in ENTITY_CLASSES_LIST
23
30
  }
datahub/sdk/_shared.py CHANGED
@@ -26,9 +26,11 @@ from datahub.emitter.mce_builder import (
26
26
  from datahub.emitter.mcp_builder import ContainerKey
27
27
  from datahub.errors import MultipleSubtypesWarning, SdkUsageError
28
28
  from datahub.metadata.urns import (
29
+ ChartUrn,
29
30
  ContainerUrn,
30
31
  CorpGroupUrn,
31
32
  CorpUserUrn,
33
+ DashboardUrn,
32
34
  DataFlowUrn,
33
35
  DataJobUrn,
34
36
  DataPlatformInstanceUrn,
@@ -38,6 +40,7 @@ from datahub.metadata.urns import (
38
40
  DomainUrn,
39
41
  GlossaryTermUrn,
40
42
  OwnershipTypeUrn,
43
+ StructuredPropertyUrn,
41
44
  TagUrn,
42
45
  Urn,
43
46
  VersionSetUrn,
@@ -49,11 +52,20 @@ from datahub.utilities.urns.error import InvalidUrnError
49
52
  if TYPE_CHECKING:
50
53
  from datahub.sdk.container import Container
51
54
  UrnOrStr: TypeAlias = Union[Urn, str]
55
+ ChartUrnOrStr: TypeAlias = Union[str, ChartUrn]
52
56
  DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
53
57
  DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
54
58
  DataflowUrnOrStr: TypeAlias = Union[str, DataFlowUrn]
59
+ DashboardUrnOrStr: TypeAlias = Union[str, DashboardUrn]
60
+ DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn]
61
+ DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn]
55
62
 
56
63
  ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
64
+ StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn]
65
+ StructuredPropertyValueType: TypeAlias = Union[str, float, int]
66
+ StructuredPropertyInputType: TypeAlias = Dict[
67
+ StructuredPropertyUrnOrStr, Sequence[StructuredPropertyValueType]
68
+ ]
57
69
 
58
70
  TrainingMetricsInputType: TypeAlias = Union[
59
71
  List[models.MLMetricClass], Dict[str, Optional[str]]
@@ -717,3 +729,107 @@ class HasVersion(Entity):
717
729
  a for a in version_props.aliases if a.versionTag != alias
718
730
  ]
719
731
  self._set_aspect(version_props)
732
+
733
+
734
+ class HasStructuredProperties(Entity):
735
+ """
736
+ Mixin for entities that support structured properties
737
+ """
738
+
739
+ __slots__ = ()
740
+
741
+ @property
742
+ def structured_properties(
743
+ self,
744
+ ) -> Optional[List[models.StructuredPropertyValueAssignmentClass]]:
745
+ """
746
+ Retrieve structured properties for the entity
747
+
748
+ Returns:
749
+ Optional list of structured property value assignments
750
+ """
751
+ sp_aspect = self._get_aspect(models.StructuredPropertiesClass)
752
+ return sp_aspect.properties if sp_aspect else None
753
+
754
+ def _ensure_structured_properties(self) -> models.StructuredPropertiesClass:
755
+ """
756
+ Ensure structured properties aspect exists, creating it if necessary
757
+
758
+ Returns:
759
+ StructuredPropertiesClass aspect
760
+ """
761
+ return self._setdefault_aspect(models.StructuredPropertiesClass(properties=[]))
762
+
763
+ def set_structured_property(
764
+ self,
765
+ property_urn: StructuredPropertyUrnOrStr,
766
+ values: Sequence[StructuredPropertyValueType],
767
+ ) -> None:
768
+ """
769
+ Update an existing structured property or add if it doesn't exist
770
+
771
+ Args:
772
+ property_urn: URN of the structured property
773
+ values: List of values for the property
774
+ """
775
+ # validate property_urn is a valid structured property urn
776
+ property_urn = StructuredPropertyUrn.from_string(property_urn)
777
+
778
+ properties = self._ensure_structured_properties()
779
+
780
+ # Find existing property assignment
781
+ existing_prop = next(
782
+ (
783
+ prop
784
+ for prop in properties.properties
785
+ if prop.propertyUrn == str(property_urn)
786
+ ),
787
+ None,
788
+ )
789
+ current_timestamp = make_ts_millis(datetime.now())
790
+
791
+ if existing_prop:
792
+ # Update existing property
793
+ existing_prop.values = list(values)
794
+ existing_prop.lastModified = models.AuditStampClass(
795
+ time=current_timestamp,
796
+ actor=DEFAULT_ACTOR_URN,
797
+ )
798
+ else:
799
+ # Create new property assignment
800
+ new_property = models.StructuredPropertyValueAssignmentClass(
801
+ propertyUrn=str(property_urn),
802
+ values=list(values),
803
+ created=models.AuditStampClass(
804
+ time=current_timestamp,
805
+ actor=DEFAULT_ACTOR_URN,
806
+ ),
807
+ lastModified=models.AuditStampClass(
808
+ time=current_timestamp,
809
+ actor=DEFAULT_ACTOR_URN,
810
+ ),
811
+ )
812
+ add_list_unique(
813
+ properties.properties,
814
+ key=lambda prop: prop.propertyUrn,
815
+ item=new_property,
816
+ )
817
+
818
+ self._set_aspect(properties)
819
+
820
+ def remove_structured_property(
821
+ self, property_urn: StructuredPropertyUrnOrStr
822
+ ) -> None:
823
+ """
824
+ Remove a structured property from the entity
825
+
826
+ Args:
827
+ property_urn: URN of the structured property to remove
828
+ """
829
+ remove_list_unique(
830
+ self._ensure_structured_properties().properties,
831
+ key=lambda prop: prop.propertyUrn,
832
+ item=models.StructuredPropertyValueAssignmentClass(
833
+ propertyUrn=str(property_urn), values=[]
834
+ ),
835
+ )
datahub/sdk/chart.py ADDED
@@ -0,0 +1,315 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Dict, List, Optional, Type, Union
5
+
6
+ from typing_extensions import Self
7
+
8
+ import datahub.metadata.schema_classes as models
9
+ from datahub.emitter.enum_helpers import get_enum_options
10
+ from datahub.metadata.urns import ChartUrn, DatasetUrn, Urn
11
+ from datahub.sdk._shared import (
12
+ DataPlatformInstanceUrnOrStr,
13
+ DataPlatformUrnOrStr,
14
+ DatasetUrnOrStr,
15
+ DomainInputType,
16
+ HasContainer,
17
+ HasDomain,
18
+ HasInstitutionalMemory,
19
+ HasOwnership,
20
+ HasPlatformInstance,
21
+ HasSubtype,
22
+ HasTags,
23
+ HasTerms,
24
+ LinksInputType,
25
+ OwnersInputType,
26
+ TagsInputType,
27
+ TermsInputType,
28
+ )
29
+ from datahub.sdk.dataset import Dataset
30
+ from datahub.sdk.entity import Entity, ExtraAspectsType
31
+
32
+
33
+ class Chart(
34
+ HasPlatformInstance,
35
+ HasSubtype,
36
+ HasOwnership,
37
+ HasContainer,
38
+ HasInstitutionalMemory,
39
+ HasTags,
40
+ HasTerms,
41
+ HasDomain,
42
+ Entity,
43
+ ):
44
+ """Represents a chart in DataHub."""
45
+
46
+ __slots__ = ()
47
+
48
+ @classmethod
49
+ def get_urn_type(cls) -> Type[ChartUrn]:
50
+ """Get the URN type for charts.
51
+ Returns:
52
+ The ChartUrn class.
53
+ """
54
+ return ChartUrn
55
+
56
+ def __init__(
57
+ self,
58
+ *,
59
+ # Identity.
60
+ name: str,
61
+ platform: DataPlatformUrnOrStr,
62
+ display_name: Optional[str] = None,
63
+ platform_instance: Optional[DataPlatformInstanceUrnOrStr] = None,
64
+ # Chart properties.
65
+ description: Optional[str] = "",
66
+ external_url: Optional[str] = None,
67
+ chart_url: Optional[str] = None,
68
+ custom_properties: Optional[Dict[str, str]] = None,
69
+ last_modified: Optional[datetime] = None,
70
+ last_refreshed: Optional[datetime] = None,
71
+ chart_type: Optional[Union[str, models.ChartTypeClass]] = None,
72
+ access: Optional[str] = None,
73
+ # Standard aspects.
74
+ subtype: Optional[str] = None,
75
+ owners: Optional[OwnersInputType] = None,
76
+ links: Optional[LinksInputType] = None,
77
+ tags: Optional[TagsInputType] = None,
78
+ terms: Optional[TermsInputType] = None,
79
+ domain: Optional[DomainInputType] = None,
80
+ input_datasets: Optional[List[Union[DatasetUrnOrStr, Dataset]]] = None,
81
+ extra_aspects: ExtraAspectsType = None,
82
+ ):
83
+ """Initialize a new Chart instance."""
84
+ urn = ChartUrn.create_from_ids(
85
+ platform=str(platform),
86
+ name=name,
87
+ platform_instance=str(platform_instance) if platform_instance else None,
88
+ )
89
+ super().__init__(urn)
90
+ self._set_extra_aspects(extra_aspects)
91
+
92
+ self._set_platform_instance(platform, platform_instance)
93
+
94
+ # Set additional properties
95
+ if external_url is not None:
96
+ self.set_external_url(external_url)
97
+ if chart_url is not None:
98
+ self.set_chart_url(chart_url)
99
+ if custom_properties is not None:
100
+ self.set_custom_properties(custom_properties)
101
+ if last_refreshed is not None:
102
+ self.set_last_refreshed(last_refreshed)
103
+ if chart_type is not None:
104
+ self.set_chart_type(chart_type)
105
+ if access is not None:
106
+ self.set_access(access)
107
+ if subtype is not None:
108
+ self.set_subtype(subtype)
109
+ if owners is not None:
110
+ self.set_owners(owners)
111
+ if links is not None:
112
+ self.set_links(links)
113
+ if tags is not None:
114
+ self.set_tags(tags)
115
+ if terms is not None:
116
+ self.set_terms(terms)
117
+ if domain is not None:
118
+ self.set_domain(domain)
119
+ if last_modified is not None:
120
+ self.set_last_modified(last_modified)
121
+ if input_datasets is not None:
122
+ self.set_input_datasets(input_datasets)
123
+ if description is not None:
124
+ self.set_description(description)
125
+ if display_name is not None:
126
+ self.set_display_name(display_name)
127
+
128
+ @classmethod
129
+ def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
130
+ assert isinstance(urn, ChartUrn)
131
+ entity = cls(
132
+ platform=urn.dashboard_tool,
133
+ name=urn.chart_id,
134
+ )
135
+ return entity._init_from_graph(current_aspects)
136
+
137
+ @property
138
+ def urn(self) -> ChartUrn:
139
+ assert isinstance(self._urn, ChartUrn)
140
+ return self._urn
141
+
142
+ def _ensure_chart_props(self) -> models.ChartInfoClass:
143
+ """Ensure chart properties exist, using a safer approach."""
144
+ return self._setdefault_aspect(
145
+ models.ChartInfoClass(
146
+ title=self.urn.chart_id,
147
+ description="",
148
+ lastModified=models.ChangeAuditStampsClass(),
149
+ )
150
+ )
151
+
152
+ @property
153
+ def name(self) -> str:
154
+ """Get the name of the chart."""
155
+ return self.urn.chart_id
156
+
157
+ @property
158
+ def title(self) -> str:
159
+ """Get the title of the chart."""
160
+ return self._ensure_chart_props().title
161
+
162
+ def set_title(self, title: str) -> None:
163
+ """Set the title of the chart."""
164
+ self._ensure_chart_props().title = title
165
+
166
+ @property
167
+ def description(self) -> Optional[str]:
168
+ """Get the description of the chart."""
169
+ return self._ensure_chart_props().description
170
+
171
+ def set_description(self, description: str) -> None:
172
+ """Set the description of the chart."""
173
+ self._ensure_chart_props().description = description
174
+
175
+ @property
176
+ def display_name(self) -> Optional[str]:
177
+ """Get the display name of the chart."""
178
+ return self.title
179
+
180
+ def set_display_name(self, display_name: str) -> None:
181
+ """Set the display name of the chart."""
182
+ self.set_title(display_name)
183
+
184
+ @property
185
+ def external_url(self) -> Optional[str]:
186
+ """Get the external URL of the chart."""
187
+ return self._ensure_chart_props().externalUrl
188
+
189
+ def set_external_url(self, external_url: str) -> None:
190
+ """Set the external URL of the chart."""
191
+ self._ensure_chart_props().externalUrl = external_url
192
+
193
+ @property
194
+ def chart_url(self) -> Optional[str]:
195
+ """Get the chart URL."""
196
+ return self._ensure_chart_props().chartUrl
197
+
198
+ def set_chart_url(self, chart_url: str) -> None:
199
+ """Set the chart URL."""
200
+ self._ensure_chart_props().chartUrl = chart_url
201
+
202
+ @property
203
+ def custom_properties(self) -> Dict[str, str]:
204
+ """Get the custom properties of the chart."""
205
+ return self._ensure_chart_props().customProperties
206
+
207
+ def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
208
+ """Set the custom properties of the chart."""
209
+ self._ensure_chart_props().customProperties = custom_properties
210
+
211
@property
def last_modified(self) -> Optional[datetime]:
    """Return when the chart was last modified, or ``None`` if unset.

    The value is read from ``lastModified.lastModified.time`` on the chart's
    properties object and converted with ``datetime.fromtimestamp``. A falsy
    stored value (``None`` or ``0``) is reported as ``None``.
    """
    audit_stamps = self._ensure_chart_props().lastModified
    epoch_value = audit_stamps.lastModified.time
    if epoch_value:
        # NOTE(review): the value is interpreted as epoch *seconds* here;
        # confirm this matches the unit other writers of this field use.
        return datetime.fromtimestamp(epoch_value)
    return None
218
+
219
def set_last_modified(self, last_modified: datetime) -> None:
    """Record ``last_modified`` as the chart's last-modified audit stamp.

    The timestamp is truncated to whole epoch seconds and attributed to the
    fixed actor ``urn:li:corpuser:datahub``. Any previously stored
    ``lastModified`` audit stamps object is replaced wholesale.
    """
    props = self._ensure_chart_props()
    # NOTE(review): this stores epoch *seconds*; confirm that is the unit
    # expected by other readers of AuditStamp.time.
    modified_stamp = models.AuditStampClass(
        time=int(last_modified.timestamp()),
        actor="urn:li:corpuser:datahub",
    )
    props.lastModified = models.ChangeAuditStampsClass(lastModified=modified_stamp)
228
+
229
@property
def last_refreshed(self) -> Optional[datetime]:
    """Return when the chart was last refreshed, or ``None`` if never recorded.

    The stored ``lastRefreshed`` value is converted with
    ``datetime.fromtimestamp``; only ``None`` is treated as "unset".
    """
    refreshed_at = self._ensure_chart_props().lastRefreshed
    if refreshed_at is None:
        return None
    return datetime.fromtimestamp(refreshed_at)
238
+
239
def set_last_refreshed(self, last_refreshed: datetime) -> None:
    """Store ``last_refreshed`` as whole epoch seconds in ``lastRefreshed``."""
    props = self._ensure_chart_props()
    epoch_seconds = int(last_refreshed.timestamp())
    props.lastRefreshed = epoch_seconds
243
+
244
@property
def chart_type(self) -> Optional[str]:
    """Return the chart's ``type`` rendered with ``str()``, or ``None`` if unset."""
    raw_type = self._ensure_chart_props().type
    if raw_type is None:
        return None
    return str(raw_type)
249
+
250
def set_chart_type(self, chart_type: Union[str, models.ChartTypeClass]) -> None:
    """Set the type of the chart.

    Args:
        chart_type: The new chart type. A string must be one of the options
            returned by ``get_enum_options(models.ChartTypeClass)``; any
            non-string value is stored without validation.

    Raises:
        AssertionError: If ``chart_type`` is a string that is not a valid
            option.
    """
    if isinstance(chart_type, str) and chart_type not in get_enum_options(
        models.ChartTypeClass
    ):
        # Raised explicitly instead of via ``assert`` so the validation still
        # runs under ``python -O``; exception type and message are unchanged.
        raise AssertionError(f"Invalid chart type: {chart_type}")
    self._ensure_chart_props().type = chart_type
257
+
258
@property
def access(self) -> Optional[str]:
    """Return the chart's ``access`` value rendered with ``str()``, or ``None`` if unset."""
    raw_access = self._ensure_chart_props().access
    if raw_access is None:
        return None
    return str(raw_access)
263
+
264
def set_access(self, access: Union[str, models.AccessLevelClass]) -> None:
    """Set the access level of the chart.

    Args:
        access: The new access level. A string must be one of the options
            returned by ``get_enum_options(models.AccessLevelClass)``; any
            non-string value is stored without validation.

    Raises:
        AssertionError: If ``access`` is a string that is not a valid option.
    """
    if isinstance(access, str) and access not in get_enum_options(
        models.AccessLevelClass
    ):
        # Raised explicitly instead of via ``assert`` so the validation still
        # runs under ``python -O``; exception type and message are unchanged.
        raise AssertionError(f"Invalid access level: {access}")
    self._ensure_chart_props().access = access
271
+
272
@property
def input_datasets(self) -> List[DatasetUrn]:
    """Return the chart's inputs parsed into ``DatasetUrn`` objects.

    Each stored input string is parsed with ``DatasetUrn.from_string``. A
    missing or empty ``inputs`` value yields an empty list.
    """
    raw_inputs = self._ensure_chart_props().inputs or []
    parsed_urns = []
    for raw_urn in raw_inputs:
        parsed_urns.append(DatasetUrn.from_string(raw_urn))
    return parsed_urns
278
+
279
def set_input_datasets(
    self, input_datasets: List[Union[DatasetUrnOrStr, Dataset]]
) -> None:
    """Replace the chart's inputs with the given datasets.

    Every entry is stored as a string: ``Dataset`` objects contribute
    ``str(dataset.urn)``, anything else contributes ``str(entry)``.
    """
    urn_strings = [
        str(entry.urn) if isinstance(entry, Dataset) else str(entry)
        for entry in input_datasets
    ]
    self._ensure_chart_props().inputs = urn_strings
291
+
292
def add_input_dataset(self, input_dataset: Union[DatasetUrnOrStr, Dataset]) -> None:
    """Add a dataset to the chart's inputs unless it is already listed.

    A ``Dataset`` contributes its ``urn``, a string is parsed with
    ``DatasetUrn.from_string``, and any other value is used as given. The
    resulting URN is stored in its string form.
    """
    if isinstance(input_dataset, Dataset):
        dataset_urn = input_dataset.urn
    elif isinstance(input_dataset, str):
        dataset_urn = DatasetUrn.from_string(input_dataset)
    else:
        # Remaining case of the accepted union: already a DatasetUrn.
        dataset_urn = input_dataset

    props = self._ensure_chart_props()
    current_inputs = props.inputs or []
    urn_text = str(dataset_urn)
    if urn_text in current_inputs:
        return
    current_inputs.append(urn_text)
    props.inputs = current_inputs
306
+
307
def remove_input_dataset(
    self, input_dataset: Union[DatasetUrnOrStr, Dataset]
) -> None:
    """Remove a dataset from the chart's inputs, if it is listed.

    Args:
        input_dataset: The dataset to remove, given as a URN string, a
            ``DatasetUrn``, or a ``Dataset`` (whose ``urn`` is used).

    The first matching entry is removed; nothing happens when the dataset is
    not among the inputs.
    """
    # Inputs are stored as URN strings, so normalise the argument to its
    # string form before comparing; a Dataset or DatasetUrn object itself is
    # not expected to compare equal to a stored string.
    if isinstance(input_dataset, str):
        target_urn = input_dataset
    elif isinstance(input_dataset, Dataset):
        target_urn = str(input_dataset.urn)
    else:
        # Remaining case of the accepted union: a DatasetUrn.
        target_urn = str(input_dataset)

    chart_props = self._ensure_chart_props()
    inputs = chart_props.inputs or []
    if target_urn in inputs:
        inputs.remove(target_urn)
        chart_props.inputs = inputs