acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414) hide show
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,453 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Dict, List, Optional, Sequence, Type, Union
5
+
6
+ from deprecated.sphinx import deprecated
7
+ from typing_extensions import Self
8
+
9
+ import datahub.metadata.schema_classes as models
10
+ from datahub.metadata.urns import ChartUrn, DashboardUrn, DatasetUrn, Urn
11
+ from datahub.sdk._shared import (
12
+ ActorUrnOrStr,
13
+ ChangeAuditStampsMixin,
14
+ ChartUrnOrStr,
15
+ DashboardUrnOrStr,
16
+ DataPlatformInstanceUrnOrStr,
17
+ DataPlatformUrnOrStr,
18
+ DatasetUrnOrStr,
19
+ DomainInputType,
20
+ HasContainer,
21
+ HasDomain,
22
+ HasInstitutionalMemory,
23
+ HasOwnership,
24
+ HasPlatformInstance,
25
+ HasSubtype,
26
+ HasTags,
27
+ HasTerms,
28
+ LinksInputType,
29
+ OwnersInputType,
30
+ ParentContainerInputType,
31
+ TagsInputType,
32
+ TermsInputType,
33
+ )
34
+ from datahub.sdk.chart import Chart
35
+ from datahub.sdk.dataset import Dataset
36
+ from datahub.sdk.entity import Entity, ExtraAspectsType
37
+ from datahub.utilities.sentinels import Unset, unset
38
+
39
+
40
+ class Dashboard(
41
+ ChangeAuditStampsMixin,
42
+ HasPlatformInstance,
43
+ HasSubtype,
44
+ HasOwnership,
45
+ HasContainer,
46
+ HasInstitutionalMemory,
47
+ HasTags,
48
+ HasTerms,
49
+ HasDomain,
50
+ Entity,
51
+ ):
52
+ """Represents a dashboard in DataHub."""
53
+
54
+ __slots__ = ()
55
+
56
@classmethod
def get_urn_type(cls) -> Type[DashboardUrn]:
    """Return the URN class that identifies dashboard entities.

    Returns:
        ``DashboardUrn`` itself (the class object, not an instance).
    """
    return DashboardUrn
63
+
64
def __init__(
    self,
    *,
    # --- identity ---
    name: str,
    platform: DataPlatformUrnOrStr,
    display_name: Optional[str] = None,
    platform_instance: Optional[DataPlatformInstanceUrnOrStr] = None,
    # --- dashboard info ---
    description: Optional[str] = None,
    external_url: Optional[str] = None,
    dashboard_url: Optional[str] = None,
    custom_properties: Optional[Dict[str, str]] = None,
    last_modified: Optional[datetime] = None,
    last_modified_by: Optional[ActorUrnOrStr] = None,
    created_at: Optional[datetime] = None,
    created_by: Optional[ActorUrnOrStr] = None,
    deleted_on: Optional[datetime] = None,
    deleted_by: Optional[ActorUrnOrStr] = None,
    last_refreshed: Optional[datetime] = None,
    input_datasets: Optional[Sequence[Union[DatasetUrnOrStr, Dataset]]] = None,
    charts: Optional[Sequence[Union[ChartUrnOrStr, Chart]]] = None,
    dashboards: Optional[Sequence[Union[DashboardUrnOrStr, Dashboard]]] = None,
    # --- shared aspects ---
    parent_container: ParentContainerInputType | Unset = unset,
    subtype: Optional[str] = None,
    owners: Optional[OwnersInputType] = None,
    links: Optional[LinksInputType] = None,
    tags: Optional[TagsInputType] = None,
    terms: Optional[TermsInputType] = None,
    domain: Optional[DomainInputType] = None,
    extra_aspects: ExtraAspectsType = None,
):
    """Create a dashboard entity from keyword-only arguments.

    The URN is built from ``platform``, ``name`` and, when provided,
    ``platform_instance``. Every optional argument left as ``None`` is
    skipped; ``parent_container`` is applied unless it is the ``unset``
    sentinel.
    """
    instance_id = str(platform_instance) if platform_instance else None
    dashboard_urn = DashboardUrn.create_from_ids(
        platform=str(platform),
        name=name,
        platform_instance=instance_id,
    )
    super().__init__(dashboard_urn)
    self._set_extra_aspects(extra_aspects)

    self._set_platform_instance(platform, platform_instance)
    # Make sure the dashboard info aspect exists before any setter touches it.
    self._ensure_dashboard_props(display_name=display_name)

    self._init_dashboard_properties(
        description=description,
        display_name=display_name,
        external_url=external_url,
        dashboard_url=dashboard_url,
        custom_properties=custom_properties,
        last_modified=last_modified,
        last_modified_by=last_modified_by,
        created_at=created_at,
        created_by=created_by,
        last_refreshed=last_refreshed,
        deleted_on=deleted_on,
        deleted_by=deleted_by,
        input_datasets=input_datasets,
        charts=charts,
        dashboards=dashboards,
    )
    self._init_standard_aspects(
        parent_container=parent_container,
        subtype=subtype,
        owners=owners,
        links=links,
        tags=tags,
        terms=terms,
        domain=domain,
    )
129
+
130
def _init_dashboard_properties(
    self,
    description: Optional[str],
    display_name: Optional[str],
    external_url: Optional[str],
    dashboard_url: Optional[str],
    custom_properties: Optional[Dict[str, str]],
    last_modified: Optional[datetime],
    last_modified_by: Optional[ActorUrnOrStr],
    created_at: Optional[datetime],
    created_by: Optional[ActorUrnOrStr],
    last_refreshed: Optional[datetime],
    deleted_on: Optional[datetime],
    deleted_by: Optional[ActorUrnOrStr],
    input_datasets: Optional[Sequence[Union[DatasetUrnOrStr, Dataset]]],
    charts: Optional[Sequence[Union[ChartUrnOrStr, Chart]]],
    dashboards: Optional[Sequence[Union[DashboardUrnOrStr, Dashboard]]],
) -> None:
    """Apply each dashboard-specific value that was actually supplied.

    A value of ``None`` means "not provided" and its setter is not called.
    """
    # (setter name, value) pairs, listed in the order they must be applied.
    # Setters are looked up lazily so that only supplied values touch them.
    pending = (
        ("set_description", description),
        ("set_display_name", display_name),
        ("set_external_url", external_url),
        ("set_dashboard_url", dashboard_url),
        ("set_custom_properties", custom_properties),
        ("set_last_modified", last_modified),
        ("set_last_modified_by", last_modified_by),
        ("set_created_at", created_at),
        ("set_created_by", created_by),
        ("set_deleted_on", deleted_on),
        ("set_deleted_by", deleted_by),
        ("set_last_refreshed", last_refreshed),
        ("set_input_datasets", input_datasets),
        ("set_charts", charts),
        ("set_dashboards", dashboards),
    )
    for setter_name, value in pending:
        if value is not None:
            getattr(self, setter_name)(value)
179
+
180
def _init_standard_aspects(
    self,
    parent_container: ParentContainerInputType | Unset,
    subtype: Optional[str],
    owners: Optional[OwnersInputType],
    links: Optional[LinksInputType],
    tags: Optional[TagsInputType],
    terms: Optional[TermsInputType],
    domain: Optional[DomainInputType],
) -> None:
    """Apply the aspects shared across entity types that were supplied.

    ``parent_container`` is gated on the ``unset`` sentinel rather than on
    ``None``; every other argument is skipped when it is ``None``.
    """
    if parent_container is not unset:
        self._set_container(parent_container)

    # (setter name, value) pairs, applied in this order when a value is given.
    pending = (
        ("set_subtype", subtype),
        ("set_owners", owners),
        ("set_links", links),
        ("set_tags", tags),
        ("set_terms", terms),
        ("set_domain", domain),
    )
    for setter_name, value in pending:
        if value is not None:
            getattr(self, setter_name)(value)
205
+
206
@classmethod
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
    """Rebuild a dashboard from its URN and the aspects fetched for it.

    The entity is first constructed from the tool and id carried by the URN,
    then handed ``current_aspects`` via ``_init_from_graph``.
    """
    assert isinstance(urn, DashboardUrn)
    return cls(
        platform=urn.dashboard_tool,
        name=urn.dashboard_id,
    )._init_from_graph(current_aspects)
214
+
215
@property
def urn(self) -> DashboardUrn:
    """Return this entity's URN, asserting that it is a ``DashboardUrn``."""
    dashboard_urn = self._urn
    assert isinstance(dashboard_urn, DashboardUrn)
    return dashboard_urn
219
+
220
def _ensure_dashboard_props(
    self, display_name: Optional[str] = None
) -> models.DashboardInfoClass:
    """Return the ``DashboardInfoClass`` aspect via ``_setdefault_aspect``.

    The default handed to ``_setdefault_aspect`` is titled with
    ``display_name`` when that is truthy, otherwise with the URN's
    ``dashboard_id``. It carries an empty description, a default-constructed
    ``ChangeAuditStampsClass``, empty custom properties and an empty
    dashboards list.
    """
    fallback_title = display_name or self.urn.dashboard_id
    default_info = models.DashboardInfoClass(
        title=fallback_title,
        description="",
        lastModified=models.ChangeAuditStampsClass(),
        customProperties={},
        dashboards=[],
    )
    return self._setdefault_aspect(default_info)
233
+
234
def _get_audit_stamps(self) -> models.ChangeAuditStampsClass:
    """Return the ``lastModified`` audit stamps held on the dashboard properties."""
    props = self._ensure_dashboard_props()
    return props.lastModified
237
+
238
def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None:
    """Store ``audit_stamps`` as ``lastModified`` on the dashboard properties."""
    props = self._ensure_dashboard_props()
    props.lastModified = audit_stamps
241
+
242
@property
def name(self) -> str:
    """Return the ``dashboard_id`` component of this entity's URN."""
    dashboard_urn = self.urn
    return dashboard_urn.dashboard_id
246
+
247
@property
@deprecated("Use display_name instead", version="1.2.0.7")
def title(self) -> str:
    """Deprecated alias that returns ``display_name``."""
    current_title = self.display_name
    return current_title
252
+
253
@deprecated("Use set_display_name instead", version="1.2.0.7")
def set_title(self, title: str) -> None:
    """Deprecated alias that forwards ``title`` to ``set_display_name``."""
    self.set_display_name(display_name=title)
257
+
258
@property
def description(self) -> Optional[str]:
    """Return the dashboard description, or ``None`` when it is empty.

    The underlying field is required, so an empty string stands in for
    "no description" and is reported as ``None``.
    """
    text = self._ensure_dashboard_props().description
    if text:
        return text
    return None
263
+
264
def set_description(self, description: str) -> None:
    """Store ``description`` on the dashboard properties."""
    props = self._ensure_dashboard_props()
    props.description = description
267
+
268
@property
def display_name(self) -> str:
    """Return the ``title`` stored on the dashboard properties."""
    props = self._ensure_dashboard_props()
    return props.title
272
+
273
def set_display_name(self, display_name: str) -> None:
    """Store ``display_name`` as the ``title`` of the dashboard properties."""
    props = self._ensure_dashboard_props()
    props.title = display_name
276
+
277
@property
def external_url(self) -> Optional[str]:
    """Return the ``externalUrl`` stored on the dashboard properties."""
    props = self._ensure_dashboard_props()
    return props.externalUrl
281
+
282
def set_external_url(self, external_url: str) -> None:
    """Store ``external_url`` as ``externalUrl`` on the dashboard properties."""
    props = self._ensure_dashboard_props()
    props.externalUrl = external_url
285
+
286
@property
def dashboard_url(self) -> Optional[str]:
    """Return the ``dashboardUrl`` stored on the dashboard properties."""
    props = self._ensure_dashboard_props()
    return props.dashboardUrl
290
+
291
def set_dashboard_url(self, dashboard_url: str) -> None:
    """Store ``dashboard_url`` as ``dashboardUrl`` on the dashboard properties."""
    props = self._ensure_dashboard_props()
    props.dashboardUrl = dashboard_url
294
+
295
@property
def custom_properties(self) -> Dict[str, str]:
    """Return the stored custom properties, or a new empty dict when falsy."""
    stored = self._ensure_dashboard_props().customProperties
    if stored:
        return stored
    return {}
300
+
301
def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
    """Replace the ``customProperties`` mapping on the dashboard properties."""
    props = self._ensure_dashboard_props()
    props.customProperties = custom_properties
304
+
305
@property
def last_refreshed(self) -> Optional[datetime]:
    """Return the last refresh time, or ``None`` when none is stored.

    The stored ``lastRefreshed`` value is converted with
    ``datetime.fromtimestamp`` and no explicit timezone.
    """
    stored = self._ensure_dashboard_props().lastRefreshed
    if stored is None:
        return None
    return datetime.fromtimestamp(stored)
314
+
315
def set_last_refreshed(self, last_refreshed: datetime) -> None:
    """Store ``last_refreshed`` as an integer POSIX timestamp.

    The value written is ``int(last_refreshed.timestamp())``, so any
    sub-second part is truncated.
    """
    epoch = int(last_refreshed.timestamp())
    props = self._ensure_dashboard_props()
    props.lastRefreshed = epoch
318
+
319
@property
def input_datasets(self) -> List[DatasetUrn]:
    """Return the destination of every stored dataset edge as a ``DatasetUrn``."""
    stored_edges = self._ensure_dashboard_props().datasetEdges or []
    urns = []
    for edge in stored_edges:
        urns.append(DatasetUrn.from_string(edge.destinationUrn))
    return urns
327
+
328
def set_input_datasets(
    self, input_datasets: Sequence[Union[DatasetUrnOrStr, Dataset]]
) -> None:
    """Append one dataset edge per entry of ``input_datasets``.

    Edges already stored on the dashboard properties are kept; new edges are
    added after them without de-duplication. ``Dataset`` entities contribute
    their ``urn``; other values are parsed with ``DatasetUrn.from_string``.
    """
    props = self._ensure_dashboard_props()
    edges = props.datasetEdges or []
    for item in input_datasets:
        resolved = (
            item.urn if isinstance(item, Dataset) else DatasetUrn.from_string(item)
        )
        edges.append(models.EdgeClass(destinationUrn=str(resolved)))
    props.datasetEdges = edges
341
+
342
def add_input_dataset(self, input_dataset: Union[DatasetUrnOrStr, Dataset]) -> None:
    """Add a dataset edge unless an edge to the same URN is already stored.

    A ``Dataset`` entity contributes its ``urn``; any other value is parsed
    with ``DatasetUrn.from_string``.
    """
    resolved = (
        input_dataset.urn
        if isinstance(input_dataset, Dataset)
        else DatasetUrn.from_string(input_dataset)
    )
    target = str(resolved)

    props = self._ensure_dashboard_props()
    edges = props.datasetEdges or []
    if any(edge.destinationUrn == target for edge in edges):
        return
    edges.append(models.EdgeClass(destinationUrn=target))
    props.datasetEdges = edges
356
+
357
def remove_input_dataset(
    self, input_dataset: Union[DatasetUrnOrStr, Dataset]
) -> None:
    """Drop every stored dataset edge whose destination is the given URN.

    A ``Dataset`` entity contributes its ``urn``; any other value is parsed
    with ``DatasetUrn.from_string``. The edge list is always rewritten, so a
    missing list becomes an empty one.
    """
    resolved = (
        input_dataset.urn
        if isinstance(input_dataset, Dataset)
        else DatasetUrn.from_string(input_dataset)
    )
    target = str(resolved)

    props = self._ensure_dashboard_props()
    kept = []
    for edge in props.datasetEdges or []:
        if edge.destinationUrn != target:
            kept.append(edge)
    props.datasetEdges = kept
371
+
372
@property
def charts(self) -> List[ChartUrn]:
    """Return the destination of every stored chart edge as a ``ChartUrn``.

    An empty list is returned when no chart edges are stored.
    """
    stored_edges = self._ensure_dashboard_props().chartEdges
    urns = []
    if stored_edges is not None:
        for edge in stored_edges:
            urns.append(ChartUrn.from_string(edge.destinationUrn))
    return urns
379
+
380
def set_charts(self, charts: Sequence[Union[ChartUrnOrStr, Chart]]) -> None:
    """Append one chart edge per entry of ``charts``.

    Edges already stored on the dashboard properties are kept; new edges are
    added after them without de-duplication. ``Chart`` entities contribute
    their ``urn``; other values are parsed with ``ChartUrn.from_string``.
    """
    props = self._ensure_dashboard_props()
    edges = props.chartEdges or []
    for item in charts:
        resolved = item.urn if isinstance(item, Chart) else ChartUrn.from_string(item)
        edges.append(models.EdgeClass(destinationUrn=str(resolved)))
    props.chartEdges = edges
391
+
392
def add_chart(self, chart: Union[ChartUrnOrStr, Chart]) -> None:
    """Add a chart edge unless an edge to the same URN is already stored.

    A ``Chart`` entity contributes its ``urn``; any other value is parsed
    with ``ChartUrn.from_string``.
    """
    resolved = chart.urn if isinstance(chart, Chart) else ChartUrn.from_string(chart)
    target = str(resolved)

    props = self._ensure_dashboard_props()
    edges = props.chartEdges or []
    # Edges without a destination can never equal the target string.
    already_linked = any(
        edge.destinationUrn is not None and edge.destinationUrn == target
        for edge in edges
    )
    if already_linked:
        return
    edges.append(models.EdgeClass(destinationUrn=target))
    props.chartEdges = edges
408
+
409
def remove_chart(self, chart: Union[ChartUrnOrStr, Chart]) -> None:
    """Drop every stored chart edge whose destination is the given URN.

    A ``Chart`` entity contributes its ``urn``; any other value is parsed
    with ``ChartUrn.from_string``. The edge list is always rewritten, so a
    missing list becomes an empty one.
    """
    resolved = chart.urn if isinstance(chart, Chart) else ChartUrn.from_string(chart)
    target = str(resolved)

    props = self._ensure_dashboard_props()
    kept = []
    for edge in props.chartEdges or []:
        if edge.destinationUrn != target:
            kept.append(edge)
    props.chartEdges = kept
421
+
422
@property
def dashboards(self) -> List[DashboardUrn]:
    """Return the destination of every stored dashboard edge as a ``DashboardUrn``."""
    stored_edges = self._ensure_dashboard_props().dashboards or []
    urns = []
    for edge in stored_edges:
        urns.append(DashboardUrn.from_string(edge.destinationUrn))
    return urns
430
+
431
def set_dashboards(
    self, dashboards: Sequence[Union[DashboardUrnOrStr, Dashboard]]
) -> None:
    """Append one dashboard edge per entry of ``dashboards``.

    Edges are appended directly to the list already stored on the dashboard
    properties, without de-duplication. ``Dashboard`` entities contribute
    their ``urn``; other values are parsed with ``DashboardUrn.from_string``.
    """
    props = self._ensure_dashboard_props()
    for item in dashboards:
        resolved = (
            item.urn if isinstance(item, Dashboard) else DashboardUrn.from_string(item)
        )
        new_edge = models.EdgeClass(destinationUrn=str(resolved))
        props.dashboards.append(new_edge)
442
+
443
def add_dashboard(self, dashboard: Union[DashboardUrnOrStr, Dashboard]) -> None:
    """Add a dashboard edge unless an edge to the same URN is already stored.

    Args:
        dashboard: A ``Dashboard`` entity (its ``urn`` is used) or a value
            accepted by ``DashboardUrn.from_string``.
    """
    if isinstance(dashboard, Dashboard):
        dashboard_urn = dashboard.urn
    else:
        dashboard_urn = DashboardUrn.from_string(dashboard)
    target = str(dashboard_urn)

    props = self._ensure_dashboard_props()
    # `or []` produces a brand-new list whenever the stored value is None or
    # empty, so the list has to be written back after appending; otherwise
    # the first dashboard added would be silently dropped.
    edges = props.dashboards or []
    if all(edge.destinationUrn != target for edge in edges):
        edges.append(models.EdgeClass(destinationUrn=target))
        props.dashboards = edges
datahub/sdk/dataflow.py CHANGED
@@ -21,12 +21,14 @@ from datahub.sdk._shared import (
21
21
  HasInstitutionalMemory,
22
22
  HasOwnership,
23
23
  HasPlatformInstance,
24
+ HasStructuredProperties,
24
25
  HasSubtype,
25
26
  HasTags,
26
27
  HasTerms,
27
28
  LinksInputType,
28
29
  OwnersInputType,
29
30
  ParentContainerInputType,
31
+ StructuredPropertyInputType,
30
32
  TagsInputType,
31
33
  TermsInputType,
32
34
  make_time_stamp,
@@ -45,6 +47,7 @@ class DataFlow(
45
47
  HasTags,
46
48
  HasTerms,
47
49
  HasDomain,
50
+ HasStructuredProperties,
48
51
  Entity,
49
52
  ):
50
53
  """Represents a dataflow in DataHub.
@@ -86,6 +89,7 @@ class DataFlow(
86
89
  terms: Optional[TermsInputType] = None,
87
90
  domain: Optional[DomainInputType] = None,
88
91
  parent_container: ParentContainerInputType | Unset = unset,
92
+ structured_properties: Optional[StructuredPropertyInputType] = None,
89
93
  extra_aspects: ExtraAspectsType = None,
90
94
  ):
91
95
  """Initialize a new Dataflow instance.
@@ -150,6 +154,9 @@ class DataFlow(
150
154
  self.set_domain(domain)
151
155
  if parent_container is not unset:
152
156
  self._set_container(parent_container)
157
+ if structured_properties is not None:
158
+ for key, value in structured_properties.items():
159
+ self.set_structured_property(property_urn=key, values=value)
153
160
 
154
161
  @classmethod
155
162
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
datahub/sdk/datajob.py CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Type
6
6
 
7
7
  from typing_extensions import Self
8
8
 
9
+ import datahub.emitter.mce_builder as builder
9
10
  import datahub.metadata.schema_classes as models
10
11
  from datahub.cli.cli_utils import first_non_null
11
12
  from datahub.errors import IngestionAttributionWarning
@@ -25,11 +26,13 @@ from datahub.sdk._shared import (
25
26
  HasInstitutionalMemory,
26
27
  HasOwnership,
27
28
  HasPlatformInstance,
29
+ HasStructuredProperties,
28
30
  HasSubtype,
29
31
  HasTags,
30
32
  HasTerms,
31
33
  LinksInputType,
32
34
  OwnersInputType,
35
+ StructuredPropertyInputType,
33
36
  TagsInputType,
34
37
  TermsInputType,
35
38
  make_time_stamp,
@@ -48,6 +51,7 @@ class DataJob(
48
51
  HasTags,
49
52
  HasTerms,
50
53
  HasDomain,
54
+ HasStructuredProperties,
51
55
  Entity,
52
56
  ):
53
57
  """Represents a data job in DataHub.
@@ -61,7 +65,7 @@ class DataJob(
61
65
  """Get the URN type for data jobs."""
62
66
  return DataJobUrn
63
67
 
64
- def __init__(
68
+ def __init__( # noqa: C901
65
69
  self,
66
70
  *,
67
71
  name: str,
@@ -81,9 +85,11 @@ class DataJob(
81
85
  tags: Optional[TagsInputType] = None,
82
86
  terms: Optional[TermsInputType] = None,
83
87
  domain: Optional[DomainInputType] = None,
84
- extra_aspects: ExtraAspectsType = None,
85
88
  inlets: Optional[List[DatasetUrnOrStr]] = None,
86
89
  outlets: Optional[List[DatasetUrnOrStr]] = None,
90
+ fine_grained_lineages: Optional[List[models.FineGrainedLineageClass]] = None,
91
+ structured_properties: Optional[StructuredPropertyInputType] = None,
92
+ extra_aspects: ExtraAspectsType = None,
87
93
  ):
88
94
  """
89
95
  Initialize a DataJob with either a DataFlow or a DataFlowUrn with platform instance.
@@ -99,12 +105,14 @@ class DataJob(
99
105
  ValueError: If neither flow nor (flow_urn and platform_instance) are provided
100
106
  """
101
107
  if flow is None:
102
- if flow_urn is None or platform_instance is None:
108
+ if flow_urn is None:
103
109
  raise ValueError(
104
110
  "You must provide either: 1. a DataFlow object, or 2. a DataFlowUrn (and a platform_instance config if required)"
105
111
  )
106
112
  flow_urn = DataFlowUrn.from_string(flow_urn)
107
- if flow_urn.flow_id.startswith(f"{platform_instance}."):
113
+ if platform_instance and flow_urn.flow_id.startswith(
114
+ f"{platform_instance}."
115
+ ):
108
116
  flow_name = flow_urn.flow_id[len(platform_instance) + 1 :]
109
117
  else:
110
118
  flow_name = flow_urn.flow_id
@@ -129,8 +137,6 @@ class DataJob(
129
137
  )
130
138
  self._setdefault_aspect(job_info)
131
139
  self._ensure_datajob_props().flowUrn = str(flow.urn)
132
-
133
- # Set properties if provided
134
140
  if description is not None:
135
141
  self.set_description(description)
136
142
  if external_url is not None:
@@ -141,8 +147,6 @@ class DataJob(
141
147
  self.set_created(created)
142
148
  if last_modified is not None:
143
149
  self.set_last_modified(last_modified)
144
-
145
- # Set standard aspects
146
150
  if subtype is not None:
147
151
  self.set_subtype(subtype)
148
152
  if owners is not None:
@@ -155,10 +159,19 @@ class DataJob(
155
159
  self.set_terms(terms)
156
160
  if domain is not None:
157
161
  self.set_domain(domain)
162
+ if structured_properties is not None:
163
+ for key, value in structured_properties.items():
164
+ self.set_structured_property(property_urn=key, values=value)
158
165
  if inlets is not None:
159
166
  self.set_inlets(inlets)
160
167
  if outlets is not None:
161
168
  self.set_outlets(outlets)
169
+ if fine_grained_lineages is not None:
170
+ self.set_fine_grained_lineages(fine_grained_lineages)
171
+
172
+ if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
173
+ env = self.flow_urn.cluster.upper()
174
+ self._ensure_datajob_props().env = env
162
175
 
163
176
  @classmethod
164
177
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
@@ -194,9 +207,7 @@ class DataJob(
194
207
  ) -> Optional[models.DataJobInputOutputClass]:
195
208
  return self._get_aspect(models.DataJobInputOutputClass)
196
209
 
197
- def _ensure_datajob_inputoutput_props(
198
- self,
199
- ) -> models.DataJobInputOutputClass:
210
+ def _ensure_datajob_inputoutput_props(self) -> models.DataJobInputOutputClass:
200
211
  return self._setdefault_aspect(
201
212
  models.DataJobInputOutputClass(inputDatasets=[], outputDatasets=[])
202
213
  )
@@ -300,12 +311,11 @@ class DataJob(
300
311
  browse_path.append(
301
312
  models.BrowsePathEntryClass(id=entry.id, urn=entry.urn)
302
313
  )
303
-
304
- # Add the job itself to the path
305
314
  browse_path.append(models.BrowsePathEntryClass(id=flow.name, urn=str(flow.urn)))
306
315
  # Set the browse path aspect
307
316
  self._set_aspect(models.BrowsePathsV2Class(path=browse_path))
308
317
 
318
+ # TODO: support datajob input/output
309
319
  @property
310
320
  def inlets(self) -> List[DatasetUrn]:
311
321
  """Get the inlets of the data job."""
@@ -333,3 +343,25 @@ class DataJob(
333
343
  self._ensure_datajob_inputoutput_props().outputDatasets.append(
334
344
  str(outlet_urn)
335
345
  )
346
+
347
@property
def fine_grained_lineages(self) -> List[models.FineGrainedLineageClass]:
    """Return the stored fine-grained lineages, or an empty list.

    An empty list is returned when the input/output aspect is missing or its
    ``fineGrainedLineages`` value is falsy.
    """
    io_aspect = self._get_datajob_inputoutput_props()
    if not io_aspect:
        return []
    if not io_aspect.fineGrainedLineages:
        return []
    return io_aspect.fineGrainedLineages
355
+
356
def set_fine_grained_lineages(
    self, lineages: List[models.FineGrainedLineageClass]
) -> None:
    """Extend the stored fine-grained lineages with ``lineages``.

    The list on the input/output aspect is created first when it is
    ``None``; entries already present are kept.
    """
    aspect = self._ensure_datajob_inputoutput_props()
    if aspect.fineGrainedLineages is None:
        aspect.fineGrainedLineages = []
    stored = aspect.fineGrainedLineages
    stored.extend(lineages)
363
+
364
@property
def env(self) -> Optional[str]:
    """Return the environment recorded on the data job properties.

    Returns:
        ``str()`` of the stored ``env`` value, or ``None`` when no
        environment is stored.
    """
    env = self._ensure_datajob_props().env
    # str(None) would yield the literal text "None", which contradicts the
    # Optional[str] contract, so a missing value is passed through as None.
    return None if env is None else str(env)