acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (76.0.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -36,8 +36,10 @@ csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
 datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
 datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
 datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
+datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
 datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
 datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
+datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
 dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
 dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
 delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
@@ -46,6 +48,7 @@ dremio = datahub.ingestion.source.dremio.dremio_source:DremioSource
46
48
  druid = datahub.ingestion.source.sql.druid:DruidSource
47
49
  dynamodb = datahub.ingestion.source.dynamodb.dynamodb:DynamoDBSource
48
50
  elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource
51
+ excel = datahub.ingestion.source.excel.source:ExcelSource
49
52
  feast = datahub.ingestion.source.feast:FeastRepositorySource
50
53
  file = datahub.ingestion.source.file:GenericFileSource
51
54
  fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource
@@ -53,6 +56,7 @@ gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource
53
56
  glue = datahub.ingestion.source.aws.glue:GlueSource
54
57
  grafana = datahub.ingestion.source.grafana.grafana_source:GrafanaSource
55
58
  hana = datahub.ingestion.source.sql.hana:HanaSource
59
+ hex = datahub.ingestion.source.hex.hex:HexSource
56
60
  hive = datahub.ingestion.source.sql.hive:HiveSource
57
61
  hive-metastore = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource
58
62
  iceberg = datahub.ingestion.source.iceberg.iceberg:IcebergSource
@@ -90,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
90
94
  salesforce = datahub.ingestion.source.salesforce:SalesforceSource
91
95
  sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
92
96
  slack = datahub.ingestion.source.slack.slack:SlackSource
97
+ snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
93
98
  snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
94
99
  snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
95
100
  snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
@@ -101,7 +106,7 @@ tableau = datahub.ingestion.source.tableau.tableau:TableauSource
101
106
  teradata = datahub.ingestion.source.sql.teradata:TeradataSource
102
107
  trino = datahub.ingestion.source.sql.trino:TrinoSource
103
108
  unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource
104
- vertexai = datahub.ingestion.source.vertexai:VertexAISource
109
+ vertexai = datahub.ingestion.source.vertexai.vertexai:VertexAISource
105
110
  vertica = datahub.ingestion.source.sql.vertica:VerticaSource
106
111
 
107
112
  [datahub.ingestion.transformer.plugins]
@@ -126,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
126
131
  pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
127
132
  replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
128
133
  replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
134
+ set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
129
135
  set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
130
136
  simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
131
137
  simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0rc18"
3
+ __version__ = "1.3.0.1rc9"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -55,9 +55,9 @@ class OperationCircuitBreaker(AbstractCircuitBreaker):
55
55
  which is set as Airflow connection.
56
56
  :param partition: The partition to check the operation.
57
57
  :param source_type: The source type to filter on. If not set it will accept any source type.
58
- See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
58
+ See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
59
59
  :param operation_type: The operation type to filter on. If not set it will accept any source type.
60
- See valid types here: https://datahubproject.io/docs/graphql/enums/#operationtype
60
+ See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
61
61
  """
62
62
 
63
63
  start_time_millis: int = int(
@@ -53,5 +53,5 @@ class BaseEntityAssertion(BaseAssertion):
53
53
  )
54
54
 
55
55
  trigger: Optional[AssertionTrigger] = v1_Field(
56
- description="The trigger schedule for assertion", alias="schedule"
56
+ default=None, description="The trigger schedule for assertion", alias="schedule"
57
57
  )
@@ -131,7 +131,7 @@ class SerializedResourceValue(BaseModel):
131
131
  elif isinstance(object, BaseModel):
132
132
  return SerializedResourceValue(
133
133
  content_type=models.SerializedValueContentTypeClass.JSON,
134
- blob=json.dumps(object.dict()).encode("utf-8"),
134
+ blob=json.dumps(object.dict(), sort_keys=True).encode("utf-8"),
135
135
  schema_type=models.SerializedValueSchemaTypeClass.JSON,
136
136
  schema_ref=object.__class__.__name__,
137
137
  )
@@ -71,7 +71,7 @@ class CorpGroup(BaseModel):
71
71
  _rename_admins_to_owners = pydantic_renamed_field("admins", "owners")
72
72
 
73
73
  @pydantic.validator("owners", "members", each_item=True)
74
- def make_urn_if_needed(v):
74
+ def make_urn_if_needed(cls, v):
75
75
  if isinstance(v, str):
76
76
  return builder.make_user_urn(v)
77
77
  return v
@@ -1,5 +1,5 @@
1
1
  import collections
2
- from typing import Iterable, List, Optional, Tuple
2
+ from typing import Dict, Iterable, List, Optional, Tuple, Union
3
3
 
4
4
  from ruamel.yaml import YAML
5
5
  from typing_extensions import Literal
@@ -25,6 +25,8 @@ from datahub.metadata.schema_classes import (
25
25
  FreshnessContractClass,
26
26
  SchemaContractClass,
27
27
  StatusClass,
28
+ StructuredPropertiesClass,
29
+ StructuredPropertyValueAssignmentClass,
28
30
  )
29
31
  from datahub.utilities.urns.urn import guess_entity_type
30
32
 
@@ -47,8 +49,12 @@ class DataContract(v1_ConfigModel):
47
49
  entity: str = v1_Field(
48
50
  description="The entity urn that the Data Contract is associated with"
49
51
  )
50
- # TODO: add support for properties
51
- # properties: Optional[Dict[str, str]] = None
52
+ properties: Optional[Dict[str, Union[str, float, List[Union[str, float]]]]] = (
53
+ v1_Field(
54
+ default=None,
55
+ description="Structured properties associated with the data contract.",
56
+ )
57
+ )
52
58
 
53
59
  schema_field: Optional[SchemaAssertion] = v1_Field(default=None, alias="schema")
54
60
 
@@ -172,6 +178,30 @@ class DataContract(v1_ConfigModel):
172
178
  )
173
179
  yield from dq_assertion_mcps
174
180
 
181
+ # Construct the structured properties aspect if properties are defined
182
+ structured_properties_aspect: Optional[StructuredPropertiesClass] = None
183
+ if self.properties:
184
+ property_assignments: List[StructuredPropertyValueAssignmentClass] = []
185
+ for key, value in self.properties.items():
186
+ # Use f-string formatting for the property URN, like in dataset.py
187
+ prop_urn = f"urn:li:structuredProperty:{key}"
188
+ # Ensure value is a list for StructuredPropertyValueAssignmentClass
189
+ values_list = value if isinstance(value, list) else [value]
190
+ property_assignments.append(
191
+ StructuredPropertyValueAssignmentClass(
192
+ propertyUrn=prop_urn,
193
+ values=[
194
+ str(v) for v in values_list
195
+ ], # Ensure all values are strings
196
+ )
197
+ )
198
+ if (
199
+ property_assignments
200
+ ): # Only create aspect if there are valid assignments
201
+ structured_properties_aspect = StructuredPropertiesClass(
202
+ properties=property_assignments
203
+ )
204
+
175
205
  # Now that we've generated the assertions, we can generate
176
206
  # the actual data contract.
177
207
  yield from MetadataChangeProposalWrapper.construct_many(
@@ -202,6 +232,8 @@ class DataContract(v1_ConfigModel):
202
232
  if True
203
233
  else None
204
234
  ),
235
+ # Add structured properties aspect if defined
236
+ structured_properties_aspect,
205
237
  ],
206
238
  )
207
239
 
@@ -9,6 +9,7 @@ from datahub.metadata.schema_classes import (
9
9
  AuditStampClass,
10
10
  DataFlowInfoClass,
11
11
  DataFlowSnapshotClass,
12
+ DataPlatformInstanceClass,
12
13
  GlobalTagsClass,
13
14
  MetadataChangeEventClass,
14
15
  OwnerClass,
@@ -29,7 +30,7 @@ class DataFlow:
29
30
  """The DataHub representation of data-flow.
30
31
 
31
32
  Args:
32
- urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://datahubproject.io/docs/what/urn/.
33
+ urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://docs.datahub.com/docs/what/urn/.
33
34
  id (str): Identifier of DataFlow in orchestrator.
34
35
  orchestrator (str): orchestrator. for example airflow.
35
36
  cluster (Optional[str]): [deprecated] Please use env.
@@ -39,8 +40,8 @@ class DataFlow:
39
40
  url (Optional[str]): URL pointing to DataFlow.
40
41
  tags (Set[str]): tags that need to be apply on DataFlow.
41
42
  owners (Set[str]): owners that need to be apply on DataFlow.
42
- platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://datahubproject.io/docs/platform-instances/.
43
- env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://datahubproject.io/docs/graphql/enums/#fabrictype.
43
+ platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://docs.datahub.com/docs/platform-instances/.
44
+ env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://docs.datahub.com/docs/graphql/enums/#fabrictype.
44
45
  """
45
46
 
46
47
  urn: DataFlowUrn = field(init=False)
@@ -164,6 +165,20 @@ class DataFlow:
164
165
  )
165
166
  yield mcp
166
167
 
168
+ if self.platform_instance:
169
+ instance = builder.make_dataplatform_instance_urn(
170
+ platform=self.orchestrator,
171
+ instance=self.platform_instance,
172
+ )
173
+ mcp = MetadataChangeProposalWrapper(
174
+ entityUrn=str(self.urn),
175
+ aspect=DataPlatformInstanceClass(
176
+ platform=builder.make_data_platform_urn(self.orchestrator),
177
+ instance=instance,
178
+ ),
179
+ )
180
+ yield mcp
181
+
167
182
  for owner in self.generate_ownership_aspect():
168
183
  mcp = MetadataChangeProposalWrapper(
169
184
  entityUrn=str(self.urn),
@@ -10,6 +10,7 @@ from datahub.metadata.schema_classes import (
10
10
  AzkabanJobTypeClass,
11
11
  DataJobInfoClass,
12
12
  DataJobInputOutputClass,
13
+ DataPlatformInstanceClass,
13
14
  FineGrainedLineageClass,
14
15
  GlobalTagsClass,
15
16
  OwnerClass,
@@ -45,6 +46,7 @@ class DataJob:
45
46
  outlets (List[str]): List of urns the DataProcessInstance produces
46
47
  fine_grained_lineages: Column lineage for the inlets and outlets
47
48
  upstream_urns: List[DataJobUrn] = field(default_factory=list)
49
+ platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to.
48
50
  """
49
51
 
50
52
  id: str
@@ -61,6 +63,7 @@ class DataJob:
61
63
  outlets: List[DatasetUrn] = field(default_factory=list)
62
64
  fine_grained_lineages: List[FineGrainedLineageClass] = field(default_factory=list)
63
65
  upstream_urns: List[DataJobUrn] = field(default_factory=list)
66
+ platform_instance: Optional[str] = None
64
67
 
65
68
  def __post_init__(self):
66
69
  job_flow_urn = DataFlowUrn.create_from_ids(
@@ -105,7 +108,9 @@ class DataJob:
105
108
  return [tags]
106
109
 
107
110
  def generate_mcp(
108
- self, materialize_iolets: bool = True
111
+ self,
112
+ generate_lineage: bool = True,
113
+ materialize_iolets: bool = True,
109
114
  ) -> Iterable[MetadataChangeProposalWrapper]:
110
115
  env: Optional[str] = None
111
116
  if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
@@ -127,6 +132,20 @@ class DataJob:
127
132
  )
128
133
  yield mcp
129
134
 
135
+ if self.platform_instance:
136
+ instance = builder.make_dataplatform_instance_urn(
137
+ platform=self.flow_urn.orchestrator,
138
+ instance=self.platform_instance,
139
+ )
140
+ mcp = MetadataChangeProposalWrapper(
141
+ entityUrn=str(self.urn),
142
+ aspect=DataPlatformInstanceClass(
143
+ platform=builder.make_data_platform_urn(self.flow_urn.orchestrator),
144
+ instance=instance,
145
+ ),
146
+ )
147
+ yield mcp
148
+
130
149
  mcp = MetadataChangeProposalWrapper(
131
150
  entityUrn=str(self.urn),
132
151
  aspect=StatusClass(
@@ -135,9 +154,10 @@ class DataJob:
135
154
  )
136
155
  yield mcp
137
156
 
138
- yield from self.generate_data_input_output_mcp(
139
- materialize_iolets=materialize_iolets
140
- )
157
+ if generate_lineage:
158
+ yield from self.generate_data_input_output_mcp(
159
+ materialize_iolets=materialize_iolets
160
+ )
141
161
 
142
162
  for owner in self.generate_ownership_aspect():
143
163
  mcp = MetadataChangeProposalWrapper(
@@ -159,6 +159,7 @@ class DataProcessInstance:
159
159
  env=self.template_urn.get_env(),
160
160
  orchestrator=self.template_urn.get_orchestrator_name(),
161
161
  id=self.template_urn.get_flow_id(),
162
+ platform_instance=self.data_platform_instance,
162
163
  )
163
164
  for mcp in template_object.generate_mcp():
164
165
  self._emit_mcp(mcp, emitter, callback)
@@ -168,6 +169,7 @@ class DataProcessInstance:
168
169
  id=self.template_urn.get_job_id(),
169
170
  upstream_urns=input_datajob_urns,
170
171
  flow_urn=self.template_urn.get_data_flow_urn(),
172
+ platform_instance=self.data_platform_instance,
171
173
  )
172
174
  for mcp in template_object.generate_mcp():
173
175
  self._emit_mcp(mcp, emitter, callback)
@@ -382,6 +384,7 @@ class DataProcessInstance:
382
384
  cluster=datajob.flow_urn.cluster,
383
385
  template_urn=datajob.urn,
384
386
  id=id,
387
+ data_platform_instance=datajob.platform_instance,
385
388
  )
386
389
  dpi._template_object = datajob
387
390
 
@@ -438,6 +441,7 @@ class DataProcessInstance:
438
441
  orchestrator=dataflow.orchestrator,
439
442
  cluster=cast(str, dataflow.env),
440
443
  template_urn=dataflow.urn,
444
+ data_platform_instance=dataflow.platform_instance,
441
445
  )
442
446
  dpi._template_object = dataflow
443
447
  return dpi
@@ -6,9 +6,10 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
6
6
 
7
7
  import pydantic
8
8
  from ruamel.yaml import YAML
9
+ from typing_extensions import assert_never
9
10
 
10
11
  import datahub.emitter.mce_builder as builder
11
- from datahub.configuration.common import ConfigModel
12
+ from datahub.configuration.common import ConfigModel, LaxStr
12
13
  from datahub.emitter.generic_emitter import Emitter
13
14
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
14
15
  from datahub.ingestion.graph.client import DataHubGraph
@@ -110,8 +111,9 @@ class DataProduct(ConfigModel):
110
111
  description: Optional[str] = None
111
112
  tags: Optional[List[str]] = None
112
113
  terms: Optional[List[str]] = None
113
- properties: Optional[Dict[str, str]] = None
114
+ properties: Optional[Dict[str, LaxStr]] = None
114
115
  external_url: Optional[str] = None
116
+ output_ports: Optional[List[str]] = None
115
117
  _original_yaml_dict: Optional[dict] = None
116
118
 
117
119
  @pydantic.validator("assets", each_item=True)
@@ -123,6 +125,22 @@ class DataProduct(ConfigModel):
123
125
 
124
126
  return v
125
127
 
128
+ @pydantic.validator("output_ports", each_item=True)
129
+ def output_ports_must_be_urns(cls, v: str) -> str:
130
+ try:
131
+ Urn.create_from_string(v)
132
+ except Exception as e:
133
+ raise ValueError(f"Output port {v} is not an urn: {e}") from e
134
+
135
+ return v
136
+
137
+ @pydantic.validator("output_ports", each_item=True)
138
+ def output_ports_must_be_from_asset_list(cls, v: str, values: dict) -> str:
139
+ assets = values.get("assets", [])
140
+ if v not in assets:
141
+ raise ValueError(f"Output port {v} is not in asset list")
142
+ return v
143
+
126
144
  @property
127
145
  def urn(self) -> str:
128
146
  if self.id.startswith("urn:li:dataProduct:"):
@@ -180,6 +198,7 @@ class DataProduct(ConfigModel):
180
198
  DataProductAssociationClass(
181
199
  destinationUrn=asset,
182
200
  created=self._mint_auditstamp("yaml"),
201
+ outputPort=asset in (self.output_ports or []),
183
202
  )
184
203
  for asset in self.assets
185
204
  ]
@@ -203,6 +222,7 @@ class DataProduct(ConfigModel):
203
222
  DataProductAssociationClass(
204
223
  destinationUrn=asset,
205
224
  created=self._mint_auditstamp("yaml"),
225
+ outputPort=asset in (self.output_ports or []),
206
226
  )
207
227
  for asset in self.assets or []
208
228
  ],
@@ -368,6 +388,13 @@ class DataProduct(ConfigModel):
368
388
  external_url=(
369
389
  data_product_properties.externalUrl if data_product_properties else None
370
390
  ),
391
+ output_ports=[
392
+ e.destinationUrn
393
+ for e in (data_product_properties.assets or [])
394
+ if e.outputPort
395
+ ]
396
+ if data_product_properties
397
+ else None,
371
398
  )
372
399
 
373
400
  def _patch_ownership(
@@ -414,7 +441,9 @@ class DataProduct(ConfigModel):
414
441
  "type": new_owner_type_map[owner_urn],
415
442
  }
416
443
  else:
417
- patches_drop[i] = o
444
+ patches_drop[i] = o.model_dump()
445
+ else:
446
+ assert_never(o)
418
447
 
419
448
  # Figure out what if any are new owners to add
420
449
  new_owners_to_add = {o for o in new_owner_type_map} - set(owners_matched)