acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ from typing import Any, Callable, Optional, Tuple, TypeVar
7
7
 
8
8
  import click
9
9
  import humanfriendly
10
- from packaging.version import Version
10
+ from packaging.version import InvalidVersion, Version
11
11
  from pydantic import BaseModel
12
12
 
13
13
  from datahub._version import __version__
@@ -28,10 +28,24 @@ class VersionStats(BaseModel, arbitrary_types_allowed=True):
28
28
  release_date: Optional[datetime] = None
29
29
 
30
30
 
31
+ def _safe_version_stats(version_string: str) -> Optional[VersionStats]:
32
+ """
33
+ Safely create a VersionStats object from a version string.
34
+ Returns None if the version string is invalid.
35
+ """
36
+ try:
37
+ return VersionStats(version=Version(version_string), release_date=None)
38
+ except InvalidVersion:
39
+ log.warning(f"Invalid version format received: {version_string!r}")
40
+ return None
41
+
42
+
31
43
  class ServerVersionStats(BaseModel):
32
44
  current: VersionStats
33
45
  latest: Optional[VersionStats] = None
34
46
  current_server_type: Optional[str] = None
47
+ current_server_default_cli_version: Optional[VersionStats] = None
48
+ is_cloud_server: Optional[bool] = None
35
49
 
36
50
 
37
51
  class ClientVersionStats(BaseModel):
@@ -44,7 +58,7 @@ class DataHubVersionStats(BaseModel):
44
58
  client: ClientVersionStats
45
59
 
46
60
 
47
- async def get_client_version_stats():
61
+ async def get_client_version_stats() -> ClientVersionStats:
48
62
  import aiohttp
49
63
 
50
64
  current_version_string = __version__
@@ -52,6 +66,7 @@ async def get_client_version_stats():
52
66
  client_version_stats: ClientVersionStats = ClientVersionStats(
53
67
  current=VersionStats(version=current_version, release_date=None), latest=None
54
68
  )
69
+
55
70
  async with aiohttp.ClientSession() as session:
56
71
  pypi_url = "https://pypi.org/pypi/acryl_datahub/json"
57
72
  async with session.get(pypi_url) as resp:
@@ -131,7 +146,9 @@ async def get_server_config(gms_url: str, token: Optional[str]) -> RestServiceCo
131
146
 
132
147
  async def get_server_version_stats(
133
148
  server: Optional[DataHubGraph] = None,
134
- ) -> Tuple[Optional[str], Optional[Version], Optional[datetime]]:
149
+ ) -> Tuple[
150
+ Optional[str], Optional[Version], Optional[str], Optional[datetime], Optional[bool]
151
+ ]:
135
152
  import aiohttp
136
153
 
137
154
  server_config: Optional[RestServiceConfig] = None
@@ -151,12 +168,15 @@ async def get_server_version_stats(
151
168
 
152
169
  server_type = None
153
170
  server_version: Optional[Version] = None
171
+ current_server_default_cli_version = None
154
172
  current_server_release_date = None
173
+ is_cloud_server: Optional[bool] = None
155
174
  if server_config:
156
175
  server_version_string = server_config.service_version
157
176
  commit_hash = server_config.commit_hash
158
177
  server_type = server_config.server_type
159
-
178
+ current_server_default_cli_version = server_config.default_cli_version
179
+ is_cloud_server = server_config.is_datahub_cloud
160
180
  if server_type == "quickstart" and commit_hash:
161
181
  async with aiohttp.ClientSession(
162
182
  headers={"Accept": "application/vnd.github.v3+json"}
@@ -171,7 +191,13 @@ async def get_server_version_stats(
171
191
  if server_version_string and server_version_string.startswith("v"):
172
192
  server_version = Version(server_version_string[1:])
173
193
 
174
- return (server_type, server_version, current_server_release_date)
194
+ return (
195
+ server_type,
196
+ server_version,
197
+ current_server_default_cli_version,
198
+ current_server_release_date,
199
+ is_cloud_server,
200
+ )
175
201
 
176
202
 
177
203
  def retrieve_version_stats(
@@ -214,7 +240,9 @@ async def _retrieve_version_stats(
214
240
  (
215
241
  current_server_type,
216
242
  current_server_version,
243
+ current_server_default_cli_version,
217
244
  current_server_release_date,
245
+ is_cloud_server,
218
246
  ) = results[2]
219
247
 
220
248
  server_version_stats = None
@@ -223,12 +251,18 @@ async def _retrieve_version_stats(
223
251
  current=VersionStats(
224
252
  version=current_server_version, release_date=current_server_release_date
225
253
  ),
254
+ current_server_default_cli_version=(
255
+ _safe_version_stats(current_server_default_cli_version)
256
+ if current_server_default_cli_version
257
+ else None
258
+ ),
226
259
  latest=(
227
260
  VersionStats(version=last_server_version, release_date=last_server_date)
228
261
  if last_server_version
229
262
  else None
230
263
  ),
231
264
  current_server_type=current_server_type,
265
+ is_cloud_server=is_cloud_server,
232
266
  )
233
267
 
234
268
  if client_version_stats and server_version_stats:
@@ -255,21 +289,14 @@ def valid_client_version(version: Version) -> bool:
255
289
  """Only version strings like 0.4.5 and 0.6.7.8 are valid. 0.8.6.7rc1 is not"""
256
290
  if version.is_prerelease or version.is_postrelease or version.is_devrelease:
257
291
  return False
258
- if version.major == 0 and version.minor in [8, 9, 10, 11]:
259
- return True
260
-
261
- return False
292
+ return True
262
293
 
263
294
 
264
295
  def valid_server_version(version: Version) -> bool:
265
296
  """Only version strings like 0.8.x, 0.9.x or 0.10.x are valid. 0.1.x is not"""
266
297
  if version.is_prerelease or version.is_postrelease or version.is_devrelease:
267
298
  return False
268
-
269
- if version.major == 0 and version.minor in [8, 9, 10]:
270
- return True
271
-
272
- return False
299
+ return True
273
300
 
274
301
 
275
302
  def is_client_server_compatible(client: VersionStats, server: VersionStats) -> int:
@@ -291,6 +318,27 @@ def is_client_server_compatible(client: VersionStats, server: VersionStats) -> i
291
318
  return server.version.micro - client.version.micro
292
319
 
293
320
 
321
+ def is_server_default_cli_ahead(version_stats: DataHubVersionStats) -> bool:
322
+ """
323
+ Check if the server default CLI version is ahead of the current CLI version.
324
+ Returns True if server default CLI is newer and both versions are valid.
325
+ """
326
+ if not version_stats.server.current_server_default_cli_version:
327
+ return False
328
+
329
+ current_cli = version_stats.client.current
330
+ server_default_cli = version_stats.server.current_server_default_cli_version
331
+
332
+ is_valid_client_version = valid_client_version(current_cli.version)
333
+ is_valid_server_version = valid_client_version(server_default_cli.version)
334
+
335
+ if not (is_valid_client_version and is_valid_server_version):
336
+ return False
337
+
338
+ compatibility_result = is_client_server_compatible(current_cli, server_default_cli)
339
+ return compatibility_result > 0
340
+
341
+
294
342
  def _maybe_print_upgrade_message(
295
343
  version_stats: Optional[DataHubVersionStats],
296
344
  ) -> None:
@@ -312,9 +360,15 @@ def _maybe_print_upgrade_message(
312
360
  if version_stats.client.latest
313
361
  else None
314
362
  )
315
- client_server_compat = is_client_server_compatible(
316
- version_stats.client.current, version_stats.server.current
317
- )
363
+ client_server_compat = 0
364
+ # Skip version compatibility checks for cloud servers (serverEnv="cloud")
365
+ # Cloud servers use different versioning schemes between server and CLI
366
+ is_cloud = version_stats.server.is_cloud_server
367
+
368
+ if not is_cloud:
369
+ client_server_compat = is_client_server_compatible(
370
+ version_stats.client.current, version_stats.server.current
371
+ )
318
372
 
319
373
  if latest_release_date and current_release_date:
320
374
  assert version_stats.client.latest
@@ -429,6 +483,8 @@ def check_upgrade_post(
429
483
 
430
484
 
431
485
  def check_upgrade(func: Callable[..., T]) -> Callable[..., T]:
486
+ log.debug(f"Checking upgrade for {func.__module__}.{func.__name__}")
487
+
432
488
  @wraps(func)
433
489
  def async_wrapper(*args: Any, **kwargs: Any) -> Any:
434
490
  with PerfTimer() as timer:
@@ -1,7 +1,6 @@
1
1
  import collections
2
2
  import gzip
3
3
  import logging
4
- import os
5
4
  import pathlib
6
5
  import pickle
7
6
  import shutil
@@ -28,18 +27,18 @@ from typing import (
28
27
  Union,
29
28
  )
30
29
 
30
+ from datahub.configuration.env_vars import get_override_sqlite_version_req
31
31
  from datahub.ingestion.api.closeable import Closeable
32
32
  from datahub.utilities.sentinels import Unset, unset
33
33
 
34
34
  logger: logging.Logger = logging.getLogger(__name__)
35
35
 
36
- OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR = (
37
- os.environ.get("OVERRIDE_SQLITE_VERSION_REQ") or ""
38
- )
39
- OVERRIDE_SQLITE_VERSION_REQUIREMENT = (
40
- OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR
41
- and OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR.lower() != "false"
42
- )
36
+
37
def _get_sqlite_version_override() -> bool:
    """Report whether the SQLite version requirement is overridden.

    The value is read on every call rather than once at import time. An
    unset/empty value, or the string "false" in any letter case, means
    "no override"; any other value enables it.
    """
    raw_value = get_override_sqlite_version_req()
    if not raw_value:
        return False
    return raw_value.lower() != "false"
41
+
43
42
 
44
43
  _DEFAULT_FILE_NAME = "sqlite.db"
45
44
  _DEFAULT_TABLE_NAME = "data"
@@ -231,7 +230,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
231
230
  # We use the ON CONFLICT clause to implement UPSERTs with sqlite.
232
231
  # This was added in 3.24.0 from 2018-06-04.
233
232
  # See https://www.sqlite.org/lang_conflict.html
234
- if OVERRIDE_SQLITE_VERSION_REQUIREMENT:
233
+ if _get_sqlite_version_override():
235
234
  self._use_sqlite_on_conflict = False
236
235
  else:
237
236
  raise RuntimeError("SQLite version 3.24.0 or later is required")
@@ -1,6 +1,7 @@
1
- import os
2
1
  import sys
3
2
 
3
+ from datahub.configuration.env_vars import get_test_mode
4
+
4
5
 
5
6
def is_pytest_running() -> bool:
    """Return True when pytest has been imported and test mode equals "1"."""
    if "pytest" not in sys.modules:
        return False
    return get_test_mode() == "1"
@@ -15,13 +15,13 @@ import collections
15
15
  import contextlib
16
16
  import itertools
17
17
  import logging
18
- import os
19
18
  import pathlib
20
19
  import sys
21
20
  from typing import Deque, Iterator, Optional
22
21
 
23
22
  import click
24
23
 
24
+ from datahub.configuration.env_vars import get_no_color, get_suppress_logging_manager
25
25
  from datahub.utilities.tee_io import TeeIO
26
26
 
27
27
  BASE_LOGGING_FORMAT = (
@@ -38,7 +38,7 @@ IN_MEMORY_LOG_BUFFER_SIZE = 2000 # lines
38
38
  IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH = 2000 # characters
39
39
 
40
40
 
41
- NO_COLOR = os.environ.get("NO_COLOR", False)
41
+ NO_COLOR = get_no_color()
42
42
 
43
43
 
44
44
  def extract_name_from_filename(filename: str, fallback_name: str) -> str:
@@ -179,6 +179,18 @@ class _LogBuffer:
179
179
  return text
180
180
 
181
181
 
182
class _ResilientStreamHandler(logging.StreamHandler):
    """StreamHandler that gracefully handles closed streams."""

    def emit(self, record: logging.LogRecord) -> None:
        """Write ``record`` to the stream, ignoring closed-stream failures."""
        try:
            super().emit(record)
        except (ValueError, OSError):
            # Reached only when the base class's own error reporting fails,
            # e.g. because sys.stderr is closed too (pytest teardown).
            # Silently ignore to prevent test failures.
            pass

    def handleError(self, record: logging.LogRecord) -> None:
        """Stay quiet about write failures caused by a closed stream.

        ``logging.StreamHandler.emit`` catches write errors itself and hands
        them to ``handleError``, which by default prints a traceback to
        ``sys.stderr``. Without this override a closed stream would still
        produce that noise whenever stderr is open.
        """
        if getattr(self.stream, "closed", False):
            return
        super().handleError(record)
192
+
193
+
182
194
  class _BufferLogHandler(logging.Handler):
183
195
  def __init__(self, storage: _LogBuffer) -> None:
184
196
  super().__init__()
@@ -201,7 +213,11 @@ class _BufferLogHandler(logging.Handler):
201
213
def _remove_all_handlers(logger: logging.Logger) -> None:
    """Detach every handler from ``logger`` and close it on a best-effort basis."""
    # Iterate over a snapshot: removeHandler mutates logger.handlers.
    for attached in list(logger.handlers):
        logger.removeHandler(attached)
        # The handler's stream may already be closed (e.g. pytest teardown).
        with contextlib.suppress(ValueError, OSError):
            attached.close()
205
221
 
206
222
 
207
223
  _log_buffer = _LogBuffer(maxlen=IN_MEMORY_LOG_BUFFER_SIZE)
@@ -219,14 +235,14 @@ _default_formatter = logging.Formatter(BASE_LOGGING_FORMAT)
219
235
  def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[None]:
220
236
  _log_buffer.clear()
221
237
 
222
- if os.environ.get("DATAHUB_SUPPRESS_LOGGING_MANAGER") == "1":
238
+ if get_suppress_logging_manager() == "1":
223
239
  # If we're running in pytest, we don't want to configure logging.
224
240
  yield
225
241
  return
226
242
 
227
243
  with contextlib.ExitStack() as stack:
228
244
  # Create stdout handler.
229
- stream_handler = logging.StreamHandler()
245
+ stream_handler = _ResilientStreamHandler()
230
246
  stream_handler.addFilter(_DatahubLogFilter(debug=debug))
231
247
  stream_handler.setFormatter(_stream_formatter)
232
248
 
@@ -237,7 +253,7 @@ def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[N
237
253
  tee = TeeIO(sys.stdout, file)
238
254
  stack.enter_context(contextlib.redirect_stdout(tee)) # type: ignore
239
255
 
240
- file_handler = logging.StreamHandler(file)
256
+ file_handler = _ResilientStreamHandler(file)
241
257
  file_handler.addFilter(_DatahubLogFilter(debug=True))
242
258
  file_handler.setFormatter(_default_formatter)
243
259
  else:
@@ -83,7 +83,7 @@ class Constants:
83
83
  MATCH = "match"
84
84
  USER_OWNER = "user"
85
85
  GROUP_OWNER = "group"
86
- OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float]
86
+ OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float, list]
87
87
  TAG_PARTITION_KEY = "PARTITION_KEY"
88
88
  TAG_DIST_KEY = "DIST_KEY"
89
89
  TAG_SORT_KEY = "SORT_KEY"
@@ -455,7 +455,34 @@ class OperationProcessor:
455
455
  # function to check if a match clause is satisfied to a value.
456
456
  if not any(
457
457
  isinstance(raw_props_value, t) for t in Constants.OPERAND_DATATYPE_SUPPORTED
458
- ) or not isinstance(raw_props_value, type(match_clause)):
458
+ ):
459
+ return None
460
+
461
+ # Handle list values by checking if any item in the list matches
462
+ if isinstance(raw_props_value, list):
463
+ # For lists, we need to find at least one matching item
464
+ # Return a match with the concatenated values of all matching items
465
+ matching_items = []
466
+ for item in raw_props_value:
467
+ if isinstance(item, str):
468
+ match = re.match(match_clause, item)
469
+ if match:
470
+ matching_items.append(item)
471
+ elif isinstance(match_clause, type(item)):
472
+ match = re.match(str(match_clause), str(item))
473
+ if match:
474
+ matching_items.append(str(item))
475
+
476
+ if matching_items:
477
+ # Create a synthetic match object with all matching items joined
478
+ combined_value = ",".join(matching_items)
479
+ return re.match(
480
+ ".*", combined_value
481
+ ) # Always matches, returns combined value
482
+ return None
483
+
484
+ # Handle scalar values (existing logic)
485
+ elif not isinstance(raw_props_value, type(match_clause)):
459
486
  return None
460
487
  elif isinstance(raw_props_value, str):
461
488
  return re.match(match_clause, raw_props_value)
@@ -1,12 +1,13 @@
1
- import os
2
1
  import pathlib
3
2
  import tempfile
4
3
 
5
4
  import requests
6
5
 
7
- DOCKER_COMPOSE_BASE = os.getenv(
8
- "DOCKER_COMPOSE_BASE",
9
- "https://raw.githubusercontent.com/datahub-project/datahub/master",
6
+ from datahub.configuration.env_vars import get_docker_compose_base
7
+
8
+ DOCKER_COMPOSE_BASE = (
9
+ get_docker_compose_base()
10
+ or "https://raw.githubusercontent.com/datahub-project/datahub/master"
10
11
  )
11
12
  BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
12
13
  BOOTSTRAP_MCES_URL = f"{DOCKER_COMPOSE_BASE}/{BOOTSTRAP_MCES_FILE}"
@@ -183,6 +183,14 @@ class RestServiceConfig:
183
183
  managed_ingestion = self.raw_config.get("managedIngestion") or {}
184
184
  return managed_ingestion.get("enabled", False)
185
185
 
186
@property
def default_cli_version(self) -> Optional[str]:
    """
    Return ``managedIngestion.defaultCliVersion`` from the raw config.

    Yields None when the ``managedIngestion`` section is missing or empty, or
    when it carries no ``defaultCliVersion`` key.
    """
    section = self.raw_config.get("managedIngestion")
    if not section:
        return None
    return section.get("defaultCliVersion")
193
+
186
194
  @property
187
195
  def is_datahub_cloud(self) -> bool:
188
196
  """
@@ -234,7 +242,8 @@ class RestServiceConfig:
234
242
 
235
243
  # Check if this is a config-based feature
236
244
  if feature in config_based_features:
237
- return config_based_features[feature]()
245
+ result = config_based_features[feature]()
246
+ return bool(result) if result is not None else False
238
247
 
239
248
  # For environment-based features, determine requirements based on cloud vs. non-cloud
240
249
  deployment_type = "cloud" if self.is_datahub_cloud else "core"
@@ -272,8 +272,11 @@ class SQLAlchemyQueryCombiner:
272
272
  self.report.uncombined_queries_issued += 1
273
273
  return _sa_execute_underlying_method(conn, query, *args, **kwargs)
274
274
 
275
- with _sa_execute_method_patching_lock, unittest.mock.patch(
276
- "sqlalchemy.engine.Connection.execute", _sa_execute_fake
275
+ with (
276
+ _sa_execute_method_patching_lock,
277
+ unittest.mock.patch(
278
+ "sqlalchemy.engine.Connection.execute", _sa_execute_fake
279
+ ),
277
280
  ):
278
281
  yield self
279
282
 
@@ -56,3 +56,7 @@ class TopKDict(DefaultDict[_KT, _VT]):
56
56
 
57
57
def int_top_k_dict() -> TopKDict[str, int]:
    """Create an empty ``TopKDict`` constructed with ``int``."""
    counters: TopKDict[str, int] = TopKDict(int)
    return counters
59
+
60
+
61
def float_top_k_dict() -> TopKDict[str, float]:
    """Create an empty ``TopKDict`` constructed with ``float``."""
    totals: TopKDict[str, float] = TopKDict(float)
    return totals
@@ -1,8 +1,47 @@
1
- from datahub.metadata.urns import Urn
1
+ from typing import Optional
2
2
 
3
- __all__ = ["Urn", "guess_entity_type"]
3
+ from datahub.metadata.urns import (
4
+ DataPlatformUrn,
5
+ Urn,
6
+ )
7
+
8
+ __all__ = ["Urn", "guess_entity_type", "guess_platform_name"]
4
9
 
5
10
 
6
11
def guess_entity_type(urn: str) -> str:
    """Return the third colon-separated segment of a ``urn:li:`` string.

    For ``urn:li:dataset:(...)`` this is ``"dataset"``. The prefix is checked
    with an ``assert`` only.
    """
    assert urn.startswith("urn:li:"), "urns must start with urn:li:"
    # Splitting beyond the third separator is unnecessary for index 2.
    segments = urn.split(":", 3)
    return segments[2]
14
+
15
+
16
def guess_platform_name(urn: str) -> Optional[str]:
    """Best-effort extraction of the platform name from a URN string.

    The URN is parsed with ``Urn.from_string``. If the parsed object exposes a
    ``platform`` attribute, that value is parsed as a ``DataPlatformUrn`` and
    its entity id string is returned. Otherwise the first available attribute
    among ``orchestrator``, ``dashboard_tool`` and ``ml_model_tool`` is
    returned as-is.

    Returns:
        The platform name, or None when the parsed URN exposes none of the
        attributes above.

    Errors raised by ``Urn.from_string`` for the input string are not caught
    here and propagate to the caller.
    """
    urn_obj = Urn.from_string(urn)

    # Not every URN class defines these attributes, hence getattr throughout.
    platform = getattr(urn_obj, "platform", None)
    if platform is not None:
        return DataPlatformUrn.from_string(platform).get_entity_id_as_string()

    # Fallback attributes, tried in this order.
    missing = object()
    for attr_name in ("orchestrator", "dashboard_tool", "ml_model_tool"):
        value = getattr(urn_obj, attr_name, missing)
        if value is not missing:
            return value

    return None