acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,16 @@
1
1
  import logging
2
2
  import pathlib
3
3
  from pathlib import Path
4
+ from typing import Optional
4
5
 
5
6
  import click
6
7
  from click_default_group import DefaultGroup
7
8
 
8
9
  from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
9
10
  from datahub.cli.specific.file_loader import load_file
10
- from datahub.ingestion.graph.client import get_default_graph
11
+ from datahub.configuration.common import OperationalError
12
+ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
11
13
  from datahub.ingestion.graph.config import ClientMode
12
- from datahub.telemetry import telemetry
13
14
  from datahub.upgrade import upgrade
14
15
 
15
16
  logger = logging.getLogger(__name__)
@@ -33,7 +34,6 @@ def user() -> None:
33
34
  help="Use this flag to overwrite the information that is set via the UI",
34
35
  )
35
36
  @upgrade.check_upgrade
36
- @telemetry.with_telemetry()
37
37
  def upsert(file: Path, override_editable: bool) -> None:
38
38
  """Create or Update a User in DataHub"""
39
39
 
@@ -57,3 +57,172 @@ def upsert(file: Path, override_editable: bool) -> None:
57
57
  f"Update failed for id {user_config.get('id')}. due to {e}",
58
58
  fg="red",
59
59
  )
60
+
61
+
62
+ def validate_user_id_options(
63
+ user_id: Optional[str], email_as_id: bool, email: str
64
+ ) -> str:
65
+ """
66
+ Validate user ID options and return the final user ID to use.
67
+
68
+ Args:
69
+ user_id: Optional explicit user ID
70
+ email_as_id: Whether to use email as the user ID
71
+ email: User's email address
72
+
73
+ Returns:
74
+ The final user ID to use for the URN
75
+
76
+ Raises:
77
+ ValueError: If validation fails (neither or both options provided)
78
+ """
79
+ if not user_id and not email_as_id:
80
+ raise ValueError("Must specify either --id or --email-as-id flag")
81
+
82
+ if user_id and email_as_id:
83
+ raise ValueError("Cannot specify both --id and --email-as-id flag")
84
+
85
+ if email_as_id:
86
+ return email
87
+
88
+ assert user_id is not None
89
+ return user_id
90
+
91
+
92
+ def create_native_user_in_datahub(
93
+ graph: DataHubGraph,
94
+ user_id: str,
95
+ email: str,
96
+ display_name: str,
97
+ password: str,
98
+ role: Optional[str] = None,
99
+ ) -> str:
100
+ """
101
+ Create a native DataHub user.
102
+
103
+ Args:
104
+ graph: DataHubGraph client
105
+ user_id: User identifier (used in URN)
106
+ email: User's email address
107
+ display_name: User's full display name
108
+ password: User's password
109
+ role: Optional role to assign (Admin, Editor, or Reader)
110
+
111
+ Returns:
112
+ The created user's URN
113
+
114
+ Raises:
115
+ ValueError: If user already exists or role is invalid
116
+ OperationalError: If user creation fails due to API/network errors
117
+ """
118
+ user_urn = f"urn:li:corpuser:{user_id}"
119
+
120
+ if graph.exists(user_urn):
121
+ raise ValueError(f"User with ID {user_id} already exists (urn: {user_urn})")
122
+
123
+ created_user_urn = graph.create_native_user(
124
+ user_id=user_id,
125
+ email=email,
126
+ display_name=display_name,
127
+ password=password,
128
+ role=role,
129
+ )
130
+
131
+ return created_user_urn
132
+
133
+
134
+ @user.command(name="add")
135
+ @click.option("--id", "user_id", type=str, help="User identifier (used in URN)")
136
+ @click.option("--email", required=True, type=str, help="User's email address")
137
+ @click.option(
138
+ "--email-as-id",
139
+ is_flag=True,
140
+ default=False,
141
+ help="Use email address as user ID (alternative to --id)",
142
+ )
143
+ @click.option(
144
+ "--display-name", required=True, type=str, help="User's full display name"
145
+ )
146
+ @click.option(
147
+ "--password",
148
+ is_flag=True,
149
+ default=False,
150
+ help="Prompt for password (hidden input)",
151
+ )
152
+ @click.option(
153
+ "--role",
154
+ required=False,
155
+ type=click.Choice(
156
+ ["Admin", "Editor", "Reader", "admin", "editor", "reader"], case_sensitive=False
157
+ ),
158
+ help="Optional role to assign (Admin, Editor, or Reader)",
159
+ )
160
+ @upgrade.check_upgrade
161
+ def add(
162
+ user_id: str,
163
+ email: str,
164
+ email_as_id: bool,
165
+ display_name: str,
166
+ password: bool,
167
+ role: str,
168
+ ) -> None:
169
+ """Create a native DataHub user with email/password authentication"""
170
+
171
+ try:
172
+ final_user_id = validate_user_id_options(user_id, email_as_id, email)
173
+ except ValueError as e:
174
+ click.secho(f"Error: {str(e)}", fg="red")
175
+ raise SystemExit(1) from e
176
+
177
+ if not password:
178
+ click.secho(
179
+ "Error: --password flag is required to prompt for password input",
180
+ fg="red",
181
+ )
182
+ raise SystemExit(1)
183
+
184
+ password_value = click.prompt(
185
+ "Enter password", hide_input=True, confirmation_prompt=True
186
+ )
187
+
188
+ with get_default_graph(ClientMode.CLI) as graph:
189
+ try:
190
+ created_user_urn = create_native_user_in_datahub(
191
+ graph, final_user_id, email, display_name, password_value, role
192
+ )
193
+
194
+ if role:
195
+ click.secho(
196
+ f"Successfully created user {final_user_id} with role {role.capitalize()} (URN: {created_user_urn})",
197
+ fg="green",
198
+ )
199
+ else:
200
+ click.secho(
201
+ f"Successfully created user {final_user_id} (URN: {created_user_urn})",
202
+ fg="green",
203
+ )
204
+ except ValueError as e:
205
+ click.secho(f"Error: {str(e)}", fg="red")
206
+ raise SystemExit(1) from e
207
+ except OperationalError as e:
208
+ error_msg = e.message if hasattr(e, "message") else str(e.args[0])
209
+ click.secho(f"Error: {error_msg}", fg="red")
210
+
211
+ if hasattr(e, "info") and e.info:
212
+ logger.debug(f"Error details: {e.info}")
213
+ if "status_code" in e.info:
214
+ click.secho(f" HTTP Status: {e.info['status_code']}", fg="red")
215
+ if "response_text" in e.info:
216
+ click.secho(
217
+ f" Response: {e.info['response_text'][:200]}", fg="red"
218
+ )
219
+
220
+ click.secho(
221
+ "\nTip: Run with DATAHUB_DEBUG=1 environment variable for detailed logs",
222
+ fg="yellow",
223
+ )
224
+ raise SystemExit(1) from e
225
+ except Exception as e:
226
+ click.secho(f"Unexpected error: {str(e)}", fg="red")
227
+ logger.exception("Unexpected error during user creation")
228
+ raise SystemExit(1) from e
datahub/cli/state_cli.py CHANGED
@@ -6,7 +6,6 @@ from click_default_group import DefaultGroup
6
6
 
7
7
  from datahub.ingestion.graph.client import get_default_graph
8
8
  from datahub.ingestion.graph.config import ClientMode
9
- from datahub.telemetry import telemetry
10
9
  from datahub.upgrade import upgrade
11
10
 
12
11
  logger = logging.getLogger(__name__)
@@ -22,7 +21,6 @@ def state() -> None:
22
21
  @click.option("--pipeline-name", required=True, type=str)
23
22
  @click.option("--platform", required=True, type=str)
24
23
  @upgrade.check_upgrade
25
- @telemetry.with_telemetry()
26
24
  def inspect(pipeline_name: str, platform: str) -> None:
27
25
  """
28
26
  Get the latest stateful ingestion state for a given pipeline.
@@ -10,7 +10,6 @@ from requests import Response
10
10
  from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
11
11
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
12
12
  from datahub.ingestion.graph.config import ClientMode
13
- from datahub.telemetry import telemetry
14
13
  from datahub.upgrade import upgrade
15
14
  from datahub.utilities.urns.urn import Urn
16
15
 
@@ -130,7 +129,6 @@ def get_timeline(
130
129
  @click.option("--raw", type=bool, is_flag=True, help="Show the raw diff")
131
130
  @click.pass_context
132
131
  @upgrade.check_upgrade
133
- @telemetry.with_telemetry()
134
132
  def timeline(
135
133
  ctx: Any,
136
134
  urn: str,
@@ -1,20 +1,25 @@
1
+ import dataclasses
1
2
  import re
2
3
  import unittest.mock
3
4
  from abc import ABC, abstractmethod
4
5
  from enum import auto
5
6
  from typing import (
6
7
  IO,
8
+ TYPE_CHECKING,
9
+ Annotated,
7
10
  Any,
8
11
  ClassVar,
9
12
  Dict,
10
13
  List,
11
14
  Optional,
12
15
  Type,
16
+ TypeVar,
13
17
  Union,
14
18
  runtime_checkable,
15
19
  )
16
20
 
17
21
  import pydantic
22
+ import pydantic_core
18
23
  from cached_property import cached_property
19
24
  from pydantic import BaseModel, Extra, ValidationError
20
25
  from pydantic.fields import Field
@@ -83,6 +88,29 @@ def redact_raw_config(obj: Any) -> Any:
83
88
  return obj
84
89
 
85
90
 
91
# ``HiddenFromDocs[T]`` is a field-annotation wrapper with two definitions:
# at runtime it is pydantic's ``SkipJsonSchema``; for static type checkers it
# is declared as a plain ``Annotated`` alias over a type variable.
if not TYPE_CHECKING:
    HiddenFromDocs = pydantic.json_schema.SkipJsonSchema
else:
    AnyType = TypeVar("AnyType")
    HiddenFromDocs = Annotated[AnyType, ...]
96
+
97
# A ``str`` field type whose input is passed through ``str()`` before
# validation, so non-string values supplied in config are stringified.
LaxStr = Annotated[str, pydantic.BeforeValidator(lambda value: str(value))]
98
+
99
+
100
@dataclasses.dataclass(frozen=True)
class SupportedSources:
    """Immutable holder for a list of source names, surfaced in JSON schema.

    When pydantic asks this object for its JSON schema, the schema produced
    by the wrapped handler is extended with
    ``schema_extra["supported_sources"]`` set to :attr:`sources`.
    """

    # Source names recorded under ``schema_extra.supported_sources``.
    sources: List[str]

    def __get_pydantic_json_schema__(
        self,
        core_schema: pydantic_core.core_schema.CoreSchema,
        handler: pydantic.GetJsonSchemaHandler,
    ) -> pydantic.json_schema.JsonSchemaValue:
        """Build the default schema via ``handler`` and tag it with the sources."""
        schema = handler(core_schema)
        extra = schema.setdefault("schema_extra", {})
        extra["supported_sources"] = self.sources
        return schema
112
+
113
+
86
114
  class ConfigModel(BaseModel):
87
115
  class Config:
88
116
  @staticmethod
@@ -141,6 +169,17 @@ class PermissiveConfigModel(ConfigModel):
141
169
  extra = Extra.allow
142
170
 
143
171
 
172
class ConnectionModel(BaseModel):
    """Represents the config associated with a connection"""

    class Config:
        # Unknown fields are allowed under both pydantic major versions; only
        # the spelling of the setting differs ("allow" vs ``Extra.allow``).
        if PYDANTIC_VERSION_2:
            extra = "allow"
        else:
            extra = Extra.allow
            # NOTE(review): nesting reconstructed from a flattened diff view;
            # this option is presumed to belong to the pydantic-v1 branch --
            # confirm against the real source file.
            underscore_attrs_are_private = True
181
+
182
+
144
183
  class TransformerSemantics(ConfigEnum):
145
184
  """Describes semantics for aspect changes"""
146
185
 
@@ -334,4 +373,4 @@ class KeyValuePattern(ConfigModel):
334
373
 
335
374
 
336
375
  class VersionedConfig(ConfigModel):
337
- version: str = "1"
376
+ version: LaxStr = "1"
@@ -1,13 +1,16 @@
1
- from typing import Type
1
+ from typing import TYPE_CHECKING, Type
2
2
 
3
3
  import pydantic
4
4
 
5
5
  from datahub.ingestion.api.global_context import get_graph_context
6
6
 
7
+ if TYPE_CHECKING:
8
+ from pydantic.deprecated.class_validators import V1RootValidator
9
+
7
10
 
8
11
  def auto_connection_resolver(
9
12
  connection_field: str = "connection",
10
- ) -> classmethod:
13
+ ) -> "V1RootValidator":
11
14
  def _resolve_connection(cls: Type, values: dict) -> dict:
12
15
  if connection_field in values:
13
16
  connection_urn = values.pop(connection_field)
@@ -0,0 +1,331 @@
1
+ # ABOUTME: Central registry for all environment variables used in metadata-ingestion.
2
+ # ABOUTME: All environment variable reads should go through this module for discoverability and maintainability.
3
+
4
+ import os
5
+ from typing import Optional
6
+
7
+ # ============================================================================
8
+ # Core DataHub Configuration
9
+ # ============================================================================
10
+
11
+
12
def get_gms_url() -> Optional[str]:
    """Return ``DATAHUB_GMS_URL`` (the complete GMS URL), or ``None`` if unset.

    Per the registry notes, the full URL takes precedence over the separate
    host/port variables.
    """
    return os.environ.get("DATAHUB_GMS_URL")
15
+
16
+
17
def get_gms_host() -> Optional[str]:
    """Return ``DATAHUB_GMS_HOST`` (deprecated fallback for the URL), or ``None``."""
    return os.environ.get("DATAHUB_GMS_HOST")
20
+
21
+
22
def get_gms_port() -> Optional[str]:
    """Return ``DATAHUB_GMS_PORT`` as the raw string, or ``None`` if unset."""
    return os.environ.get("DATAHUB_GMS_PORT")
25
+
26
+
27
def get_gms_protocol() -> str:
    """Return ``DATAHUB_GMS_PROTOCOL`` (http/https); defaults to ``"http"``."""
    return os.environ.get("DATAHUB_GMS_PROTOCOL", "http")
30
+
31
+
32
def get_gms_token() -> Optional[str]:
    """Return the GMS authentication token from ``DATAHUB_GMS_TOKEN``, or ``None``."""
    return os.environ.get("DATAHUB_GMS_TOKEN")
35
+
36
+
37
def get_system_client_id() -> Optional[str]:
    """Return ``DATAHUB_SYSTEM_CLIENT_ID`` (system client ID for auth), or ``None``."""
    return os.environ.get("DATAHUB_SYSTEM_CLIENT_ID")
40
+
41
+
42
def get_system_client_secret() -> Optional[str]:
    """Return ``DATAHUB_SYSTEM_CLIENT_SECRET`` (system client secret), or ``None``."""
    return os.environ.get("DATAHUB_SYSTEM_CLIENT_SECRET")
45
+
46
+
47
def get_skip_config() -> bool:
    """Report whether config-file loading should be skipped.

    True only when ``DATAHUB_SKIP_CONFIG`` equals ``"true"`` (case-insensitive).
    """
    raw = os.environ.get("DATAHUB_SKIP_CONFIG", "")
    return raw.lower() == "true"
50
+
51
+
52
def get_gms_base_path() -> str:
    """Return ``DATAHUB_GMS_BASE_PATH`` (base path for GMS endpoints); default ``""``."""
    return os.environ.get("DATAHUB_GMS_BASE_PATH", "")
55
+
56
+
57
+ # ============================================================================
58
+ # REST Emitter Configuration
59
+ # ============================================================================
60
+
61
+
62
def get_rest_emitter_default_retry_max_times() -> str:
    """Return the max retry count for failed requests as a raw string.

    Read from ``DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES``; default ``"4"``.
    """
    return os.environ.get("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "4")
65
+
66
+
67
def get_rest_emitter_batch_max_payload_bytes() -> int:
    """Return the maximum batch payload size in bytes.

    Read from ``DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES``; defaults to
    15 MiB. Raises ``ValueError`` if the variable is not an integer literal.
    """
    fallback = str(15 * 1024 * 1024)
    raw = os.environ.get("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES", fallback)
    return int(raw)
72
+
73
+
74
def get_rest_emitter_batch_max_payload_length() -> int:
    """Return the maximum number of MCPs per batch.

    Read from ``DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_LENGTH``; default 200.
    Raises ``ValueError`` if the variable is not an integer literal.
    """
    raw = os.environ.get("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_LENGTH", "200")
    return int(raw)
77
+
78
+
79
def get_emit_mode() -> Optional[str]:
    """Return ``DATAHUB_EMIT_MODE`` unvalidated, or ``None`` if unset.

    Documented values: SYNC_PRIMARY, SYNC_WAIT, ASYNC, ASYNC_WAIT.
    """
    return os.environ.get("DATAHUB_EMIT_MODE")
82
+
83
+
84
def get_rest_emitter_default_endpoint() -> Optional[str]:
    """Return ``DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT`` unvalidated, or ``None``.

    Documented values: RESTLI or OPENAPI.
    """
    return os.environ.get("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT")
87
+
88
+
89
def get_emitter_trace() -> bool:
    """Report whether detailed emitter tracing is enabled.

    True only when ``DATAHUB_EMITTER_TRACE`` equals ``"true"`` (case-insensitive).
    """
    raw = os.environ.get("DATAHUB_EMITTER_TRACE", "")
    return raw.lower() == "true"
92
+
93
+
94
+ # ============================================================================
95
+ # REST Sink Configuration
96
+ # ============================================================================
97
+
98
+
99
def get_rest_sink_default_max_threads() -> int:
    """Return the max thread-pool size for async sink operations.

    Read from ``DATAHUB_REST_SINK_DEFAULT_MAX_THREADS``; default 15.
    Raises ``ValueError`` if the variable is not an integer literal.
    """
    raw = os.environ.get("DATAHUB_REST_SINK_DEFAULT_MAX_THREADS", "15")
    return int(raw)
102
+
103
+
104
def get_rest_sink_default_mode() -> Optional[str]:
    """Return ``DATAHUB_REST_SINK_DEFAULT_MODE`` unvalidated, or ``None``.

    Documented values: SYNC, ASYNC, ASYNC_BATCH.
    """
    return os.environ.get("DATAHUB_REST_SINK_DEFAULT_MODE")
107
+
108
+
109
+ # ============================================================================
110
+ # Telemetry & Monitoring
111
+ # ============================================================================
112
+
113
+
114
def get_telemetry_timeout() -> str:
    """Return the telemetry timeout (seconds) as a raw string.

    Read from ``DATAHUB_TELEMETRY_TIMEOUT``; default ``"10"``.
    """
    return os.environ.get("DATAHUB_TELEMETRY_TIMEOUT", "10")
117
+
118
+
119
def get_sentry_dsn() -> Optional[str]:
    """Return the Sentry error-tracking DSN from ``SENTRY_DSN``, or ``None``."""
    return os.environ.get("SENTRY_DSN")
122
+
123
+
124
def get_sentry_environment() -> str:
    """Return ``SENTRY_ENVIRONMENT`` (e.g. dev/prod); defaults to ``"dev"``."""
    return os.environ.get("SENTRY_ENVIRONMENT", "dev")
127
+
128
+
129
+ # ============================================================================
130
+ # Logging & Debug Configuration
131
+ # ============================================================================
132
+
133
+
134
def get_suppress_logging_manager() -> Optional[str]:
    """Return the raw ``DATAHUB_SUPPRESS_LOGGING_MANAGER`` value, or ``None``.

    The variable is meant to suppress DataHub logging-manager initialization;
    interpretation of the value is left to the caller.
    """
    return os.environ.get("DATAHUB_SUPPRESS_LOGGING_MANAGER")
137
+
138
+
139
def get_no_color() -> bool:
    """Report whether colored logging output should be disabled.

    Follows the NO_COLOR convention: the variable disables color whenever it
    is present and non-empty, regardless of its value. The previous check
    only recognised the literal ``"true"``, so the common ``NO_COLOR=1`` was
    silently ignored.

    Returns:
        True if ``NO_COLOR`` is set to any non-empty string, else False.
    """
    return bool(os.getenv("NO_COLOR"))
142
+
143
+
144
def get_test_mode() -> Optional[str]:
    """Return the raw ``DATAHUB_TEST_MODE`` value (test-context marker), or ``None``."""
    return os.environ.get("DATAHUB_TEST_MODE")
147
+
148
+
149
def get_debug() -> bool:
    """Report whether debug mode is enabled via ``DATAHUB_DEBUG``.

    Accepts ``"true"`` (case-insensitive) and ``"1"``. The ``"1"`` form is
    honoured because the CLI's own error tip tells users to run with
    ``DATAHUB_DEBUG=1``; matching only ``"true"`` made that advice a no-op.

    Returns:
        True if ``DATAHUB_DEBUG`` is ``"true"`` or ``"1"``, else False
        (including when the variable is unset or empty).
    """
    return os.getenv("DATAHUB_DEBUG", "").lower() in ("true", "1")
152
+
153
+
154
+ # ============================================================================
155
+ # Data Processing Configuration
156
+ # ============================================================================
157
+
158
+
159
def get_sql_agg_query_log() -> str:
    """Return the SQL aggregator query-logging level.

    Read from ``DATAHUB_SQL_AGG_QUERY_LOG``; defaults to ``"DISABLED"``.
    """
    return os.environ.get("DATAHUB_SQL_AGG_QUERY_LOG", "DISABLED")
162
+
163
+
164
def get_dataset_urn_to_lower() -> str:
    """Return the raw ``DATAHUB_DATASET_URN_TO_LOWER`` flag string.

    Controls lowercasing of dataset URNs; defaults to ``"false"``. The value
    is returned as-is, not parsed into a bool.
    """
    return os.environ.get("DATAHUB_DATASET_URN_TO_LOWER", "false")
167
+
168
+
169
+ # ============================================================================
170
+ # Integration-Specific Configuration
171
+ # ============================================================================
172
+
173
+
174
def get_kafka_schema_registry_url() -> Optional[str]:
    """Return the Kafka schema registry URL from ``KAFKA_SCHEMAREGISTRY_URL``, or ``None``."""
    return os.environ.get("KAFKA_SCHEMAREGISTRY_URL")
177
+
178
+
179
def get_spark_version() -> Optional[str]:
    """Return ``SPARK_VERSION`` (used by the S3 source), or ``None`` if unset."""
    return os.environ.get("SPARK_VERSION")
182
+
183
+
184
def get_bigquery_schema_parallelism() -> int:
    """Return the parallelism level for BigQuery schema extraction.

    Read from ``DATAHUB_BIGQUERY_SCHEMA_PARALLELISM``; default 20.
    Raises ``ValueError`` if the variable is not an integer literal.
    """
    raw = os.environ.get("DATAHUB_BIGQUERY_SCHEMA_PARALLELISM", "20")
    return int(raw)
187
+
188
+
189
def get_snowflake_schema_parallelism() -> int:
    """Return the parallelism level for Snowflake schema extraction.

    Read from ``DATAHUB_SNOWFLAKE_SCHEMA_PARALLELISM``; default 20.
    Raises ``ValueError`` if the variable is not an integer literal.
    """
    raw = os.environ.get("DATAHUB_SNOWFLAKE_SCHEMA_PARALLELISM", "20")
    return int(raw)
192
+
193
+
194
def get_powerbi_m_query_parse_timeout() -> int:
    """Return the timeout for PowerBI M query parsing.

    Read from ``DATAHUB_POWERBI_M_QUERY_PARSE_TIMEOUT``; default 60.
    Raises ``ValueError`` if the variable is not an integer literal.
    """
    raw = os.environ.get("DATAHUB_POWERBI_M_QUERY_PARSE_TIMEOUT", "60")
    return int(raw)
197
+
198
+
199
def get_trace_powerbi_mquery_parser() -> bool:
    """Report whether PowerBI M query parser tracing is enabled.

    True only when ``DATAHUB_TRACE_POWERBI_MQUERY_PARSER`` equals ``"true"``
    (case-insensitive).
    """
    raw = os.environ.get("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", "")
    return raw.lower() == "true"
202
+
203
+
204
def get_lookml_git_test_ssh_key() -> Optional[str]:
    """Return the SSH key for LookML Git tests, or ``None`` if unset.

    Read from ``DATAHUB_LOOKML_GIT_TEST_SSH_KEY``.
    """
    return os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
207
+
208
+
209
+ # ============================================================================
210
+ # AWS/Cloud Configuration
211
+ # ============================================================================
212
+
213
+
214
def get_aws_lambda_function_name() -> Optional[str]:
    """Return ``AWS_LAMBDA_FUNCTION_NAME`` (an AWS Lambda marker), or ``None``."""
    return os.environ.get("AWS_LAMBDA_FUNCTION_NAME")
217
+
218
+
219
def get_aws_execution_env() -> Optional[str]:
    """Return the AWS execution environment from ``AWS_EXECUTION_ENV``, or ``None``."""
    return os.environ.get("AWS_EXECUTION_ENV")
222
+
223
+
224
def get_aws_web_identity_token_file() -> Optional[str]:
    """Return the OIDC token file path from ``AWS_WEB_IDENTITY_TOKEN_FILE``, or ``None``."""
    return os.environ.get("AWS_WEB_IDENTITY_TOKEN_FILE")
227
+
228
+
229
def get_aws_role_arn() -> Optional[str]:
    """Return the AWS role ARN used for OIDC from ``AWS_ROLE_ARN``, or ``None``."""
    return os.environ.get("AWS_ROLE_ARN")
232
+
233
+
234
def get_aws_app_runner_service_id() -> Optional[str]:
    """Return the App Runner service ID from ``AWS_APP_RUNNER_SERVICE_ID``, or ``None``."""
    return os.environ.get("AWS_APP_RUNNER_SERVICE_ID")
237
+
238
+
239
def get_ecs_container_metadata_uri_v4() -> Optional[str]:
    """Return the ECS metadata endpoint (v4), or ``None`` if unset.

    Read from ``ECS_CONTAINER_METADATA_URI_V4``.
    """
    return os.environ.get("ECS_CONTAINER_METADATA_URI_V4")
242
+
243
+
244
def get_ecs_container_metadata_uri() -> Optional[str]:
    """Return the ECS metadata endpoint (v3), or ``None`` if unset.

    Read from ``ECS_CONTAINER_METADATA_URI``.
    """
    return os.environ.get("ECS_CONTAINER_METADATA_URI")
247
+
248
+
249
def get_elastic_beanstalk_environment_name() -> Optional[str]:
    """Return the Elastic Beanstalk environment name, or ``None`` if unset.

    Read from ``ELASTIC_BEANSTALK_ENVIRONMENT_NAME``.
    """
    return os.environ.get("ELASTIC_BEANSTALK_ENVIRONMENT_NAME")
252
+
253
+
254
+ # ============================================================================
255
+ # Docker & Local Development
256
+ # ============================================================================
257
+
258
+
259
def get_compose_project_name() -> str:
    """Return the Docker Compose project name.

    Read from ``DATAHUB_COMPOSE_PROJECT_NAME``; defaults to ``"datahub"``.
    """
    return os.environ.get("DATAHUB_COMPOSE_PROJECT_NAME", "datahub")
262
+
263
+
264
def get_docker_compose_base() -> Optional[str]:
    """Return the base path for Docker Compose files from ``DOCKER_COMPOSE_BASE``, or ``None``."""
    return os.environ.get("DOCKER_COMPOSE_BASE")
267
+
268
+
269
def get_datahub_version() -> Optional[str]:
    """Return ``DATAHUB_VERSION`` (set during docker init), or ``None`` if unset."""
    return os.environ.get("DATAHUB_VERSION")
272
+
273
+
274
def get_mapped_mysql_port() -> Optional[str]:
    """Return the MySQL port mapping as a raw string, or ``None`` if unset.

    Read from ``DATAHUB_MAPPED_MYSQL_PORT`` (set during docker init).
    """
    return os.environ.get("DATAHUB_MAPPED_MYSQL_PORT")
277
+
278
+
279
def get_mapped_kafka_broker_port() -> Optional[str]:
    """Return the Kafka broker port mapping as a raw string, or ``None``.

    Read from ``DATAHUB_MAPPED_KAFKA_BROKER_PORT`` (set during docker init).
    """
    return os.environ.get("DATAHUB_MAPPED_KAFKA_BROKER_PORT")
282
+
283
+
284
def get_mapped_elastic_port() -> Optional[str]:
    """Return the Elasticsearch port mapping as a raw string, or ``None``.

    Read from ``DATAHUB_MAPPED_ELASTIC_PORT`` (set during docker init).
    """
    return os.environ.get("DATAHUB_MAPPED_ELASTIC_PORT")
287
+
288
+
289
def get_metadata_service_auth_enabled() -> str:
    """Return the raw ``METADATA_SERVICE_AUTH_ENABLED`` flag string.

    Toggles auth in Docker; defaults to ``"false"``. The value is returned
    as-is, not parsed into a bool.
    """
    return os.environ.get("METADATA_SERVICE_AUTH_ENABLED", "false")
292
+
293
+
294
def get_ui_ingestion_default_cli_version() -> Optional[str]:
    """Return the CLI version used for UI ingestion, or ``None`` if unset.

    Read from ``UI_INGESTION_DEFAULT_CLI_VERSION`` (set during init).
    """
    return os.environ.get("UI_INGESTION_DEFAULT_CLI_VERSION")
297
+
298
+
299
+ # ============================================================================
300
+ # Utility & Helper Configuration
301
+ # ============================================================================
302
+
303
+
304
def get_datahub_component() -> str:
    """Return the component name used for user-agent tracking.

    Read from ``DATAHUB_COMPONENT``; defaults to ``"datahub"``.
    """
    return os.environ.get("DATAHUB_COMPONENT", "datahub")
307
+
308
+
309
def get_force_local_quickstart_mapping() -> str:
    """Return ``FORCE_LOCAL_QUICKSTART_MAPPING`` (local mapping file override).

    Defaults to the empty string when unset.
    """
    return os.environ.get("FORCE_LOCAL_QUICKSTART_MAPPING", "")
312
+
313
+
314
def get_dataproduct_external_url() -> Optional[str]:
    """Return the external URL for data products, or ``None`` if unset.

    Read from ``DATAHUB_DATAPRODUCT_EXTERNAL_URL``.
    """
    return os.environ.get("DATAHUB_DATAPRODUCT_EXTERNAL_URL")
317
+
318
+
319
def get_override_sqlite_version_req() -> str:
    """Return ``OVERRIDE_SQLITE_VERSION_REQ`` (SQLite version-check override).

    Defaults to the empty string when unset.
    """
    return os.environ.get("OVERRIDE_SQLITE_VERSION_REQ", "")
322
+
323
+
324
def get_update_entity_registry() -> str:
    """Return the raw ``UPDATE_ENTITY_REGISTRY`` flag string.

    Used to update the entity registry during tests; defaults to ``"false"``.
    The value is returned as-is, not parsed into a bool.
    """
    return os.environ.get("UPDATE_ENTITY_REGISTRY", "false")
327
+
328
+
329
def get_ci() -> Optional[str]:
    """Return the raw ``CI`` value (a CI-environment marker), or ``None`` if unset."""
    return os.environ.get("CI")
@@ -1,15 +1,18 @@
1
- from typing import TypeVar, Union
1
+ from typing import TYPE_CHECKING, Type, TypeVar, Union
2
2
 
3
3
  import pydantic
4
4
 
5
5
  from datahub.ingestion.api.registry import import_path
6
6
 
7
- T = TypeVar("T")
7
+ if TYPE_CHECKING:
8
+ from pydantic.deprecated.class_validators import V1Validator
8
9
 
10
+ _T = TypeVar("_T")
9
11
 
10
- def _pydantic_resolver(v: Union[T, str]) -> T:
12
+
13
def _pydantic_resolver(cls: Type, v: Union[str, _T]) -> _T:
    """Resolve a string value via ``import_path``; pass anything else through.

    ``cls`` is accepted but not used by the body.
    """
    if isinstance(v, str):
        return import_path(v)
    return v
12
15
 
13
16
 
14
- def pydantic_resolve_key(field: str) -> classmethod:
17
def pydantic_resolve_key(field: str) -> "V1Validator":
    """Build a reusable pre-validator for ``field`` that applies ``_pydantic_resolver``."""
    decorate = pydantic.validator(field, pre=True, allow_reuse=True)
    return decorate(_pydantic_resolver)