acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/emitter/mcp.py CHANGED
@@ -1,9 +1,11 @@
1
1
  import dataclasses
2
2
  import json
3
- from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
3
+ import warnings
4
+ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
4
5
 
5
6
  from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
6
7
  from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
8
+ from datahub.errors import DataHubDeprecationWarning
7
9
  from datahub.metadata.schema_classes import (
8
10
  ChangeTypeClass,
9
11
  DictWrapper,
@@ -69,18 +71,28 @@ class MetadataChangeProposalWrapper:
69
71
  aspectName: Union[None, str] = None
70
72
  aspect: Union[None, _Aspect] = None
71
73
  systemMetadata: Union[None, SystemMetadataClass] = None
74
+ headers: Union[None, Dict[str, str]] = None
72
75
 
73
76
  def __post_init__(self) -> None:
74
77
  if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
75
78
  self.entityType = guess_entity_type(self.entityUrn)
76
79
  elif self.entityUrn and self.entityType:
77
- guessed_entity_type = guess_entity_type(self.entityUrn).lower()
78
- # Entity type checking is actually case insensitive.
79
- # Note that urns are case sensitive, but entity types are not.
80
- if self.entityType.lower() != guessed_entity_type:
80
+ guessed_entity_type = guess_entity_type(self.entityUrn)
81
+ if self.entityType.lower() != guessed_entity_type.lower():
82
+ # If they aren't a case-ignored match, raise an error.
81
83
  raise ValueError(
82
84
  f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
83
85
  )
86
+ elif self.entityType != guessed_entity_type:
87
+ # If they only differ in case, normalize and print a warning.
88
+ self.entityType = guessed_entity_type
89
+ warnings.warn(
90
+ f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
91
+ "This will be automatically corrected for now, but will become an error in a future release. "
92
+ "Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
93
+ DataHubDeprecationWarning,
94
+ stacklevel=3,
95
+ )
84
96
  elif self.entityType == _ENTITY_TYPE_UNSET:
85
97
  raise ValueError("entityType must be set if entityUrn is not set")
86
98
 
@@ -112,6 +124,7 @@ class MetadataChangeProposalWrapper:
112
124
  auditHeader=self.auditHeader,
113
125
  aspectName=self.aspectName,
114
126
  systemMetadata=self.systemMetadata,
127
+ headers=self.headers,
115
128
  )
116
129
 
117
130
  def make_mcp(self) -> MetadataChangeProposalClass:
@@ -211,6 +224,7 @@ class MetadataChangeProposalWrapper:
211
224
  aspectName=mcpc.aspectName,
212
225
  aspect=aspect,
213
226
  systemMetadata=mcpc.systemMetadata,
227
+ headers=mcpc.headers,
214
228
  )
215
229
  else:
216
230
  return None
@@ -228,6 +242,7 @@ class MetadataChangeProposalWrapper:
228
242
  changeType=mcl.changeType,
229
243
  auditHeader=mcl.auditHeader,
230
244
  systemMetadata=mcl.systemMetadata,
245
+ headers=mcl.headers,
231
246
  )
232
247
  return cls.try_from_mcpc(mcpc) or mcpc
233
248
 
@@ -117,6 +117,14 @@ class ContainerKey(DatahubKey):
117
117
  PlatformKey = ContainerKey
118
118
 
119
119
 
120
+ class NamespaceKey(ContainerKey):
121
+ """
122
+ For Iceberg namespaces (databases/schemas)
123
+ """
124
+
125
+ namespace: str
126
+
127
+
120
128
  class DatabaseKey(ContainerKey):
121
129
  database: str
122
130
 
@@ -129,6 +137,10 @@ class ProjectIdKey(ContainerKey):
129
137
  project_id: str
130
138
 
131
139
 
140
+ class ExperimentKey(ContainerKey):
141
+ id: str
142
+
143
+
132
144
  class MetastoreKey(ContainerKey):
133
145
  metastore: str
134
146
 
@@ -1,28 +1,151 @@
1
- import itertools
1
+ import json
2
2
  import shlex
3
- from typing import List, Union
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional, Union
4
5
 
5
6
  import requests
7
+ from requests.auth import HTTPBasicAuth
8
+
9
+ from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
10
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
11
+ from datahub.emitter.serialization_helper import pre_json_transform
12
+ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
13
+ MetadataChangeProposal,
14
+ )
15
+ from datahub.metadata.schema_classes import ChangeTypeClass
16
+
17
+
18
+ def _decode_bytes(value: Union[str, bytes]) -> str:
19
+ """Decode bytes to string, if necessary."""
20
+ if isinstance(value, bytes):
21
+ return value.decode()
22
+ return value
6
23
 
7
24
 
8
25
  def _format_header(name: str, value: Union[str, bytes]) -> str:
9
26
  if name == "Authorization":
10
27
  return f"{name!s}: <redacted>"
11
- return f"{name!s}: {value!s}"
28
+ return f"{name!s}: {_decode_bytes(value)}"
12
29
 
13
30
 
14
31
  def make_curl_command(
15
- session: requests.Session, method: str, url: str, payload: str
32
+ session: requests.Session, method: str, url: str, payload: Optional[str] = None
16
33
  ) -> str:
17
- fragments: List[str] = [
18
- "curl",
19
- *itertools.chain(
20
- *[
21
- ("-X", method),
22
- *[("-H", _format_header(k, v)) for (k, v) in session.headers.items()],
23
- ("--data", payload),
24
- ]
25
- ),
26
- url,
27
- ]
34
+ fragments: List[str] = ["curl", "-X", method]
35
+
36
+ for header_name, header_value in session.headers.items():
37
+ fragments.extend(["-H", _format_header(header_name, header_value)])
38
+
39
+ if session.auth:
40
+ if isinstance(session.auth, HTTPBasicAuth):
41
+ fragments.extend(
42
+ ["-u", f"{_decode_bytes(session.auth.username)}:<redacted>"]
43
+ )
44
+ else:
45
+ # For other auth types, they should be handled via headers
46
+ fragments.extend(["-H", "<unknown auth type>"])
47
+
48
+ if payload:
49
+ fragments.extend(["--data", payload])
50
+
51
+ fragments.append(url)
28
52
  return shlex.join(fragments)
53
+
54
+
55
+ @dataclass
56
+ class OpenApiRequest:
57
+ """Represents an OpenAPI request for entity operations."""
58
+
59
+ method: str
60
+ url: str
61
+ payload: List[Dict[str, Any]]
62
+
63
+ @classmethod
64
+ def from_mcp(
65
+ cls,
66
+ mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
67
+ gms_server: str,
68
+ async_flag: bool = False,
69
+ search_sync_flag: bool = False,
70
+ ) -> Optional["OpenApiRequest"]:
71
+ """Factory method to create an OpenApiRequest from a MetadataChangeProposal."""
72
+ if not mcp.aspectName or (
73
+ mcp.changeType != ChangeTypeClass.DELETE and not mcp.aspect
74
+ ):
75
+ return None
76
+
77
+ method = "post"
78
+ url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}?async={'true' if async_flag else 'false'}"
79
+ payload = []
80
+
81
+ if mcp.changeType == ChangeTypeClass.DELETE:
82
+ method = "delete"
83
+ url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}/{mcp.entityUrn}"
84
+ else:
85
+ if mcp.aspect:
86
+ mcp_headers = {}
87
+
88
+ if not async_flag and search_sync_flag:
89
+ mcp_headers["X-DataHub-Sync-Index-Update"] = "true"
90
+
91
+ if mcp.changeType == ChangeTypeClass.PATCH:
92
+ method = "patch"
93
+ obj = mcp.aspect.to_obj()
94
+ content_type = obj.get("contentType")
95
+ if obj.get("value") and content_type == JSON_PATCH_CONTENT_TYPE:
96
+ # Undo double serialization.
97
+ obj = json.loads(obj["value"])
98
+ patch_value = obj
99
+ else:
100
+ raise NotImplementedError(
101
+ f"ChangeType {mcp.changeType} only supports context type {JSON_PATCH_CONTENT_TYPE}, found {content_type}."
102
+ )
103
+
104
+ if isinstance(patch_value, list):
105
+ patch_value = {"patch": patch_value}
106
+
107
+ payload = [
108
+ {
109
+ "urn": mcp.entityUrn,
110
+ mcp.aspectName: {
111
+ "value": patch_value,
112
+ "systemMetadata": mcp.systemMetadata.to_obj()
113
+ if mcp.systemMetadata
114
+ else None,
115
+ "headers": mcp_headers,
116
+ },
117
+ }
118
+ ]
119
+ else:
120
+ if isinstance(mcp, MetadataChangeProposalWrapper):
121
+ aspect_value = pre_json_transform(
122
+ mcp.to_obj(simplified_structure=True)
123
+ )["aspect"]["json"]
124
+ else:
125
+ obj = mcp.aspect.to_obj()
126
+ content_type = obj.get("contentType")
127
+ if obj.get("value") and content_type == JSON_CONTENT_TYPE:
128
+ # Undo double serialization.
129
+ obj = json.loads(obj["value"])
130
+ elif content_type == JSON_PATCH_CONTENT_TYPE:
131
+ raise NotImplementedError(
132
+ f"ChangeType {mcp.changeType} does not support patch."
133
+ )
134
+ aspect_value = pre_json_transform(obj)
135
+
136
+ payload = [
137
+ {
138
+ "urn": mcp.entityUrn,
139
+ mcp.aspectName: {
140
+ "value": aspect_value,
141
+ "systemMetadata": mcp.systemMetadata.to_obj()
142
+ if mcp.systemMetadata
143
+ else None,
144
+ "headers": mcp_headers,
145
+ },
146
+ }
147
+ ]
148
+ else:
149
+ raise ValueError(f"ChangeType {mcp.changeType} requires a value.")
150
+
151
+ return cls(method=method, url=url, payload=payload)
@@ -1,38 +1,124 @@
1
1
  import json
2
2
  import logging
3
+ import re
4
+ import warnings
3
5
  from dataclasses import dataclass
6
+ from datetime import datetime, timezone
4
7
  from typing import Dict, List, Optional, Sequence, Union
5
8
 
6
9
  from requests import Response
7
10
 
8
11
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
12
+ from datahub.errors import APITracingWarning
9
13
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
10
14
  MetadataChangeProposal,
11
15
  )
12
16
 
13
17
  logger = logging.getLogger(__name__)
14
18
 
19
+ _TRACE_HEADER_NAME = "traceparent"
20
+
15
21
 
16
22
  @dataclass
17
23
  class TraceData:
18
24
  trace_id: str
19
25
  data: Dict[str, List[str]]
20
26
 
27
+ @staticmethod
28
+ def extract_trace_id(input_str: Optional[str]) -> Optional[str]:
29
+ """
30
+ Extract the trace ID from various input formats.
31
+
32
+ Args:
33
+ input_str (Optional[str]): Input string potentially containing a trace ID
34
+
35
+ Returns:
36
+ Optional[str]: Extracted trace ID or None if no valid trace ID found
37
+ """
38
+ # Handle None or empty input
39
+ if input_str is None or not str(input_str).strip():
40
+ return None
41
+
42
+ # Convert to string and clean
43
+ input_str = str(input_str).strip()
44
+
45
+ # Special case for test scenarios
46
+ if input_str == "test-trace-id":
47
+ return input_str
48
+
49
+ # Case 1: Full traceparent header (containing hyphens)
50
+ if "-" in input_str:
51
+ parts = input_str.split("-")
52
+ if len(parts) >= 2:
53
+ # The trace ID is the second part (index 1)
54
+ return parts[1]
55
+ return None
56
+
57
+ # Case 2: Direct trace ID (32 hex characters)
58
+ if len(input_str) == 32 and re.match(r"^[0-9a-fA-F]+$", input_str):
59
+ return input_str
60
+
61
+ # Fallback: return the original input if it doesn't match strict criteria
62
+ return input_str
63
+
21
64
  def __post_init__(self) -> None:
22
- if not self.trace_id:
65
+ """
66
+ Validate and potentially process the trace_id during initialization.
67
+ """
68
+ # Explicitly check for None or empty string
69
+ if self.trace_id is None or self.trace_id == "":
23
70
  raise ValueError("trace_id cannot be empty")
71
+
72
+ # Allow extracting trace ID from various input formats
73
+ extracted_id = self.extract_trace_id(self.trace_id)
74
+ if extracted_id is None:
75
+ raise ValueError("Invalid trace_id format")
76
+
77
+ # Update trace_id with the extracted version
78
+ self.trace_id = extracted_id
79
+
80
+ # Validate data
24
81
  if not isinstance(self.data, dict):
25
82
  raise TypeError("data must be a dictionary")
26
83
 
84
+ def extract_timestamp(self) -> datetime:
85
+ """
86
+ Extract the timestamp from a trace ID generated by the TraceIdGenerator.
87
+
88
+ Returns:
89
+ datetime: The timestamp in UTC
27
90
 
28
- def _extract_trace_id(
29
- response: Response, trace_header: str = "traceparent"
30
- ) -> Optional[str]:
91
+ Raises:
92
+ ValueError: If the trace ID is invalid
93
+ """
94
+ # Special case for test trace ID
95
+ if self.trace_id == "test-trace-id":
96
+ return datetime.fromtimestamp(0, tz=timezone.utc)
97
+
98
+ # Validate trace ID length for hex-based trace IDs
99
+ if len(self.trace_id) < 16 or not re.match(
100
+ r"^[0-9a-fA-F]+$", self.trace_id[:16]
101
+ ):
102
+ raise ValueError("Invalid trace ID format")
103
+
104
+ # Extract the first 16 hex characters representing timestamp in microseconds
105
+ timestamp_micros_hex = self.trace_id[:16]
106
+
107
+ # Convert hex to integer
108
+ timestamp_micros = int(timestamp_micros_hex, 16)
109
+
110
+ # Convert microseconds to milliseconds
111
+ timestamp_millis = timestamp_micros // 1000
112
+
113
+ # Convert to datetime in UTC
114
+ return datetime.fromtimestamp(timestamp_millis / 1000, tz=timezone.utc)
115
+
116
+
117
+ def _extract_trace_id(response: Response) -> Optional[str]:
31
118
  """
32
119
  Extract trace ID from response headers.
33
120
  Args:
34
121
  response: HTTP response object
35
- trace_header: Name of the trace header to use
36
122
  Returns:
37
123
  Trace ID if found and response is valid, None otherwise
38
124
  """
@@ -40,9 +126,17 @@ def _extract_trace_id(
40
126
  logger.debug(f"Invalid status code: {response.status_code}")
41
127
  return None
42
128
 
43
- trace_id = response.headers.get(trace_header)
129
+ trace_id = response.headers.get(_TRACE_HEADER_NAME)
44
130
  if not trace_id:
45
- logger.debug(f"Missing trace header: {trace_header}")
131
+ # This will only be printed if
132
+ # 1. we're in async mode (checked by the caller)
133
+ # 2. the server did not return a trace ID
134
+ logger.debug(f"Missing trace header: {_TRACE_HEADER_NAME}")
135
+ warnings.warn(
136
+ "No trace ID found in response headers. API tracing is not active - likely due to an outdated server version.",
137
+ APITracingWarning,
138
+ stacklevel=3,
139
+ )
46
140
  return None
47
141
 
48
142
  return trace_id
@@ -51,20 +145,19 @@ def _extract_trace_id(
51
145
  def extract_trace_data(
52
146
  response: Response,
53
147
  aspects_to_trace: Optional[List[str]] = None,
54
- trace_header: str = "traceparent",
55
148
  ) -> Optional[TraceData]:
56
- """
57
- Extract trace data from a response object.
149
+ """Extract trace data from a response object.
150
+
151
+ If we run into a JSONDecodeError, we'll log an error and return None.
152
+
58
153
  Args:
59
154
  response: HTTP response object
60
155
  aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
61
- trace_header: Name of the trace header to use (default: "traceparent")
156
+
62
157
  Returns:
63
158
  TraceData object if successful, None otherwise
64
- Raises:
65
- JSONDecodeError: If response body cannot be decoded as JSON
66
159
  """
67
- trace_id = _extract_trace_id(response, trace_header)
160
+ trace_id = _extract_trace_id(response)
68
161
  if not trace_id:
69
162
  return None
70
163
 
@@ -104,19 +197,18 @@ def extract_trace_data_from_mcps(
104
197
  response: Response,
105
198
  mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
106
199
  aspects_to_trace: Optional[List[str]] = None,
107
- trace_header: str = "traceparent",
108
200
  ) -> Optional[TraceData]:
109
- """
110
- Extract trace data from a response object and populate data from provided MCPs.
201
+ """Extract trace data from a response object and populate data from provided MCPs.
202
+
111
203
  Args:
112
204
  response: HTTP response object used only for trace_id extraction
113
205
  mcps: List of MCP URN and aspect data
114
206
  aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
115
- trace_header: Name of the trace header to use (default: "traceparent")
207
+
116
208
  Returns:
117
209
  TraceData object if successful, None otherwise
118
210
  """
119
- trace_id = _extract_trace_id(response, trace_header)
211
+ trace_id = _extract_trace_id(response)
120
212
  if not trace_id:
121
213
  return None
122
214