acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub has been flagged by the registry for review.

Files changed (205)
  1. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
  2. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
  3. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +4 -3
  7. datahub/api/entities/dataset/dataset.py +731 -42
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/cli/check_cli.py +72 -19
  10. datahub/cli/docker_cli.py +3 -3
  11. datahub/cli/iceberg_cli.py +31 -7
  12. datahub/cli/ingest_cli.py +30 -93
  13. datahub/cli/lite_cli.py +4 -2
  14. datahub/cli/specific/dataproduct_cli.py +1 -1
  15. datahub/cli/specific/dataset_cli.py +128 -14
  16. datahub/configuration/common.py +10 -2
  17. datahub/configuration/git.py +1 -3
  18. datahub/configuration/kafka.py +1 -1
  19. datahub/emitter/mce_builder.py +28 -13
  20. datahub/emitter/mcp_builder.py +4 -1
  21. datahub/emitter/response_helper.py +145 -0
  22. datahub/emitter/rest_emitter.py +323 -10
  23. datahub/ingestion/api/decorators.py +1 -1
  24. datahub/ingestion/api/source_helpers.py +4 -0
  25. datahub/ingestion/fs/s3_fs.py +2 -2
  26. datahub/ingestion/glossary/classification_mixin.py +1 -5
  27. datahub/ingestion/graph/client.py +41 -22
  28. datahub/ingestion/graph/entity_versioning.py +3 -3
  29. datahub/ingestion/graph/filters.py +64 -37
  30. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
  31. datahub/ingestion/run/pipeline.py +112 -148
  32. datahub/ingestion/run/sink_callback.py +77 -0
  33. datahub/ingestion/sink/datahub_rest.py +8 -0
  34. datahub/ingestion/source/abs/config.py +2 -4
  35. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  36. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
  37. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
  38. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
  39. datahub/ingestion/source/cassandra/cassandra.py +152 -233
  40. datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
  41. datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
  42. datahub/ingestion/source/common/subtypes.py +12 -0
  43. datahub/ingestion/source/csv_enricher.py +3 -3
  44. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  45. datahub/ingestion/source/dbt/dbt_common.py +8 -5
  46. datahub/ingestion/source/dbt/dbt_core.py +11 -9
  47. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  48. datahub/ingestion/source/delta_lake/config.py +8 -1
  49. datahub/ingestion/source/delta_lake/report.py +4 -2
  50. datahub/ingestion/source/delta_lake/source.py +20 -5
  51. datahub/ingestion/source/dremio/dremio_api.py +4 -8
  52. datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
  53. datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
  54. datahub/ingestion/source/elastic_search.py +26 -6
  55. datahub/ingestion/source/feast.py +27 -8
  56. datahub/ingestion/source/file.py +6 -3
  57. datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
  58. datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
  59. datahub/ingestion/source/ge_data_profiler.py +12 -15
  60. datahub/ingestion/source/iceberg/iceberg.py +46 -12
  61. datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
  62. datahub/ingestion/source/identity/okta.py +37 -7
  63. datahub/ingestion/source/kafka/kafka.py +1 -1
  64. datahub/ingestion/source/kafka_connect/common.py +2 -7
  65. datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
  66. datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
  67. datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
  68. datahub/ingestion/source/looker/looker_common.py +6 -5
  69. datahub/ingestion/source/looker/looker_file_loader.py +2 -2
  70. datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
  71. datahub/ingestion/source/looker/looker_source.py +1 -1
  72. datahub/ingestion/source/looker/looker_template_language.py +4 -2
  73. datahub/ingestion/source/looker/lookml_source.py +3 -2
  74. datahub/ingestion/source/metabase.py +57 -35
  75. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  76. datahub/ingestion/source/metadata/lineage.py +2 -2
  77. datahub/ingestion/source/mlflow.py +365 -35
  78. datahub/ingestion/source/mode.py +18 -8
  79. datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
  80. datahub/ingestion/source/nifi.py +37 -11
  81. datahub/ingestion/source/openapi.py +1 -1
  82. datahub/ingestion/source/openapi_parser.py +49 -17
  83. datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
  84. datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
  85. datahub/ingestion/source/powerbi/powerbi.py +1 -3
  86. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
  87. datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
  88. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
  89. datahub/ingestion/source/preset.py +7 -4
  90. datahub/ingestion/source/pulsar.py +3 -2
  91. datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
  92. datahub/ingestion/source/redash.py +31 -7
  93. datahub/ingestion/source/redshift/config.py +4 -0
  94. datahub/ingestion/source/redshift/datashares.py +236 -0
  95. datahub/ingestion/source/redshift/lineage.py +6 -2
  96. datahub/ingestion/source/redshift/lineage_v2.py +24 -9
  97. datahub/ingestion/source/redshift/profile.py +1 -1
  98. datahub/ingestion/source/redshift/query.py +133 -33
  99. datahub/ingestion/source/redshift/redshift.py +46 -73
  100. datahub/ingestion/source/redshift/redshift_schema.py +186 -6
  101. datahub/ingestion/source/redshift/report.py +3 -0
  102. datahub/ingestion/source/s3/config.py +5 -5
  103. datahub/ingestion/source/s3/source.py +20 -41
  104. datahub/ingestion/source/salesforce.py +550 -275
  105. datahub/ingestion/source/schema_inference/object.py +1 -1
  106. datahub/ingestion/source/sigma/sigma.py +1 -1
  107. datahub/ingestion/source/slack/slack.py +31 -10
  108. datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
  109. datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
  110. datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
  111. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
  112. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  113. datahub/ingestion/source/sql/athena.py +10 -16
  114. datahub/ingestion/source/sql/druid.py +1 -5
  115. datahub/ingestion/source/sql/hive.py +15 -6
  116. datahub/ingestion/source/sql/hive_metastore.py +3 -2
  117. datahub/ingestion/source/sql/mssql/job_models.py +29 -0
  118. datahub/ingestion/source/sql/mssql/source.py +11 -5
  119. datahub/ingestion/source/sql/oracle.py +127 -63
  120. datahub/ingestion/source/sql/sql_common.py +16 -18
  121. datahub/ingestion/source/sql/sql_types.py +2 -2
  122. datahub/ingestion/source/sql/teradata.py +19 -5
  123. datahub/ingestion/source/sql/trino.py +2 -2
  124. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  125. datahub/ingestion/source/superset.py +222 -62
  126. datahub/ingestion/source/tableau/tableau.py +22 -6
  127. datahub/ingestion/source/tableau/tableau_common.py +3 -2
  128. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  129. datahub/ingestion/source/unity/source.py +11 -1
  130. datahub/ingestion/source/vertexai.py +697 -0
  131. datahub/ingestion/source_config/pulsar.py +3 -1
  132. datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
  133. datahub/lite/duckdb_lite.py +3 -10
  134. datahub/lite/lite_local.py +1 -1
  135. datahub/lite/lite_util.py +4 -3
  136. datahub/metadata/_schema_classes.py +714 -417
  137. datahub/metadata/_urns/urn_defs.py +1673 -1649
  138. datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
  139. datahub/metadata/schema.avsc +16438 -16603
  140. datahub/metadata/schemas/AssertionInfo.avsc +3 -1
  141. datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  142. datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
  143. datahub/metadata/schemas/ChartInfo.avsc +1 -0
  144. datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
  145. datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
  146. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  147. datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
  148. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  149. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  150. datahub/metadata/schemas/DataProcessKey.avsc +2 -1
  151. datahub/metadata/schemas/DataProductKey.avsc +2 -1
  152. datahub/metadata/schemas/DomainKey.avsc +2 -1
  153. datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  154. datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
  155. datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
  156. datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
  157. datahub/metadata/schemas/IncidentInfo.avsc +130 -46
  158. datahub/metadata/schemas/InputFields.avsc +3 -1
  159. datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
  160. datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
  161. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  162. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  163. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  164. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
  165. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
  166. datahub/metadata/schemas/PostKey.avsc +2 -1
  167. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  168. datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
  169. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
  170. datahub/metadata/schemas/VersionProperties.avsc +18 -0
  171. datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
  172. datahub/pydantic/__init__.py +0 -0
  173. datahub/pydantic/compat.py +58 -0
  174. datahub/sdk/__init__.py +30 -12
  175. datahub/sdk/_all_entities.py +1 -1
  176. datahub/sdk/_attribution.py +4 -0
  177. datahub/sdk/_shared.py +258 -16
  178. datahub/sdk/_utils.py +35 -0
  179. datahub/sdk/container.py +30 -6
  180. datahub/sdk/dataset.py +118 -20
  181. datahub/sdk/{_entity.py → entity.py} +24 -1
  182. datahub/sdk/entity_client.py +1 -1
  183. datahub/sdk/main_client.py +23 -0
  184. datahub/sdk/resolver_client.py +17 -29
  185. datahub/sdk/search_client.py +50 -0
  186. datahub/sdk/search_filters.py +374 -0
  187. datahub/specific/dataset.py +3 -4
  188. datahub/sql_parsing/_sqlglot_patch.py +2 -10
  189. datahub/sql_parsing/schema_resolver.py +1 -1
  190. datahub/sql_parsing/split_statements.py +220 -126
  191. datahub/sql_parsing/sql_parsing_common.py +7 -0
  192. datahub/sql_parsing/sqlglot_lineage.py +1 -1
  193. datahub/sql_parsing/sqlglot_utils.py +1 -4
  194. datahub/testing/check_sql_parser_result.py +5 -6
  195. datahub/testing/compare_metadata_json.py +7 -6
  196. datahub/testing/pytest_hooks.py +56 -0
  197. datahub/upgrade/upgrade.py +2 -2
  198. datahub/utilities/file_backed_collections.py +3 -14
  199. datahub/utilities/ingest_utils.py +106 -0
  200. datahub/utilities/mapping.py +1 -1
  201. datahub/utilities/memory_footprint.py +3 -2
  202. datahub/utilities/sentinels.py +22 -0
  203. datahub/utilities/unified_diff.py +5 -1
  204. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
  205. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/configuration/common.py
@@ -20,7 +20,7 @@ from pydantic import BaseModel, Extra, ValidationError
 from pydantic.fields import Field
 from typing_extensions import Protocol, Self

-from datahub.configuration._config_enum import ConfigEnum as ConfigEnum  # noqa: I250
+from datahub.configuration._config_enum import ConfigEnum as ConfigEnum
 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
 from datahub.utilities.dedup_list import deduplicate_list

@@ -130,7 +130,7 @@ class PermissiveConfigModel(ConfigModel):
     # It is usually used for argument bags that are passed through to third-party libraries.

     class Config:
-        if PYDANTIC_VERSION_2:
+        if PYDANTIC_VERSION_2:  # noqa: SIM108
            extra = "allow"
         else:
            extra = Extra.allow
@@ -198,6 +198,14 @@ class IgnorableError(MetaError):
     """An error that can be ignored."""


+class TraceTimeoutError(OperationalError):
+    """Failure to complete an API Trace within the timeout."""
+
+
+class TraceValidationError(OperationalError):
+    """Failure to complete the expected write operation."""
+
+
 @runtime_checkable
 class ExceptionWithProps(Protocol):
     def get_telemetry_props(self) -> Dict[str, Any]: ...
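The two new exception types let callers distinguish a trace that never confirmed from a write the backend rejected. A minimal sketch of how a caller might branch on them; the emitter construction and the idea that emit_mcps waits for trace confirmation are illustrative assumptions, not taken from this diff:

from datahub.configuration.common import TraceTimeoutError, TraceValidationError
from datahub.emitter.rest_emitter import DataHubRestEmitter

emitter = DataHubRestEmitter("http://localhost:8080")  # illustrative endpoint
mcps: list = []  # your MetadataChangeProposalWrapper objects

try:
    emitter.emit_mcps(mcps)  # hypothetical: a call that awaits trace confirmation
except TraceTimeoutError:
    # The trace did not confirm within the timeout; the write may still land later.
    ...
except TraceValidationError:
    # The backend reported that the expected write did not persist.
    ...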
datahub/configuration/git.py
@@ -43,9 +43,7 @@ class GitReference(ConfigModel):

     @validator("repo", pre=True)
     def simplify_repo_url(cls, repo: str) -> str:
-        if repo.startswith("github.com/"):
-            repo = f"https://{repo}"
-        elif repo.startswith("gitlab.com"):
+        if repo.startswith("github.com/") or repo.startswith("gitlab.com"):
             repo = f"https://{repo}"
         elif repo.count("/") == 1:
             repo = f"https://github.com/{repo}"
datahub/configuration/kafka.py
@@ -44,7 +44,7 @@ class KafkaConsumerConnectionConfig(_KafkaConnectionConfig):
             try:
                 value = CallableConsumerConfig(value).callable_config()
             except Exception as e:
-                raise ConfigurationError(e)
+                raise ConfigurationError(e) from e
         return value


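The only change here is exception chaining: `from e` records the original consumer-config failure as `__cause__`, so tracebacks show the root error rather than just the ConfigurationError wrapper. A self-contained demonstration of the Python semantics:

from datahub.configuration.common import ConfigurationError

try:
    try:
        raise ValueError("bad callable consumer config")  # stand-in for the real failure
    except Exception as e:
        raise ConfigurationError(e) from e
except ConfigurationError as wrapped:
    assert isinstance(wrapped.__cause__, ValueError)  # root cause is preserved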
datahub/emitter/mce_builder.py
@@ -52,7 +52,15 @@ from datahub.metadata.schema_classes import (
     UpstreamLineageClass,
     _Aspect as AspectAbstract,
 )
-from datahub.metadata.urns import DataFlowUrn, DatasetUrn, TagUrn
+from datahub.metadata.urns import (
+    ChartUrn,
+    DashboardUrn,
+    DataFlowUrn,
+    DataJobUrn,
+    DataPlatformUrn,
+    DatasetUrn,
+    TagUrn,
+)
 from datahub.utilities.urn_encoder import UrnEncoder

 logger = logging.getLogger(__name__)
@@ -119,7 +127,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
 def make_data_platform_urn(platform: str) -> str:
     if platform.startswith("urn:li:dataPlatform:"):
         return platform
-    return f"urn:li:dataPlatform:{platform}"
+    return DataPlatformUrn.create_from_id(platform).urn()


 def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
@@ -236,7 +244,7 @@ def make_user_urn(username: str) -> str:
     Makes a user urn if the input is not a user or group urn already
     """
     return (
-        f"urn:li:corpuser:{username}"
+        f"urn:li:corpuser:{UrnEncoder.encode_string(username)}"
         if not username.startswith(("urn:li:corpuser:", "urn:li:corpGroup:"))
         else username
     )
@@ -249,7 +257,7 @@ def make_group_urn(groupname: str) -> str:
     if groupname and groupname.startswith(("urn:li:corpGroup:", "urn:li:corpuser:")):
         return groupname
     else:
-        return f"urn:li:corpGroup:{groupname}"
+        return f"urn:li:corpGroup:{UrnEncoder.encode_string(groupname)}"


 def make_tag_urn(tag: str) -> str:
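Both helpers now pass raw names through UrnEncoder before formatting, so names containing URN-reserved characters (commas, parentheses) yield valid urns instead of embedding those characters verbatim. Illustrative behavior, assuming percent-style encoding of reserved characters:

from datahub.emitter.mce_builder import make_group_urn, make_user_urn

make_user_urn("jdoe")                  # "urn:li:corpuser:jdoe" -- unchanged
make_user_urn("urn:li:corpuser:jdoe")  # existing urns still pass through as-is
make_user_urn("doe,jane")              # reserved comma is now escaped, not embedded raw
make_group_urn("data-eng (platform)")  # parentheses are likewise encoded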
@@ -301,7 +309,12 @@ def make_data_flow_urn(


 def make_data_job_urn_with_flow(flow_urn: str, job_id: str) -> str:
-    return f"urn:li:dataJob:({flow_urn},{job_id})"
+    data_flow_urn = DataFlowUrn.from_string(flow_urn)
+    data_job_urn = DataJobUrn.create_from_ids(
+        data_flow_urn=data_flow_urn.urn(),
+        job_id=job_id,
+    )
+    return data_job_urn.urn()


 def make_data_process_instance_urn(dataProcessInstanceId: str) -> str:
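Construction now round-trips through the typed urn classes, so a malformed flow_urn fails fast during parsing rather than being interpolated into a bogus dataJob urn. A sketch of the unchanged happy path (the example ids are illustrative):

from datahub.emitter.mce_builder import make_data_flow_urn, make_data_job_urn_with_flow

flow_urn = make_data_flow_urn(orchestrator="airflow", flow_id="daily_dag", cluster="PROD")
job_urn = make_data_job_urn_with_flow(flow_urn, "load_task")
# Expected (unchanged) shape:
# urn:li:dataJob:(urn:li:dataFlow:(airflow,daily_dag,PROD),load_task)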
@@ -324,10 +337,11 @@ def make_dashboard_urn(
     platform: str, name: str, platform_instance: Optional[str] = None
 ) -> str:
     # FIXME: dashboards don't currently include data platform urn prefixes.
-    if platform_instance:
-        return f"urn:li:dashboard:({platform},{platform_instance}.{name})"
-    else:
-        return f"urn:li:dashboard:({platform},{name})"
+    return DashboardUrn.create_from_ids(
+        platform=platform,
+        name=name,
+        platform_instance=platform_instance,
+    ).urn()


 def dashboard_urn_to_key(dashboard_urn: str) -> Optional[DashboardKeyClass]:
@@ -342,10 +356,11 @@ def make_chart_urn(
     platform: str, name: str, platform_instance: Optional[str] = None
 ) -> str:
     # FIXME: charts don't currently include data platform urn prefixes.
-    if platform_instance:
-        return f"urn:li:chart:({platform},{platform_instance}.{name})"
-    else:
-        return f"urn:li:chart:({platform},{name})"
+    return ChartUrn.create_from_ids(
+        platform=platform,
+        name=name,
+        platform_instance=platform_instance,
+    ).urn()


 def chart_urn_to_key(chart_urn: str) -> Optional[ChartKeyClass]:
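As with dashboards, the chart helper now delegates to the typed urn class; for well-formed inputs the rendered urns should match what the old f-strings produced. Illustrative outputs, mirroring the removed formatting logic:

from datahub.emitter.mce_builder import make_chart_urn, make_dashboard_urn

make_dashboard_urn("looker", "dashboards.123")
# "urn:li:dashboard:(looker,dashboards.123)"
make_chart_urn("looker", "chart_456", platform_instance="prod")
# "urn:li:chart:(looker,prod.chart_456)"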
datahub/emitter/mcp_builder.py
@@ -36,7 +36,7 @@ from datahub.metadata.schema_classes import (
     SubTypesClass,
     TagAssociationClass,
 )
-from datahub.metadata.urns import StructuredPropertyUrn
+from datahub.metadata.urns import ContainerUrn, StructuredPropertyUrn

 # In https://github.com/datahub-project/datahub/pull/11214, we added a
 # new env field to container properties. However, populating this field
@@ -87,6 +87,9 @@ class ContainerKey(DatahubKey):
     def property_dict(self) -> Dict[str, str]:
         return self.dict(by_alias=True, exclude_none=True)

+    def as_urn_typed(self) -> ContainerUrn:
+        return ContainerUrn.from_string(self.as_urn())
+
     def as_urn(self) -> str:
         return make_container_urn(guid=self.guid())
datahub/emitter/response_helper.py (new file)
@@ -0,0 +1,145 @@
+import json
+import logging
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Sequence, Union
+
+from requests import Response
+
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
+    MetadataChangeProposal,
+)
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TraceData:
+    trace_id: str
+    data: Dict[str, List[str]]
+
+    def __post_init__(self) -> None:
+        if not self.trace_id:
+            raise ValueError("trace_id cannot be empty")
+        if not isinstance(self.data, dict):
+            raise TypeError("data must be a dictionary")
+
+
+def _extract_trace_id(
+    response: Response, trace_header: str = "traceparent"
+) -> Optional[str]:
+    """
+    Extract trace ID from response headers.
+    Args:
+        response: HTTP response object
+        trace_header: Name of the trace header to use
+    Returns:
+        Trace ID if found and response is valid, None otherwise
+    """
+    if not 200 <= response.status_code < 300:
+        logger.debug(f"Invalid status code: {response.status_code}")
+        return None
+
+    trace_id = response.headers.get(trace_header)
+    if not trace_id:
+        logger.debug(f"Missing trace header: {trace_header}")
+        return None
+
+    return trace_id
+
+
+def extract_trace_data(
+    response: Response,
+    aspects_to_trace: Optional[List[str]] = None,
+    trace_header: str = "traceparent",
+) -> Optional[TraceData]:
+    """
+    Extract trace data from a response object.
+    Args:
+        response: HTTP response object
+        aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
+        trace_header: Name of the trace header to use (default: "traceparent")
+    Returns:
+        TraceData object if successful, None otherwise
+    Raises:
+        JSONDecodeError: If response body cannot be decoded as JSON
+    """
+    trace_id = _extract_trace_id(response, trace_header)
+    if not trace_id:
+        return None
+
+    try:
+        json_data = response.json()
+        if not isinstance(json_data, list):
+            logger.debug("JSON data is not a list")
+            return None
+
+        data: Dict[str, List[str]] = {}
+
+        for item in json_data:
+            urn = item.get("urn")
+            if not urn:
+                logger.debug(f"Skipping item without URN: {item}")
+                continue
+
+            if aspects_to_trace is None:
+                aspect_names = [
+                    k for k, v in item.items() if k != "urn" and v is not None
+                ]
+            else:
+                aspect_names = [
+                    field for field in aspects_to_trace if item.get(field) is not None
+                ]
+
+            data[urn] = aspect_names
+
+        return TraceData(trace_id=trace_id, data=data)
+
+    except json.JSONDecodeError as e:
+        logger.error(f"Failed to decode JSON response: {e}")
+        return None
+
+
+def extract_trace_data_from_mcps(
+    response: Response,
+    mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
+    aspects_to_trace: Optional[List[str]] = None,
+    trace_header: str = "traceparent",
+) -> Optional[TraceData]:
+    """
+    Extract trace data from a response object and populate data from provided MCPs.
+    Args:
+        response: HTTP response object used only for trace_id extraction
+        mcps: List of MCP URN and aspect data
+        aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
+        trace_header: Name of the trace header to use (default: "traceparent")
+    Returns:
+        TraceData object if successful, None otherwise
+    """
+    trace_id = _extract_trace_id(response, trace_header)
+    if not trace_id:
+        return None
+
+    data: Dict[str, List[str]] = {}
+    try:
+        for mcp in mcps:
+            entity_urn = getattr(mcp, "entityUrn", None)
+            aspect_name = getattr(mcp, "aspectName", None)
+
+            if not entity_urn or not aspect_name:
+                logger.debug(f"Skipping MCP with missing URN or aspect name: {mcp}")
+                continue
+
+            if aspects_to_trace is not None and aspect_name not in aspects_to_trace:
+                continue
+
+            if entity_urn not in data:
+                data[entity_urn] = []
+
+            data[entity_urn].append(aspect_name)
+
+        return TraceData(trace_id=trace_id, data=data)
+
+    except AttributeError as e:
+        logger.error(f"Error processing MCPs: {e}")
+        return None
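A hedged usage sketch for the new helper: given a 2xx response that carries a W3C traceparent header and a JSON list body, extract_trace_data returns a TraceData mapping each urn to the aspect names that were written. The endpoint and payload below are placeholders, not part of this diff; in practice the REST emitter issues the request itself:

import requests

from datahub.emitter.response_helper import extract_trace_data

# Placeholder call; substitute a real ingest endpoint and payload.
response = requests.post("http://localhost:8080/<ingest-endpoint>", json=[])

trace = extract_trace_data(response, aspects_to_trace=["status", "datasetProperties"])
if trace is not None:
    print(f"trace_id: {trace.trace_id}")
    for urn, aspect_names in trace.data.items():
        print(urn, "->", aspect_names)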