acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +31 -7
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +8 -5
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +6 -5
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +16 -18
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +19 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +258 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +30 -6
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +220 -126
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/configuration/common.py
CHANGED

```diff
@@ -20,7 +20,7 @@ from pydantic import BaseModel, Extra, ValidationError
 from pydantic.fields import Field
 from typing_extensions import Protocol, Self
 
-from datahub.configuration._config_enum import ConfigEnum as ConfigEnum
+from datahub.configuration._config_enum import ConfigEnum as ConfigEnum
 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
 from datahub.utilities.dedup_list import deduplicate_list
 
@@ -130,7 +130,7 @@ class PermissiveConfigModel(ConfigModel):
     # It is usually used for argument bags that are passed through to third-party libraries.
 
     class Config:
-        if PYDANTIC_VERSION_2:
+        if PYDANTIC_VERSION_2:  # noqa: SIM108
             extra = "allow"
         else:
             extra = Extra.allow
@@ -198,6 +198,14 @@ class IgnorableError(MetaError):
     """An error that can be ignored."""
 
 
+class TraceTimeoutError(OperationalError):
+    """Failure to complete an API Trace within the timeout."""
+
+
+class TraceValidationError(OperationalError):
+    """Failure to complete the expected write operation."""
+
+
 @runtime_checkable
 class ExceptionWithProps(Protocol):
     def get_telemetry_props(self) -> Dict[str, Any]: ...
```
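The two new exception types extend `OperationalError`, so existing error handling keeps working while callers that opt into API tracing can tell a trace timeout apart from a failed write. A minimal sketch of how a caller might branch on them; the `do_write` callable is hypothetical:

```python
# Hedged sketch: both trace errors subclass OperationalError, so callers can
# catch them specifically and still fall back to the broader type.
from datahub.configuration.common import (
    OperationalError,
    TraceTimeoutError,
    TraceValidationError,
)


def traced_write(do_write) -> bool:
    try:
        do_write()  # hypothetical callable performing a traced write
        return True
    except TraceTimeoutError:
        # The API trace did not complete within the timeout.
        return False
    except TraceValidationError:
        # The expected write operation was not observed.
        return False
    except OperationalError:
        # Any other operational failure: re-raise for the caller.
        raise
```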
datahub/configuration/git.py
CHANGED

```diff
@@ -43,9 +43,7 @@ class GitReference(ConfigModel):
 
     @validator("repo", pre=True)
     def simplify_repo_url(cls, repo: str) -> str:
-        if repo.startswith("github.com/"):
-            repo = f"https://{repo}"
-        elif repo.startswith("gitlab.com"):
+        if repo.startswith("github.com/") or repo.startswith("gitlab.com"):
             repo = f"https://{repo}"
         elif repo.count("/") == 1:
             repo = f"https://github.com/{repo}"
```
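The collapsed branch means bare `github.com/...` and `gitlab.com...` prefixes are now normalized by the same arm of the validator. Illustrative behavior, with example repo values and assuming the model's other fields have defaults:

```python
# Example normalization per the validator above (repo values are illustrative).
from datahub.configuration.git import GitReference

assert GitReference(repo="github.com/acme/analytics").repo == "https://github.com/acme/analytics"
assert GitReference(repo="gitlab.com/acme/analytics").repo == "https://gitlab.com/acme/analytics"
# A bare "org/repo" still defaults to GitHub:
assert GitReference(repo="acme/analytics").repo == "https://github.com/acme/analytics"
```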
datahub/configuration/kafka.py
CHANGED
datahub/emitter/mce_builder.py
CHANGED

```diff
@@ -52,7 +52,15 @@ from datahub.metadata.schema_classes import (
     UpstreamLineageClass,
     _Aspect as AspectAbstract,
 )
-from datahub.metadata.urns import
+from datahub.metadata.urns import (
+    ChartUrn,
+    DashboardUrn,
+    DataFlowUrn,
+    DataJobUrn,
+    DataPlatformUrn,
+    DatasetUrn,
+    TagUrn,
+)
 from datahub.utilities.urn_encoder import UrnEncoder
 
 logger = logging.getLogger(__name__)
@@ -119,7 +127,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
 def make_data_platform_urn(platform: str) -> str:
     if platform.startswith("urn:li:dataPlatform:"):
         return platform
-    return
+    return DataPlatformUrn.create_from_id(platform).urn()
 
 
 def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
@@ -236,7 +244,7 @@ def make_user_urn(username: str) -> str:
     Makes a user urn if the input is not a user or group urn already
     """
     return (
-        f"urn:li:corpuser:{username}"
+        f"urn:li:corpuser:{UrnEncoder.encode_string(username)}"
         if not username.startswith(("urn:li:corpuser:", "urn:li:corpGroup:"))
         else username
     )
@@ -249,7 +257,7 @@ def make_group_urn(groupname: str) -> str:
     if groupname and groupname.startswith(("urn:li:corpGroup:", "urn:li:corpuser:")):
         return groupname
     else:
-        return f"urn:li:corpGroup:{groupname}"
+        return f"urn:li:corpGroup:{UrnEncoder.encode_string(groupname)}"
 
 
 def make_tag_urn(tag: str) -> str:
@@ -301,7 +309,12 @@ def make_data_flow_urn(
 
 
 def make_data_job_urn_with_flow(flow_urn: str, job_id: str) -> str:
-
+    data_flow_urn = DataFlowUrn.from_string(flow_urn)
+    data_job_urn = DataJobUrn.create_from_ids(
+        data_flow_urn=data_flow_urn.urn(),
+        job_id=job_id,
+    )
+    return data_job_urn.urn()
 
 
 def make_data_process_instance_urn(dataProcessInstanceId: str) -> str:
@@ -324,10 +337,11 @@ def make_dashboard_urn(
     platform: str, name: str, platform_instance: Optional[str] = None
 ) -> str:
     # FIXME: dashboards don't currently include data platform urn prefixes.
-
-
-
-
+    return DashboardUrn.create_from_ids(
+        platform=platform,
+        name=name,
+        platform_instance=platform_instance,
+    ).urn()
 
 
 def dashboard_urn_to_key(dashboard_urn: str) -> Optional[DashboardKeyClass]:
@@ -342,10 +356,11 @@ def make_chart_urn(
     platform: str, name: str, platform_instance: Optional[str] = None
 ) -> str:
     # FIXME: charts don't currently include data platform urn prefixes.
-
-
-
-
+    return ChartUrn.create_from_ids(
+        platform=platform,
+        name=name,
+        platform_instance=platform_instance,
+    ).urn()
 
 
 def chart_urn_to_key(chart_urn: str) -> Optional[ChartKeyClass]:
```
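Taken together, these hunks move the string-template helpers onto the typed URN classes and run user and group names through `UrnEncoder`. A small illustration with example inputs:

```python
# Example inputs (illustrative); outputs follow from the updated helpers above.
from datahub.emitter.mce_builder import (
    make_data_platform_urn,
    make_group_urn,
    make_user_urn,
)

assert make_data_platform_urn("snowflake") == "urn:li:dataPlatform:snowflake"
assert make_data_platform_urn("urn:li:dataPlatform:snowflake") == "urn:li:dataPlatform:snowflake"

# Names are now passed through UrnEncoder.encode_string before templating,
# so characters that are reserved in URNs no longer produce malformed urns.
assert make_user_urn("jdoe").startswith("urn:li:corpuser:")
assert make_group_urn("urn:li:corpGroup:eng") == "urn:li:corpGroup:eng"
```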
datahub/emitter/mcp_builder.py
CHANGED

```diff
@@ -36,7 +36,7 @@ from datahub.metadata.schema_classes import (
     SubTypesClass,
     TagAssociationClass,
 )
-from datahub.metadata.urns import StructuredPropertyUrn
+from datahub.metadata.urns import ContainerUrn, StructuredPropertyUrn
 
 # In https://github.com/datahub-project/datahub/pull/11214, we added a
 # new env field to container properties. However, populating this field
@@ -87,6 +87,9 @@ class ContainerKey(DatahubKey):
     def property_dict(self) -> Dict[str, str]:
         return self.dict(by_alias=True, exclude_none=True)
 
+    def as_urn_typed(self) -> ContainerUrn:
+        return ContainerUrn.from_string(self.as_urn())
+
     def as_urn(self) -> str:
         return make_container_urn(guid=self.guid())
```
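`as_urn_typed` is a thin wrapper that returns the same container URN as `as_urn`, just parsed into a `ContainerUrn`. A sketch using one of the module's key subclasses; the field values are illustrative:

```python
# Sketch: DatabaseKey is one of the ContainerKey subclasses in mcp_builder;
# the field values here are illustrative.
from datahub.emitter.mcp_builder import DatabaseKey

key = DatabaseKey(platform="snowflake", database="analytics")
typed = key.as_urn_typed()          # ContainerUrn instance
assert typed.urn() == key.as_urn()  # same string, now with a typed wrapper
```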
datahub/emitter/response_helper.py
ADDED

```diff
@@ -0,0 +1,145 @@
+import json
+import logging
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Sequence, Union
+
+from requests import Response
+
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
+    MetadataChangeProposal,
+)
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TraceData:
+    trace_id: str
+    data: Dict[str, List[str]]
+
+    def __post_init__(self) -> None:
+        if not self.trace_id:
+            raise ValueError("trace_id cannot be empty")
+        if not isinstance(self.data, dict):
+            raise TypeError("data must be a dictionary")
+
+
+def _extract_trace_id(
+    response: Response, trace_header: str = "traceparent"
+) -> Optional[str]:
+    """
+    Extract trace ID from response headers.
+    Args:
+        response: HTTP response object
+        trace_header: Name of the trace header to use
+    Returns:
+        Trace ID if found and response is valid, None otherwise
+    """
+    if not 200 <= response.status_code < 300:
+        logger.debug(f"Invalid status code: {response.status_code}")
+        return None
+
+    trace_id = response.headers.get(trace_header)
+    if not trace_id:
+        logger.debug(f"Missing trace header: {trace_header}")
+        return None
+
+    return trace_id
+
+
+def extract_trace_data(
+    response: Response,
+    aspects_to_trace: Optional[List[str]] = None,
+    trace_header: str = "traceparent",
+) -> Optional[TraceData]:
+    """
+    Extract trace data from a response object.
+    Args:
+        response: HTTP response object
+        aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
+        trace_header: Name of the trace header to use (default: "traceparent")
+    Returns:
+        TraceData object if successful, None otherwise
+    Raises:
+        JSONDecodeError: If response body cannot be decoded as JSON
+    """
+    trace_id = _extract_trace_id(response, trace_header)
+    if not trace_id:
+        return None
+
+    try:
+        json_data = response.json()
+        if not isinstance(json_data, list):
+            logger.debug("JSON data is not a list")
+            return None
+
+        data: Dict[str, List[str]] = {}
+
+        for item in json_data:
+            urn = item.get("urn")
+            if not urn:
+                logger.debug(f"Skipping item without URN: {item}")
+                continue
+
+            if aspects_to_trace is None:
+                aspect_names = [
+                    k for k, v in item.items() if k != "urn" and v is not None
+                ]
+            else:
+                aspect_names = [
+                    field for field in aspects_to_trace if item.get(field) is not None
+                ]
+
+            data[urn] = aspect_names
+
+        return TraceData(trace_id=trace_id, data=data)
+
+    except json.JSONDecodeError as e:
+        logger.error(f"Failed to decode JSON response: {e}")
+        return None
+
+
+def extract_trace_data_from_mcps(
+    response: Response,
+    mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
+    aspects_to_trace: Optional[List[str]] = None,
+    trace_header: str = "traceparent",
+) -> Optional[TraceData]:
+    """
+    Extract trace data from a response object and populate data from provided MCPs.
+    Args:
+        response: HTTP response object used only for trace_id extraction
+        mcps: List of MCP URN and aspect data
+        aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
+        trace_header: Name of the trace header to use (default: "traceparent")
+    Returns:
+        TraceData object if successful, None otherwise
+    """
+    trace_id = _extract_trace_id(response, trace_header)
+    if not trace_id:
+        return None
+
+    data: Dict[str, List[str]] = {}
+    try:
+        for mcp in mcps:
+            entity_urn = getattr(mcp, "entityUrn", None)
+            aspect_name = getattr(mcp, "aspectName", None)
+
+            if not entity_urn or not aspect_name:
+                logger.debug(f"Skipping MCP with missing URN or aspect name: {mcp}")
+                continue
+
+            if aspects_to_trace is not None and aspect_name not in aspects_to_trace:
+                continue
+
+            if entity_urn not in data:
+                data[entity_urn] = []
+
+            data[entity_urn].append(aspect_name)
+
+        return TraceData(trace_id=trace_id, data=data)
+
+    except AttributeError as e:
+        logger.error(f"Error processing MCPs: {e}")
+        return None
```
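A hypothetical end-to-end use of the new module: given a 2xx response that carries a `traceparent` header and the list-of-objects JSON body the functions above expect, `extract_trace_data` yields the trace id plus the aspect names seen per URN.

```python
# Hypothetical usage sketch for the new response_helper module.
import requests

from datahub.emitter.response_helper import extract_trace_data


def log_trace(response: requests.Response) -> None:
    trace = extract_trace_data(response, aspects_to_trace=["status"])
    if trace is None:
        # No traceparent header, non-2xx status, or a body that is not a list.
        return
    print(f"trace id: {trace.trace_id}")
    for urn, aspect_names in trace.data.items():
        print(f"{urn}: {aspect_names}")
```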