acryl-datahub 1.0.0.1rc4__py3-none-any.whl → 1.0.0.1rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of acryl-datahub might be problematic; see the registry page for details.

acryl_datahub-1.0.0.1rc5.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
-acryl_datahub-1.0.0.1rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+acryl_datahub-1.0.0.1rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=C5PxZfTY1_MHATsJ5uiJ0n1KBC0rumbfeq67GwRBzYQ,323
+datahub/_version.py,sha256=WcqnUOhppm3OjJxRxReX0PiJpy4Wv0kAAnaBQ0m4FCw,323
 datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
-datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
+datahub/errors.py,sha256=bwtiNzFdVFze0IVKDEXQutkwk5j7cZkfXCUYCZIDSYg,565
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -120,13 +120,13 @@ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP
 datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
 datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
 datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
-datahub/emitter/mce_builder.py,sha256=8UiG2VsYgC7n29h_y4qL6F9faGwwMZF3zGscl_CBT9s,16808
+datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
 datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
 datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
 datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
 datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
-datahub/emitter/response_helper.py,sha256=lRMvzF-RPHNkN_ONl-N2uJjKh5XtRFrofrdGibVGn2U,4509
-datahub/emitter/rest_emitter.py,sha256=4l3_vnOmS8GKTj_HUejg5gJb28QCK0XH_nPVPE5AAp4,29841
+datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
+datahub/emitter/rest_emitter.py,sha256=NhA-4LnLxAHVz1cLiYqFnfX2YZTsCLdOBaFUtqqJpPs,30197
 datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
 datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
 datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -390,7 +390,7 @@ datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9y
 datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=4Kr7cLXpsWGtg-M18aXyhij9k9Ll5dGv3EaCS3d2DRk,8590
 datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=FHBFSkf5tf8_o5Sjfuvo1pLVTlkSyxI5HpI8ZthPuhE,38569
-datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=NrhgwREmkWTvlhpEs7dAEEJfOxQRalA02ArKr2LLjeY,27666
+datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=wFAcldAYpDwW-9ll50LB7ac938-tn4AMnTJSNDoOO6Y,27651
 datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
 datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
 datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
@@ -410,7 +410,7 @@ datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUh
 datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
 datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
 datahub/ingestion/source/redshift/lineage.py,sha256=IPF8vHy2MFyhK-hu2-lxV2-kcnNAEzltPLnnIvwIBMY,44100
-datahub/ingestion/source/redshift/lineage_v2.py,sha256=vQ2LBa04hqYqIRK0CP3VDYRlvMLAqodzdieDl6LipiQ,17909
+datahub/ingestion/source/redshift/lineage_v2.py,sha256=dbTvuaJBV5yvCWM_oEAqZIA1JOlGxLJOexbEB47A_xE,17962
 datahub/ingestion/source/redshift/profile.py,sha256=dq7m9YG3TvEMbplwVIutUpzbXLPH8KIj9SuWNo7PWWE,4323
 datahub/ingestion/source/redshift/query.py,sha256=vVIuNUaU4a7AfMFJZlgLuqi0cGVl0gVz8xZUSnPhWvs,47845
 datahub/ingestion/source/redshift/redshift.py,sha256=whMujnJxwNT2ZXnOVRrZQiy317hlsvbARzabKmI3oN8,43536
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.0.0.1rc4.dist-info/METADATA,sha256=0QZSNfWv2u7u7GcupcTXvYmmBOqeB7vfGNKHUyQEoNs,176849
-acryl_datahub-1.0.0.1rc4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-acryl_datahub-1.0.0.1rc4.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
-acryl_datahub-1.0.0.1rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
-acryl_datahub-1.0.0.1rc4.dist-info/RECORD,,
+acryl_datahub-1.0.0.1rc5.dist-info/METADATA,sha256=tMnH_4TdNIZMpke-1KadBIdM0nx_sJtliYXrZJXUkbs,176849
+acryl_datahub-1.0.0.1rc5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+acryl_datahub-1.0.0.1rc5.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+acryl_datahub-1.0.0.1rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.0.0.1rc5.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
 # Published at https://pypi.org/project/acryl-datahub/.
 __package_name__ = "acryl-datahub"
-__version__ = "1.0.0.1rc4"
+__version__ = "1.0.0.1rc5"


 def is_dev_mode() -> bool:
datahub/emitter/mce_builder.py CHANGED
@@ -125,9 +125,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:


 def make_data_platform_urn(platform: str) -> str:
-    if platform.startswith("urn:li:dataPlatform:"):
-        return platform
-    return DataPlatformUrn.create_from_id(platform).urn()
+    return DataPlatformUrn(platform).urn()


 def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
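For context on this hunk: make_data_platform_urn now delegates entirely to the DataPlatformUrn constructor, which accepts a bare platform id; dropping the passthrough branch suggests the URN class itself now tolerates an already-formed URN. A minimal sketch of the bare-id case, with the import path an assumption for illustration:

    from datahub.metadata.urns import DataPlatformUrn  # import path assumed

    # The constructor takes a bare platform id and serializes it to a full URN.
    assert DataPlatformUrn("snowflake").urn() == "urn:li:dataPlatform:snowflake"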
datahub/emitter/response_helper.py CHANGED
@@ -1,17 +1,21 @@
 import json
 import logging
+import warnings
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Sequence, Union

 from requests import Response

 from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.errors import APITracingWarning
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
     MetadataChangeProposal,
 )

 logger = logging.getLogger(__name__)

+_TRACE_HEADER_NAME = "traceparent"
+

 @dataclass
 class TraceData:
@@ -25,14 +29,11 @@ class TraceData:
             raise TypeError("data must be a dictionary")


-def _extract_trace_id(
-    response: Response, trace_header: str = "traceparent"
-) -> Optional[str]:
+def _extract_trace_id(response: Response) -> Optional[str]:
     """
     Extract trace ID from response headers.
     Args:
         response: HTTP response object
-        trace_header: Name of the trace header to use
     Returns:
         Trace ID if found and response is valid, None otherwise
     """
@@ -40,9 +41,17 @@
         logger.debug(f"Invalid status code: {response.status_code}")
         return None

-    trace_id = response.headers.get(trace_header)
+    trace_id = response.headers.get(_TRACE_HEADER_NAME)
     if not trace_id:
-        logger.debug(f"Missing trace header: {trace_header}")
+        # This will only be printed if
+        # 1. we're in async mode (checked by the caller)
+        # 2. the server did not return a trace ID
+        logger.debug(f"Missing trace header: {_TRACE_HEADER_NAME}")
+        warnings.warn(
+            "No trace ID found in response headers. API tracing is not active - likely due to an outdated server version.",
+            APITracingWarning,
+            stacklevel=3,
+        )
         return None

     return trace_id
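Clients that target an older DataHub server will now see this warning whenever the traceparent header is missing in async mode. A sketch of how a consumer could silence it with the standard warnings filter; the import path for APITracingWarning is confirmed by the errors.py hunk further down:

    import warnings

    from datahub.errors import APITracingWarning

    # Opt out of the new warning, e.g. when intentionally emitting to an
    # older server that never returns a traceparent header.
    warnings.filterwarnings("ignore", category=APITracingWarning)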
@@ -51,20 +60,19 @@
 def extract_trace_data(
     response: Response,
     aspects_to_trace: Optional[List[str]] = None,
-    trace_header: str = "traceparent",
 ) -> Optional[TraceData]:
-    """
-    Extract trace data from a response object.
+    """Extract trace data from a response object.
+
+    If we run into a JSONDecodeError, we'll log an error and return None.
+
     Args:
         response: HTTP response object
         aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
-        trace_header: Name of the trace header to use (default: "traceparent")
+
     Returns:
         TraceData object if successful, None otherwise
-    Raises:
-        JSONDecodeError: If response body cannot be decoded as JSON
     """
-    trace_id = _extract_trace_id(response, trace_header)
+    trace_id = _extract_trace_id(response)
     if not trace_id:
         return None

@@ -104,19 +112,18 @@ def extract_trace_data_from_mcps(
     response: Response,
     mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
     aspects_to_trace: Optional[List[str]] = None,
-    trace_header: str = "traceparent",
 ) -> Optional[TraceData]:
-    """
-    Extract trace data from a response object and populate data from provided MCPs.
+    """Extract trace data from a response object and populate data from provided MCPs.
+
     Args:
         response: HTTP response object used only for trace_id extraction
         mcps: List of MCP URN and aspect data
         aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
-        trace_header: Name of the trace header to use (default: "traceparent")
+
     Returns:
         TraceData object if successful, None otherwise
     """
-    trace_id = _extract_trace_id(response, trace_header)
+    trace_id = _extract_trace_id(response)
     if not trace_id:
         return None

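Net effect on callers: the trace_header parameter is gone from both public helpers (the header name is now pinned to the module-level _TRACE_HEADER_NAME constant), and per the updated docstring a JSON decode failure is logged and swallowed rather than raised. A minimal usage sketch, assuming a requests.Response named response:

    from datahub.emitter.response_helper import extract_trace_data

    trace = extract_trace_data(response)  # no trace_header argument anymore
    if trace is None:
        # Either the traceparent header was absent (which now also emits an
        # APITracingWarning) or the body could not be decoded as JSON.
        pass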
datahub/emitter/rest_emitter.py CHANGED
@@ -5,6 +5,7 @@ import json
 import logging
 import os
 import time
+import warnings
 from collections import defaultdict
 from dataclasses import dataclass
 from datetime import datetime, timedelta
@@ -50,6 +51,7 @@ from datahub.emitter.response_helper import (
     extract_trace_data_from_mcps,
 )
 from datahub.emitter.serialization_helper import pre_json_transform
+from datahub.errors import APITracingWarning
 from datahub.ingestion.api.closeable import Closeable
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
     MetadataChangeEvent,
@@ -749,6 +751,12 @@ class DataHubRestEmitter(Closeable, Emitter):
             trace_flag if trace_flag is not None else self._default_trace_mode
         )
         resolved_async_flag = async_flag if async_flag is not None else async_default
+        if resolved_trace_flag and not resolved_async_flag:
+            warnings.warn(
+                "API tracing is only available with async ingestion. For sync mode, API errors will be surfaced as exceptions.",
+                APITracingWarning,
+                stacklevel=3,
+            )
         return resolved_trace_flag and resolved_async_flag

     def __repr__(self) -> str:
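The resolution order itself is unchanged: an explicit per-call flag wins over the emitter default, and tracing only takes effect when async ingestion is also on; the new branch just warns in the trace-without-async case. A standalone sketch of that precedence, with names and default values invented for illustration:

    from typing import Optional

    def _resolve(flag: Optional[bool], default: bool) -> bool:
        # A per-call flag takes precedence; otherwise use the emitter default.
        return flag if flag is not None else default

    # Tracing is effective only when both resolved flags are true; the new
    # APITracingWarning fires when tracing is requested without async mode.
    def tracing_active(trace: Optional[bool], async_: Optional[bool]) -> bool:
        return _resolve(trace, default=False) and _resolve(async_, default=False)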
datahub/errors.py CHANGED
@@ -33,3 +33,7 @@ class MultipleSubtypesWarning(Warning):

 class ExperimentalWarning(Warning):
     pass
+
+
+class APITracingWarning(Warning):
+    pass
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py CHANGED
@@ -115,7 +115,7 @@ class PowerBiAPI:
         if scan_result is None:
             return results

-        for scanned_dashboard in scan_result.get(Constant.DASHBOARDS, []):
+        for scanned_dashboard in scan_result.get(Constant.DASHBOARDS) or []:
             # Iterate through response and create a list of PowerBiAPI.Dashboard
             dashboard_id = scanned_dashboard.get("id")
             tags = self._parse_endorsement(
@@ -133,17 +133,17 @@
         if scan_result is None:
             return results

-        reports: List[dict] = scan_result.get(Constant.REPORTS, [])
+        reports: List[dict] = scan_result.get(Constant.REPORTS) or []

         for report in reports:
-            report_id = report.get(Constant.ID, None)
+            report_id = report.get(Constant.ID)
             if report_id is None:
                 logger.warning(
                     f"Report id is none. Skipping endorsement tag for report instance {report}"
                 )
                 continue
             endorsements = self._parse_endorsement(
-                report.get(Constant.ENDORSEMENT_DETAIL, None)
+                report.get(Constant.ENDORSEMENT_DETAIL)
             )
             results[report_id] = endorsements

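The recurring .get(key, []) → .get(key) or [] change across this file guards against the Power BI scan API returning an explicit null: dict.get only falls back to its default when the key is absent, not when the stored value is None. A quick illustration:

    scan_result = {"reports": None}  # key present, value null

    scan_result.get("reports", [])    # -> None; iterating over it raises TypeError
    scan_result.get("reports") or []  # -> []; always safe to iterate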
@@ -339,7 +339,7 @@
         if not endorsements:
             return []

-        endorsement = endorsements.get(Constant.ENDORSEMENT, None)
+        endorsement = endorsements.get(Constant.ENDORSEMENT)
         if not endorsement:
             return []

@@ -396,7 +396,7 @@

         if self.__config.extract_endorsements_to_tags:
             dataset_instance.tags = self._parse_endorsement(
-                dataset_dict.get(Constant.ENDORSEMENT_DETAIL, None)
+                dataset_dict.get(Constant.ENDORSEMENT_DETAIL)
             )

         dataset_map[dataset_instance.id] = dataset_instance
@@ -407,7 +407,7 @@
             else dataset_instance.id
         )
         logger.debug(f"dataset_dict = {dataset_dict}")
-        for table in dataset_dict.get(Constant.TABLES, []):
+        for table in dataset_dict.get(Constant.TABLES) or []:
             expression: Optional[str] = (
                 table[Constant.SOURCE][0][Constant.EXPRESSION]
                 if table.get(Constant.SOURCE) is not None
@@ -430,10 +430,10 @@
                         column["dataType"], FIELD_TYPE_MAPPING["Null"]
                     ),
                 )
-                for column in table.get("columns", [])
+                for column in table.get("columns") or []
             ],
             measures=[
-                Measure(**measure) for measure in table.get("measures", [])
+                Measure(**measure) for measure in table.get("measures") or []
             ],
             dataset=dataset_instance,
             row_count=None,
@@ -480,7 +480,7 @@
                 )
             )
             if app_id is None:  # In PowerBI one workspace can have one app
-                app_id = report.get(Constant.APP_ID)
+                app_id = report[Constant.APP_ID]

         raw_app_dashboards: List[Dict] = []
         # Filter app dashboards
@@ -488,7 +488,7 @@
             if dashboard.get(Constant.APP_ID):
                 raw_app_dashboards.append(dashboard)
                 if app_id is None:  # In PowerBI, one workspace contains one app
-                    app_id = report[Constant.APP_ID]
+                    app_id = dashboard[Constant.APP_ID]

         # workspace doesn't have an App. Above two loops can be avoided
         # if app_id is available at root level in workspace_metadata
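The last two hunks adjust the app_id bookkeeping: in the reports loop, .get() is replaced by direct indexing (presumably safe, since the items considered here have apparently already been filtered for an app id); in the dashboards loop, what looks like a copy-paste bug is fixed, with app_id now read from the current dashboard rather than from report, the variable left over from the earlier loop.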
datahub/ingestion/source/redshift/lineage_v2.py CHANGED
@@ -230,7 +230,8 @@ class RedshiftSqlLineageV2(Closeable):
         )

         # Populate lineage for external tables.
-        self._process_external_tables(all_tables=all_tables, db_schemas=db_schemas)
+        if not self.config.skip_external_tables:
+            self._process_external_tables(all_tables=all_tables, db_schemas=db_schemas)

     def _populate_lineage_agg(
         self,
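This gates the external-table lineage pass behind the source's skip_external_tables option; the field name comes straight from the diff, while the config class and connection fields below are assumptions for illustration:

    from datahub.ingestion.source.redshift.config import RedshiftConfig

    config = RedshiftConfig.parse_obj(
        {
            "host_port": "my-cluster.example.com:5439",  # hypothetical connection details
            "database": "dev",
            "skip_external_tables": True,  # skips the _process_external_tables() call above
        }
    )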