acryl-datahub 1.1.0.2rc1__py3-none-any.whl → 1.1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.


acryl_datahub-1.1.0.3.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
- acryl_datahub-1.1.0.2rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.1.0.3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=xviXMZWidai-tecst_TvEVpDRvpRUSKWgx_4dTs7IJ8,323
+ datahub/_version.py,sha256=9kogO5B-EbMUWyRkwYF6whUIjuWLMLaXo3YwVAhKvM4,320
  datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -215,7 +215,7 @@ datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoE
  datahub/ingestion/source/mlflow.py,sha256=fh7izN9jlSwbpGIrEyJktlmwFZR5vNG9z9L5VQ31k_4,33141
  datahub/ingestion/source/mode.py,sha256=g3nhkpW5KS_w3a8JaKWoq3XBNOZKFlmxZq9XI2D5dXY,72161
  datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
- datahub/ingestion/source/nifi.py,sha256=D1gBXxdpLuUQ0eurwofIR_SGg1rHGhwk3qxsWI1PT9c,56882
+ datahub/ingestion/source/nifi.py,sha256=2jxSzuHIRQFF7DLoceGbUd_10lkhHRlqA9hhApSt3Yw,56882
  datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
  datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
  datahub/ingestion/source/preset.py,sha256=bbh0ZWiAZMy2zuJDmaRY07_OuGJ9tdtKjwvIxqbY5II,3964
@@ -224,7 +224,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
  datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
  datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
- datahub/ingestion/source/superset.py,sha256=acxKU8XkaCNvhcv0CwU27_dYTdV5iR45BPcc83SR_T0,48380
+ datahub/ingestion/source/superset.py,sha256=dSXbsPj4_BY9O6esrJRt5WYcHj7QWoBk7PTfIFxS_Zw,48387
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
@@ -263,7 +263,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=Rph96stSOzMQ_b4iRBRhGkpRzsMe8ddF5n4yGeZ7StQ,3369
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=DHw5Z_rxj_fR09p7SO0UmDdvYEa_ViIRnLE9CFxPAAk,32525
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Fq9tAVSvYepwweiZuJB5mbT0Y6EzFOEZWtdL4Zafd4E,50999
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=29E_25jLpMIgWcYRC0ZcYd1fvaFtSi2T8S6hSwiTDTY,51090
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
  datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
@@ -300,6 +300,8 @@ datahub/ingestion/source/dbt/dbt_cloud.py,sha256=_17ZX6WDzg3lKo0J5XukiaZ8AiJVFsg
  datahub/ingestion/source/dbt/dbt_common.py,sha256=Hgdu6yisAOQ4mn98GjhnhTreUJffskMKD-tvYDMQ0bQ,82045
  datahub/ingestion/source/dbt/dbt_core.py,sha256=qtfNQk28r4_hkf5sIkjfWfrvZbW8Q0NIFPi67NpPeB4,24824
  datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
+ datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ datahub/ingestion/source/debug/datahub_debug.py,sha256=LJmGWi2nqWiR_14nybqkIo14D4IzoJ08GaAtlcGvv98,11962
  datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
  datahub/ingestion/source/delta_lake/config.py,sha256=km8WCmjjyRrMy9kJ7JxZZIdS1pKIhKznWQGMYUsF_4s,3522
  datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWMeRaneSpQBlWmlCKAa1wGUl1sfk,1525
@@ -924,12 +926,12 @@ datahub/sdk/chart.py,sha256=9HgZU3yTmKFJCocbSWTbwW8ROfL7h4UbK_B7pHEG7n0,11102
  datahub/sdk/container.py,sha256=IjnFVGDpSFDvgHuuMb7C3VdBxhJuIMq0q6crOs5PupE,7899
  datahub/sdk/dashboard.py,sha256=kt8vD-DzoHLDOwk4Ik8ZoIKC0tPvgxX8rU9CMobs37o,15539
  datahub/sdk/dataflow.py,sha256=gdAPVVkyKvsKtsa1AwhN_LpzidG_XzV3nhtd1cjnzDA,11128
- datahub/sdk/datajob.py,sha256=SxADdQQqD1czrrpKVYcKMllu4UTpZ0i7pDQ7YuqTh5w,12541
+ datahub/sdk/datajob.py,sha256=RaQ3GxtZb8LREWMMJuZJgqC4Dk3tBK9GcsszwsSDxYo,12582
  datahub/sdk/dataset.py,sha256=2-iD-HqjgFpCBmU3k8O5tkltmwFj4jaumADiX8sexJU,29465
  datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
  datahub/sdk/entity_client.py,sha256=xHLGLn6oJfxmHLXo5w4-isPFZGcf8yR5IwyC6pvl_I8,8903
- datahub/sdk/lineage_client.py,sha256=u4bzs6xpdzWhZGx-V0rj1tvQxesVSEuxvxjRQRhZMSg,33822
- datahub/sdk/main_client.py,sha256=nB9CsQxq7fgW7zXtDwSgu51V2OGT12O1zaYZv9oR2t0,5137
+ datahub/sdk/lineage_client.py,sha256=M4fN4p8YHpM7zoQ7RCCekV8oPgw8QsWxzMAA8mPoy20,33822
+ datahub/sdk/main_client.py,sha256=kJRmydyyaOjUdqOljl0w_Cx7BKkViZmnX-1vZBHY4nw,5266
  datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
  datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
  datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
@@ -941,7 +943,7 @@ datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG
  datahub/secret/secret_common.py,sha256=g4anQtYPm7cI6kEJUZHjpBqeCyiUKIim2rJQByaeOoY,2864
  datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
  datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
- datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,6684
+ datahub/specific/chart.py,sha256=EUIRzcYNTs6tXoxVwYjJfD74Jw80kAjXyySjjEoTfw4,6685
  datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
  datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
  datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
@@ -965,7 +967,7 @@ datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=tqFZsE-7owUiU0q49nmkTt50CU4vn8ffUbNcTv9nRbc,71431
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
- datahub/sql_parsing/sqlglot_lineage.py,sha256=6tuVv64MPO4i2VsmO9pjvP5IBWLEGollT3Ayubj6MU4,58668
+ datahub/sql_parsing/sqlglot_lineage.py,sha256=I6c8d4fDje_hi-YfT83E2OMwFJRcuJhzP2vQnKXZAEw,59142
  datahub/sql_parsing/sqlglot_utils.py,sha256=TI11oBu1wrGeUuUGBg7hGTr6lTvztahdqiqXNJYRfbQ,14823
  datahub/sql_parsing/tool_meta_extractor.py,sha256=EV_g7sOchTSUm2p6wluNJqND7-rDYokVTqqFCM7hQ6c,7599
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1073,8 +1075,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.1.0.2rc1.dist-info/METADATA,sha256=SztUJwVSty1c3J3bokL--2m9i0SyP1Q_ayGeXT0kMCA,180763
- acryl_datahub-1.1.0.2rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub-1.1.0.2rc1.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
- acryl_datahub-1.1.0.2rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.1.0.2rc1.dist-info/RECORD,,
+ acryl_datahub-1.1.0.3.dist-info/METADATA,sha256=B3NohLgp5hPbkKh-Bi9Z8Xc9GETUeV_zj5B18rymlFw,182338
+ acryl_datahub-1.1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub-1.1.0.3.dist-info/entry_points.txt,sha256=-N2PGtn1uwKR7-VM9spziE_RNyOdKm_XNpOWL1lnaj4,9790
+ acryl_datahub-1.1.0.3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.1.0.3.dist-info/RECORD,,
acryl_datahub-1.1.0.3.dist-info/entry_points.txt CHANGED
@@ -36,6 +36,7 @@ csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
  datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
  datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
  datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
+ datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
  datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
  datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
  dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
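
The new entry point registers the debug source (added later in this diff) under the source type datahub-debug. A minimal sketch of invoking it through the Python pipeline API; the recipe shape and the console sink follow DataHub's usual conventions, and the probe URL is a placeholder:

from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "datahub-debug",
            "config": {"dns_probe_url": "https://example.com"},
        },
        # The probe only logs; a console sink keeps the run self-contained.
        "sink": {"type": "console"},
    }
)
pipeline.run()
pipeline.raise_from_status()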
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.1.0.2rc1"
+ __version__ = "1.1.0.3"
  
  
  def is_dev_mode() -> bool:
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py CHANGED
@@ -286,6 +286,7 @@ class BigQuerySchemaGenerator:
          yield from gen_database_container(
              database=database,
              name=database,
+             qualified_name=database,
              sub_types=[DatasetContainerSubTypes.BIGQUERY_PROJECT],
              domain_registry=self.domain_registry,
              domain_config=self.config.domain,
@@ -332,6 +333,7 @@ class BigQuerySchemaGenerator:
          yield from gen_schema_container(
              database=project_id,
              schema=dataset,
+             qualified_name=f"{project_id}.{dataset}",
              sub_types=[DatasetContainerSubTypes.BIGQUERY_DATASET],
              domain_registry=self.domain_registry,
              domain_config=self.config.domain,
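
For illustration, the qualified names these two hunks emit take the following shapes (identifiers below are made up):

project_id, dataset = "my-gcp-project", "analytics"
print(project_id)                 # project container: "my-gcp-project"
print(f"{project_id}.{dataset}")  # dataset container: "my-gcp-project.analytics"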
datahub/ingestion/source/debug/datahub_debug.py ADDED
@@ -0,0 +1,300 @@
+ import logging
+ import socket
+ import time
+ from typing import Iterable, Optional
+ from urllib.parse import urlparse
+ 
+ import dns.exception
+ import dns.resolver
+ import requests
+ 
+ from datahub.configuration.common import ConfigModel
+ from datahub.ingestion.api.common import PipelineContext
+ from datahub.ingestion.api.decorators import (
+     SupportStatus,
+     config_class,
+     platform_name,
+     support_status,
+ )
+ from datahub.ingestion.api.source import Source, SourceReport
+ from datahub.ingestion.api.workunit import MetadataWorkUnit
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class DataHubDebugSourceConfig(ConfigModel):
+     dns_probe_url: Optional[str] = None
+ 
+ 
+ @platform_name("DataHubDebug")
+ @config_class(DataHubDebugSourceConfig)
+ @support_status(SupportStatus.TESTING)
+ class DataHubDebugSource(Source):
+     """
+     DataHubDebugSource is helper to debug things in executor where ingestion is running.
+ 
+     This source can perform the following tasks:
+     1. Network probe of a URL. Different from test connection in sources as that is after source starts.
+ 
+     """
+ 
+     def __init__(self, ctx: PipelineContext, config: DataHubDebugSourceConfig):
+         self.ctx = ctx
+         self.config = config
+         self.report = SourceReport()
+         self.report.event_not_produced_warn = False
+ 
+     @classmethod
+     def create(cls, config_dict, ctx):
+         config = DataHubDebugSourceConfig.parse_obj(config_dict)
+         return cls(ctx, config)
+ 
+     def perform_dns_probe(self, url: str) -> None:
+         """
+         Perform comprehensive DNS probe and network connectivity tests.
+         Logs detailed information to help diagnose network issues.
+         """
+         logger.info(f"Starting DNS probe for URL: {url}")
+         logger.info("=" * 60)
+         logger.info(f"DNS PROBE REPORT FOR: {url}")
+         logger.info("=" * 60)
+ 
+         try:
+             # Parse the URL to extract hostname
+             parsed_url = urlparse(
+                 url if url.startswith(("http://", "https://")) else f"http://{url}"
+             )
+             hostname = parsed_url.hostname or parsed_url.netloc
+             port = parsed_url.port or (443 if parsed_url.scheme == "https" else 80)
+ 
+             logger.info(f"Parsed hostname: {hostname}")
+             logger.info(f"Target port: {port}")
+             logger.info(f"URL scheme: {parsed_url.scheme}")
+             logger.info("-" * 60)
+ 
+             # Test 1: Enhanced DNS resolution with dnspython if available
+             logger.info("1. DNS RESOLUTION TEST")
+             self._dns_probe_with_dnspython(hostname)
+ 
+             logger.info("-" * 60)
+ 
+             # Test 2: HTTP/HTTPS connectivity test with requests if available
+             logger.info("2. HTTP CONNECTIVITY TEST")
+             self._http_probe_with_requests(url)
+ 
+             logger.info("-" * 60)
+ 
+             # Test 3: System network information
+             logger.info("3. SYSTEM NETWORK INFORMATION")
+             self._log_system_network_info()
+ 
+         except Exception as e:
+             logger.error(f"DNS probe failed with unexpected error: {e}", exc_info=True)
+ 
+         logger.info("=" * 60)
+         logger.info("DNS PROBE COMPLETED")
+         logger.info("=" * 60)
+ 
+     def _dns_probe_with_dnspython(self, hostname: str) -> None:
+         """Enhanced DNS probing using dnspython library"""
+         try:
+             # Test different record types
+             record_types = ["A", "AAAA", "CNAME", "MX"]
+ 
+             for record_type in record_types:
+                 try:
+                     start_time = time.time()
+                     answers = dns.resolver.resolve(hostname, record_type)
+                     dns_time = time.time() - start_time
+ 
+                     logger.info(
+                         f"✓ {record_type} record resolution successful ({dns_time:.3f}s)"
+                     )
+                     for answer in answers:
+                         logger.info(f"  - {record_type}: {answer}")
+ 
+                 except dns.resolver.NXDOMAIN:
+                     logger.info(f"✗ {record_type} record: Domain does not exist")
+                 except dns.resolver.NoAnswer:
+                     logger.info(
+                         f"- {record_type} record: No answer (record type not available)"
+                     )
+                 except dns.exception.Timeout:
+                     logger.error(f"✗ {record_type} record: DNS query timed out")
+                 except Exception as e:
+                     logger.error(f"✗ {record_type} record query failed: {e}")
+ 
+             # Test different DNS servers
+             logger.info("Testing with different DNS servers:")
+             dns_servers = ["8.8.8.8", "1.1.1.1", "208.67.222.222"]
+ 
+             for dns_server in dns_servers:
+                 try:
+                     resolver = dns.resolver.Resolver()
+                     resolver.nameservers = [dns_server]
+                     resolver.timeout = 5
+ 
+                     start_time = time.time()
+                     answers = resolver.resolve(hostname, "A")
+                     dns_time = time.time() - start_time
+ 
+                     logger.info(
+                         f"✓ DNS server {dns_server} responded ({dns_time:.3f}s)"
+                     )
+                     for answer in answers:
+                         logger.info(f"  - A: {answer}")
+ 
+                 except Exception as e:
+                     logger.error(f"✗ DNS server {dns_server} failed: {e}")
+ 
+         except Exception as e:
+             logger.error(f"Enhanced DNS probe failed: {e}", exc_info=True)
+ 
+     def _http_probe_with_requests(self, url: str) -> None:
+         """HTTP connectivity test using requests library"""
+         try:
+             # Test with different timeouts and methods
+             timeout = 10
+             allow_redirects_head = True
+             allow_redirects_get = False
+ 
+             # Test HEAD request
+             try:
+                 logger.info(f"Testing HEAD request with timeout {timeout}s")
+                 start_time = time.time()
+ 
+                 response = requests.head(
+                     url, timeout=timeout, allow_redirects=allow_redirects_head
+                 )
+ 
+                 request_time = time.time() - start_time
+ 
+                 logger.info(f"✓ HEAD request successful ({request_time:.3f}s)")
+                 logger.info(f"  Status code: {response.status_code}")
+                 logger.info(
+                     f"  Response headers: {dict(list(response.headers.items())[:5])}"
+                 )
+ 
+                 if hasattr(response, "url") and response.url != url:
+                     logger.info(f"  Final URL after redirects: {response.url}")
+ 
+             except requests.exceptions.Timeout:
+                 logger.error(f"✗ HEAD request timed out after {timeout}s")
+             except requests.exceptions.ConnectionError as e:
+                 logger.error(f"✗ HEAD connection error: {e}")
+             except requests.exceptions.RequestException as e:
+                 logger.error(f"✗ HEAD request failed: {e}")
+             except Exception as e:
+                 logger.error(f"✗ HEAD unexpected error: {e}")
+ 
+             # Test GET request
+             try:
+                 logger.info(f"Testing GET request with timeout {timeout}s")
+                 start_time = time.time()
+ 
+                 response = requests.get(
+                     url, timeout=timeout, allow_redirects=allow_redirects_get
+                 )
+ 
+                 request_time = time.time() - start_time
+ 
+                 logger.info(f"✓ GET request successful ({request_time:.3f}s)")
+                 logger.info(f"  Status code: {response.status_code}")
+                 logger.info(
+                     f"  Response headers: {dict(list(response.headers.items())[:5])}"
+                 )
+ 
+                 if hasattr(response, "url") and response.url != url:
+                     logger.info(f"  Final URL after redirects: {response.url}")
+ 
+             except requests.exceptions.Timeout:
+                 logger.error(f"✗ GET request timed out after {timeout}s")
+             except requests.exceptions.ConnectionError as e:
+                 logger.error(f"✗ GET connection error: {e}")
+             except requests.exceptions.RequestException as e:
+                 logger.error(f"✗ GET request failed: {e}")
+             except Exception as e:
+                 logger.error(f"✗ GET unexpected error: {e}")
+ 
+         except Exception as e:
+             logger.error(f"HTTP probe failed: {e}", exc_info=True)
+ 
+     def _log_dns_troubleshooting(self) -> None:
+         """Log DNS troubleshooting information"""
+         logger.info("DNS TROUBLESHOOTING SUGGESTIONS:")
+         logger.info("- Check if the hostname is correct")
+         logger.info("- Verify DNS server configuration")
+         logger.info("- Check network connectivity")
+         logger.info("- Try using a different DNS server (8.8.8.8, 1.1.1.1)")
+         logger.info("- Check if there are firewall restrictions")
+ 
+     def _log_system_network_info(self) -> None:
+         """Log system network configuration information"""
+         try:
+             local_hostname = socket.gethostname()
+             logger.info(f"Local hostname: {local_hostname}")
+ 
+             try:
+                 local_ips = socket.getaddrinfo(local_hostname, None)
+                 logger.info("Local IP addresses:")
+                 for addr_info in local_ips:
+                     if addr_info[0] in [socket.AF_INET, socket.AF_INET6]:
+                         family = "IPv4" if addr_info[0] == socket.AF_INET else "IPv6"
+                         logger.info(f"  - {addr_info[4][0]} ({family})")
+             except Exception as e:
+                 logger.warning(f"Could not retrieve local IP addresses: {e}")
+ 
+             logger.info("DNS Server Connectivity:")
+             dns_servers = ["8.8.8.8", "1.1.1.1", "208.67.222.222"]
+             for dns_server in dns_servers:
+                 try:
+                     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+                     sock.settimeout(5)
+                     result = sock.connect_ex((dns_server, 53))
+                     if result == 0:
+                         logger.info(f"  ✓ Can reach {dns_server}:53")
+                     else:
+                         logger.error(f"  ✗ Cannot reach {dns_server}:53")
+                     sock.close()
+                 except Exception as e:
+                     logger.error(f"  ✗ Error testing {dns_server}:53 - {e}")
+ 
+         except Exception as e:
+             logger.warning(f"Could not gather system network info: {e}")
+ 
+     def _test_alternative_dns(self, hostname: str) -> None:
+         """Test hostname resolution using alternative methods"""
+         try:
+             families = [(socket.AF_INET, "IPv4"), (socket.AF_INET6, "IPv6")]
+ 
+             for family, family_name in families:
+                 try:
+                     result = socket.getaddrinfo(hostname, None, family)
+                     if result:
+                         logger.info(f"✓ {family_name} resolution successful:")
+                         for addr_info in result[:3]:
+                             logger.info(f"  - {addr_info[4][0]}")
+                     else:
+                         logger.warning(
+                             f"✗ {family_name} resolution returned no results"
+                         )
+                 except socket.gaierror:
+                     logger.error(f"✗ {family_name} resolution failed")
+                 except Exception as e:
+                     logger.error(f"✗ {family_name} resolution error: {e}")
+ 
+         except Exception as e:
+             logger.error(f"Alternative DNS test failed: {e}")
+ 
+     def get_workunits_internal(
+         self,
+     ) -> Iterable[MetadataWorkUnit]:
+         if self.config.dns_probe_url is not None:
+             # Perform DNS probe
+             logger.info(f"Performing DNS probe for: {self.config.dns_probe_url}")
+             self.perform_dns_probe(self.config.dns_probe_url)
+ 
+         yield from []
+ 
+     def get_report(self) -> SourceReport:
+         return self.report
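
A minimal sketch of driving this source directly, outside a pipeline. The constructor and config shapes are taken from the file above; the run_id and probe URL are placeholders:

from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.debug.datahub_debug import (
    DataHubDebugSource,
    DataHubDebugSourceConfig,
)

ctx = PipelineContext(run_id="debug-probe")
config = DataHubDebugSourceConfig(dns_probe_url="https://datahub.example.com")
source = DataHubDebugSource(ctx, config)

# The source yields no work units; its output is the probe logging.
for _ in source.get_workunits_internal():
    pass
print(source.get_report())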
datahub/ingestion/source/nifi.py CHANGED
@@ -72,7 +72,7 @@ NIFI = "nifi"
  # and here - https://github.com/psf/requests/issues/1573
  class SSLAdapter(HTTPAdapter):
      def __init__(self, certfile, keyfile, password=None):
-         self.context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+         self.context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
          self.context.load_cert_chain(
              certfile=certfile, keyfile=keyfile, password=password
          )
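
The fix swaps the ssl.Purpose constant: SERVER_AUTH builds a client-side context that verifies the server it connects to, while CLIENT_AUTH is meant for server sockets authenticating connecting clients. A standalone illustration using only the stdlib:

import ssl

# Client-side context (what the NiFi adapter needs): verifies the remote
# server's certificate, and can still load a client cert for mutual TLS.
client_ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
print(client_ctx.verify_mode == ssl.CERT_REQUIRED)  # True
print(client_ctx.check_hostname)                    # True

# Server-side context (what CLIENT_AUTH is for): no peer verification by default.
server_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
print(server_ctx.verify_mode == ssl.CERT_NONE)      # True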
datahub/ingestion/source/superset.py CHANGED
@@ -1087,7 +1087,7 @@ class SupersetSource(StatefulIngestionSourceBase):
          datasource_urn = self.get_datasource_urn_from_id(
              dataset_response, self.platform
          )
-         dataset_url = f"{self.config.display_uri}{dataset_response.get('result', {}).get('url', '')}"
+         dataset_url = f"{self.config.display_uri}/explore/?datasource_type=table&datasource_id={dataset.id}"
  
          modified_actor = f"urn:li:corpuser:{self.owner_info.get((dataset_data.get('changed_by') or {}).get('id', -1), 'unknown')}"
          now = datetime.now().strftime("%I:%M%p on %B %d, %Y")
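
The old code depended on the url field of the dataset API response; the new code builds the Explore URL directly from the dataset id. The resulting shape, with made-up values:

display_uri = "https://superset.example.com"
dataset_id = 42  # stands in for dataset.id
dataset_url = f"{display_uri}/explore/?datasource_type=table&datasource_id={dataset_id}"
assert dataset_url == (
    "https://superset.example.com/explore/?datasource_type=table&datasource_id=42"
)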
datahub/sdk/datajob.py CHANGED
@@ -313,6 +313,7 @@ class DataJob(
          # Set the browse path aspect
          self._set_aspect(models.BrowsePathsV2Class(path=browse_path))
  
+     # TODO: support datajob input/output
      @property
      def inlets(self) -> List[DatasetUrn]:
          """Get the inlets of the data job."""
datahub/sdk/lineage_client.py CHANGED
@@ -248,7 +248,7 @@ class LineageClient:
          - dashboard to dataset
          - dashboard to chart
          - dashboard to dashboard
-         - chart to dataset
+         - dataset to chart
  
          Args:
              upstream: URN of the upstream entity (dataset or datajob)
@@ -9,15 +9,6 @@ from datahub.sdk.entity_client import EntityClient
9
9
  from datahub.sdk.lineage_client import LineageClient
10
10
  from datahub.sdk.search_client import SearchClient
11
11
 
12
- try:
13
- from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
14
- ResolverClient,
15
- )
16
- except ImportError:
17
- from datahub.sdk.resolver_client import ( # type: ignore[assignment] # If the client is not installed, use the one from the SDK
18
- ResolverClient,
19
- )
20
-
21
12
 
22
13
  class DataHubClient:
23
14
  """Main client for interacting with DataHub.
@@ -101,7 +92,15 @@ class DataHubClient:
101
92
  return EntityClient(self)
102
93
 
103
94
  @property
104
- def resolve(self) -> ResolverClient:
95
+ def resolve(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
96
+ try:
97
+ from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
98
+ ResolverClient,
99
+ )
100
+ except ImportError:
101
+ from datahub.sdk.resolver_client import ( # type: ignore[assignment] # If the client is not installed, use the one from the SDK
102
+ ResolverClient,
103
+ )
105
104
  return ResolverClient(self)
106
105
 
107
106
  @property
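
The change moves the optional-dependency import from module level into the property body, so importing datahub.sdk.main_client no longer has to resolve acryl_datahub_cloud (and a circular import is avoided). A generic, runnable sketch of the same deferred-import pattern, using stdlib modules so it runs anywhere:

class LazyHolder:
    @property
    def codec(self):
        # Resolved on attribute access, not at module import time.
        try:
            import ujson as codec_mod  # optional fast path, often absent
        except ImportError:
            import json as codec_mod  # stdlib fallback
        return codec_mod

holder = LazyHolder()
print(holder.codec.dumps({"lazy": True}))  # works whether or not ujson is installed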
datahub/specific/chart.py CHANGED
@@ -77,7 +77,7 @@ class ChartPatchBuilder(
              ChartInfo.ASPECT_NAME,
              "add",
              path=("inputEdges", input_urn),
-             value=input_urn,
+             value=input_edge,
          )
          return self
  
datahub/sql_parsing/sqlglot_lineage.py CHANGED
@@ -1486,6 +1486,13 @@ def _sqlglot_lineage_nocache(
          )
      except Exception as e:
          return SqlParsingResult.make_from_error(e)
+     except BaseException as e:
+         # Handle pyo3_runtime.PanicException from SQLGlot's Rust tokenizer.
+         # pyo3_runtime.PanicException inherits from BaseException (like SystemExit or
+         # KeyboardInterrupt) rather than Exception, so it bypasses normal exception handling.
+         wrapped_exception = Exception(f"BaseException during SQL parsing: {e}")
+         wrapped_exception.__cause__ = e
+         return SqlParsingResult.make_from_error(wrapped_exception)
  
  
  _sqlglot_lineage_cached = functools.lru_cache(maxsize=SQL_PARSE_RESULT_CACHE_SIZE)(
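
The extra handler is needed because pyo3_runtime.PanicException derives from BaseException, so it skips `except Exception`. A self-contained demonstration with a stand-in exception class:

class FakePanic(BaseException):  # stand-in for pyo3_runtime.PanicException
    pass

def tokenize():
    raise FakePanic("rust tokenizer panicked")

try:
    tokenize()
except Exception:
    print("caught by Exception")  # never reached for BaseException subclasses
except BaseException as e:
    wrapped = Exception(f"BaseException during SQL parsing: {e}")
    wrapped.__cause__ = e
    print(f"caught and wrapped: {wrapped}")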