acryl-datahub 1.1.0.2__py3-none-any.whl → 1.1.0.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.2.dist-info → acryl_datahub-1.1.0.3rc1.dist-info}/METADATA +2448 -2419
- {acryl_datahub-1.1.0.2.dist-info → acryl_datahub-1.1.0.3rc1.dist-info}/RECORD +14 -12
- {acryl_datahub-1.1.0.2.dist-info → acryl_datahub-1.1.0.3rc1.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/sdk/datajob.py +1 -0
- datahub/sdk/lineage_client.py +1 -1
- datahub/specific/chart.py +1 -1
- {acryl_datahub-1.1.0.2.dist-info → acryl_datahub-1.1.0.3rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.2.dist-info → acryl_datahub-1.1.0.3rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.2.dist-info → acryl_datahub-1.1.0.3rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.1.0.
|
|
1
|
+
acryl_datahub-1.1.0.3rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=S6g02VXkiMX8tOvjPQvhiycUlmQG8wDgMRipQjsiIuw,323
|
|
5
5
|
datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -215,7 +215,7 @@ datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoE
|
|
|
215
215
|
datahub/ingestion/source/mlflow.py,sha256=fh7izN9jlSwbpGIrEyJktlmwFZR5vNG9z9L5VQ31k_4,33141
|
|
216
216
|
datahub/ingestion/source/mode.py,sha256=g3nhkpW5KS_w3a8JaKWoq3XBNOZKFlmxZq9XI2D5dXY,72161
|
|
217
217
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
218
|
-
datahub/ingestion/source/nifi.py,sha256=
|
|
218
|
+
datahub/ingestion/source/nifi.py,sha256=2jxSzuHIRQFF7DLoceGbUd_10lkhHRlqA9hhApSt3Yw,56882
|
|
219
219
|
datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
|
|
220
220
|
datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
|
|
221
221
|
datahub/ingestion/source/preset.py,sha256=bbh0ZWiAZMy2zuJDmaRY07_OuGJ9tdtKjwvIxqbY5II,3964
|
|
@@ -263,7 +263,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256
|
|
|
263
263
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=Rph96stSOzMQ_b4iRBRhGkpRzsMe8ddF5n4yGeZ7StQ,3369
|
|
264
264
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
|
|
265
265
|
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=DHw5Z_rxj_fR09p7SO0UmDdvYEa_ViIRnLE9CFxPAAk,32525
|
|
266
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
266
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=29E_25jLpMIgWcYRC0ZcYd1fvaFtSi2T8S6hSwiTDTY,51090
|
|
267
267
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
268
268
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
269
269
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
|
|
@@ -300,6 +300,8 @@ datahub/ingestion/source/dbt/dbt_cloud.py,sha256=_17ZX6WDzg3lKo0J5XukiaZ8AiJVFsg
|
|
|
300
300
|
datahub/ingestion/source/dbt/dbt_common.py,sha256=Hgdu6yisAOQ4mn98GjhnhTreUJffskMKD-tvYDMQ0bQ,82045
|
|
301
301
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=qtfNQk28r4_hkf5sIkjfWfrvZbW8Q0NIFPi67NpPeB4,24824
|
|
302
302
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
|
|
303
|
+
datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
304
|
+
datahub/ingestion/source/debug/datahub_debug.py,sha256=LJmGWi2nqWiR_14nybqkIo14D4IzoJ08GaAtlcGvv98,11962
|
|
303
305
|
datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
|
|
304
306
|
datahub/ingestion/source/delta_lake/config.py,sha256=km8WCmjjyRrMy9kJ7JxZZIdS1pKIhKznWQGMYUsF_4s,3522
|
|
305
307
|
datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWMeRaneSpQBlWmlCKAa1wGUl1sfk,1525
|
|
@@ -924,11 +926,11 @@ datahub/sdk/chart.py,sha256=9HgZU3yTmKFJCocbSWTbwW8ROfL7h4UbK_B7pHEG7n0,11102
|
|
|
924
926
|
datahub/sdk/container.py,sha256=IjnFVGDpSFDvgHuuMb7C3VdBxhJuIMq0q6crOs5PupE,7899
|
|
925
927
|
datahub/sdk/dashboard.py,sha256=kt8vD-DzoHLDOwk4Ik8ZoIKC0tPvgxX8rU9CMobs37o,15539
|
|
926
928
|
datahub/sdk/dataflow.py,sha256=gdAPVVkyKvsKtsa1AwhN_LpzidG_XzV3nhtd1cjnzDA,11128
|
|
927
|
-
datahub/sdk/datajob.py,sha256=
|
|
929
|
+
datahub/sdk/datajob.py,sha256=RaQ3GxtZb8LREWMMJuZJgqC4Dk3tBK9GcsszwsSDxYo,12582
|
|
928
930
|
datahub/sdk/dataset.py,sha256=2-iD-HqjgFpCBmU3k8O5tkltmwFj4jaumADiX8sexJU,29465
|
|
929
931
|
datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
|
|
930
932
|
datahub/sdk/entity_client.py,sha256=xHLGLn6oJfxmHLXo5w4-isPFZGcf8yR5IwyC6pvl_I8,8903
|
|
931
|
-
datahub/sdk/lineage_client.py,sha256=
|
|
933
|
+
datahub/sdk/lineage_client.py,sha256=M4fN4p8YHpM7zoQ7RCCekV8oPgw8QsWxzMAA8mPoy20,33822
|
|
932
934
|
datahub/sdk/main_client.py,sha256=nB9CsQxq7fgW7zXtDwSgu51V2OGT12O1zaYZv9oR2t0,5137
|
|
933
935
|
datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
|
|
934
936
|
datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
|
|
@@ -941,7 +943,7 @@ datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG
|
|
|
941
943
|
datahub/secret/secret_common.py,sha256=g4anQtYPm7cI6kEJUZHjpBqeCyiUKIim2rJQByaeOoY,2864
|
|
942
944
|
datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
|
|
943
945
|
datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
|
|
944
|
-
datahub/specific/chart.py,sha256=
|
|
946
|
+
datahub/specific/chart.py,sha256=EUIRzcYNTs6tXoxVwYjJfD74Jw80kAjXyySjjEoTfw4,6685
|
|
945
947
|
datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
|
|
946
948
|
datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
|
|
947
949
|
datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
|
|
@@ -1073,8 +1075,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1073
1075
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1074
1076
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1075
1077
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1076
|
-
acryl_datahub-1.1.0.
|
|
1077
|
-
acryl_datahub-1.1.0.
|
|
1078
|
-
acryl_datahub-1.1.0.
|
|
1079
|
-
acryl_datahub-1.1.0.
|
|
1080
|
-
acryl_datahub-1.1.0.
|
|
1078
|
+
acryl_datahub-1.1.0.3rc1.dist-info/METADATA,sha256=qmKShqlg2qDoeuPK1c0sKxBOlOOoD_3auEUXvZa4QyU,182347
|
|
1079
|
+
acryl_datahub-1.1.0.3rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1080
|
+
acryl_datahub-1.1.0.3rc1.dist-info/entry_points.txt,sha256=-N2PGtn1uwKR7-VM9spziE_RNyOdKm_XNpOWL1lnaj4,9790
|
|
1081
|
+
acryl_datahub-1.1.0.3rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1082
|
+
acryl_datahub-1.1.0.3rc1.dist-info/RECORD,,
|
|
@@ -36,6 +36,7 @@ csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
|
|
|
36
36
|
datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
|
|
37
37
|
datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
|
|
38
38
|
datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
|
|
39
|
+
datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
|
|
39
40
|
datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
|
|
40
41
|
datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
|
|
41
42
|
dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
|
datahub/_version.py
CHANGED
|
@@ -286,6 +286,7 @@ class BigQuerySchemaGenerator:
|
|
|
286
286
|
yield from gen_database_container(
|
|
287
287
|
database=database,
|
|
288
288
|
name=database,
|
|
289
|
+
qualified_name=database,
|
|
289
290
|
sub_types=[DatasetContainerSubTypes.BIGQUERY_PROJECT],
|
|
290
291
|
domain_registry=self.domain_registry,
|
|
291
292
|
domain_config=self.config.domain,
|
|
@@ -332,6 +333,7 @@ class BigQuerySchemaGenerator:
|
|
|
332
333
|
yield from gen_schema_container(
|
|
333
334
|
database=project_id,
|
|
334
335
|
schema=dataset,
|
|
336
|
+
qualified_name=f"{project_id}.{dataset}",
|
|
335
337
|
sub_types=[DatasetContainerSubTypes.BIGQUERY_DATASET],
|
|
336
338
|
domain_registry=self.domain_registry,
|
|
337
339
|
domain_config=self.config.domain,
|
|
File without changes
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import socket
|
|
3
|
+
import time
|
|
4
|
+
from typing import Iterable, Optional
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
import dns.exception
|
|
8
|
+
import dns.resolver
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from datahub.configuration.common import ConfigModel
|
|
12
|
+
from datahub.ingestion.api.common import PipelineContext
|
|
13
|
+
from datahub.ingestion.api.decorators import (
|
|
14
|
+
SupportStatus,
|
|
15
|
+
config_class,
|
|
16
|
+
platform_name,
|
|
17
|
+
support_status,
|
|
18
|
+
)
|
|
19
|
+
from datahub.ingestion.api.source import Source, SourceReport
|
|
20
|
+
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DataHubDebugSourceConfig(ConfigModel):
    """Configuration for the DataHub debug source."""

    # When set, the source performs a DNS/HTTP reachability probe of this URL.
    dns_probe_url: Optional[str] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@platform_name("DataHubDebug")
@config_class(DataHubDebugSourceConfig)
@support_status(SupportStatus.TESTING)
class DataHubDebugSource(Source):
    """Helper source for debugging the executor environment where ingestion runs.

    Supported tasks:
    1. Network probe of a URL. This differs from a source's test-connection,
       which only runs after the source has already started.
    """

    def __init__(self, ctx: PipelineContext, config: DataHubDebugSourceConfig):
        self.ctx = ctx
        self.config = config
        self.report = SourceReport()
        # This source deliberately emits no metadata events; suppress the
        # "no events produced" warning in the report.
        self.report.event_not_produced_warn = False

    @classmethod
    def create(cls, config_dict, ctx):
        """Alternate constructor used by the ingestion framework."""
        parsed = DataHubDebugSourceConfig.parse_obj(config_dict)
        return cls(ctx, parsed)

    def perform_dns_probe(self, url: str) -> None:
        """
        Perform comprehensive DNS probe and network connectivity tests.
        Logs detailed information to help diagnose network issues.
        """
        logger.info(f"Starting DNS probe for URL: {url}")
        logger.info("=" * 60)
        logger.info(f"DNS PROBE REPORT FOR: {url}")
        logger.info("=" * 60)

        try:
            # Prepend a scheme when missing so urlparse yields hostname/port.
            parsed_url = urlparse(
                url if url.startswith(("http://", "https://")) else f"http://{url}"
            )
            hostname = parsed_url.hostname or parsed_url.netloc
            port = parsed_url.port or (443 if parsed_url.scheme == "https" else 80)

            logger.info(f"Parsed hostname: {hostname}")
            logger.info(f"Target port: {port}")
            logger.info(f"URL scheme: {parsed_url.scheme}")
            logger.info("-" * 60)

            # Test 1: DNS resolution via dnspython.
            logger.info("1. DNS RESOLUTION TEST")
            self._dns_probe_with_dnspython(hostname)
            logger.info("-" * 60)

            # Test 2: HTTP/HTTPS connectivity via requests.
            logger.info("2. HTTP CONNECTIVITY TEST")
            self._http_probe_with_requests(url)
            logger.info("-" * 60)

            # Test 3: local host / resolver reachability details.
            logger.info("3. SYSTEM NETWORK INFORMATION")
            self._log_system_network_info()

        except Exception as e:
            logger.error(f"DNS probe failed with unexpected error: {e}", exc_info=True)

        logger.info("=" * 60)
        logger.info("DNS PROBE COMPLETED")
        logger.info("=" * 60)

    def _dns_probe_with_dnspython(self, hostname: str) -> None:
        """Resolve several record types and query well-known public resolvers."""
        try:
            # Walk the common record types and report each outcome.
            for record_type in ("A", "AAAA", "CNAME", "MX"):
                try:
                    t0 = time.time()
                    answers = dns.resolver.resolve(hostname, record_type)
                    dns_time = time.time() - t0

                    logger.info(
                        f"✓ {record_type} record resolution successful ({dns_time:.3f}s)"
                    )
                    for answer in answers:
                        logger.info(f" - {record_type}: {answer}")

                except dns.resolver.NXDOMAIN:
                    logger.info(f"✗ {record_type} record: Domain does not exist")
                except dns.resolver.NoAnswer:
                    logger.info(
                        f"- {record_type} record: No answer (record type not available)"
                    )
                except dns.exception.Timeout:
                    logger.error(f"✗ {record_type} record: DNS query timed out")
                except Exception as e:
                    logger.error(f"✗ {record_type} record query failed: {e}")

            # Repeat the A lookup against specific public DNS servers.
            logger.info("Testing with different DNS servers:")
            for dns_server in ("8.8.8.8", "1.1.1.1", "208.67.222.222"):
                try:
                    custom_resolver = dns.resolver.Resolver()
                    custom_resolver.nameservers = [dns_server]
                    custom_resolver.timeout = 5

                    t0 = time.time()
                    answers = custom_resolver.resolve(hostname, "A")
                    dns_time = time.time() - t0

                    logger.info(
                        f"✓ DNS server {dns_server} responded ({dns_time:.3f}s)"
                    )
                    for answer in answers:
                        logger.info(f" - A: {answer}")

                except Exception as e:
                    logger.error(f"✗ DNS server {dns_server} failed: {e}")

        except Exception as e:
            logger.error(f"Enhanced DNS probe failed: {e}", exc_info=True)

    def _http_probe_with_requests(self, url: str) -> None:
        """HTTP connectivity test using requests library"""
        try:
            timeout = 10
            # HEAD follows redirects; GET deliberately does not, so the raw
            # first-hop response (e.g. a 301/302) is visible in the log.
            self._attempt_http_request("HEAD", url, timeout, allow_redirects=True)
            self._attempt_http_request("GET", url, timeout, allow_redirects=False)
        except Exception as e:
            logger.error(f"HTTP probe failed: {e}", exc_info=True)

    def _attempt_http_request(
        self, method: str, url: str, timeout: int, allow_redirects: bool
    ) -> None:
        """Issue one HTTP request with *method* and log timing/status/headers."""
        try:
            logger.info(f"Testing {method} request with timeout {timeout}s")
            t0 = time.time()

            # requests.request upper-cases the verb internally, so this is
            # equivalent to requests.head(...) / requests.get(...).
            response = requests.request(
                method, url, timeout=timeout, allow_redirects=allow_redirects
            )

            request_time = time.time() - t0

            logger.info(f"✓ {method} request successful ({request_time:.3f}s)")
            logger.info(f" Status code: {response.status_code}")
            logger.info(
                f" Response headers: {dict(list(response.headers.items())[:5])}"
            )

            if hasattr(response, "url") and response.url != url:
                logger.info(f" Final URL after redirects: {response.url}")

        except requests.exceptions.Timeout:
            logger.error(f"✗ {method} request timed out after {timeout}s")
        except requests.exceptions.ConnectionError as e:
            logger.error(f"✗ {method} connection error: {e}")
        except requests.exceptions.RequestException as e:
            logger.error(f"✗ {method} request failed: {e}")
        except Exception as e:
            logger.error(f"✗ {method} unexpected error: {e}")

    def _log_dns_troubleshooting(self) -> None:
        """Log DNS troubleshooting information"""
        logger.info("DNS TROUBLESHOOTING SUGGESTIONS:")
        for suggestion in (
            "- Check if the hostname is correct",
            "- Verify DNS server configuration",
            "- Check network connectivity",
            "- Try using a different DNS server (8.8.8.8, 1.1.1.1)",
            "- Check if there are firewall restrictions",
        ):
            logger.info(suggestion)

    def _log_system_network_info(self) -> None:
        """Log system network configuration information"""
        try:
            local_hostname = socket.gethostname()
            logger.info(f"Local hostname: {local_hostname}")

            try:
                addr_infos = socket.getaddrinfo(local_hostname, None)
                logger.info("Local IP addresses:")
                for addr_info in addr_infos:
                    if addr_info[0] in [socket.AF_INET, socket.AF_INET6]:
                        family = "IPv4" if addr_info[0] == socket.AF_INET else "IPv6"
                        logger.info(f" - {addr_info[4][0]} ({family})")
            except Exception as e:
                logger.warning(f"Could not retrieve local IP addresses: {e}")

            # TCP reachability of public resolvers on port 53.
            logger.info("DNS Server Connectivity:")
            for dns_server in ("8.8.8.8", "1.1.1.1", "208.67.222.222"):
                try:
                    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                    sock.settimeout(5)
                    rc = sock.connect_ex((dns_server, 53))
                    if rc == 0:
                        logger.info(f" ✓ Can reach {dns_server}:53")
                    else:
                        logger.error(f" ✗ Cannot reach {dns_server}:53")
                    sock.close()
                except Exception as e:
                    logger.error(f" ✗ Error testing {dns_server}:53 - {e}")

        except Exception as e:
            logger.warning(f"Could not gather system network info: {e}")

    def _test_alternative_dns(self, hostname: str) -> None:
        """Test hostname resolution using alternative methods"""
        try:
            for family, family_name in (
                (socket.AF_INET, "IPv4"),
                (socket.AF_INET6, "IPv6"),
            ):
                try:
                    resolved = socket.getaddrinfo(hostname, None, family)
                    if resolved:
                        logger.info(f"✓ {family_name} resolution successful:")
                        for addr_info in resolved[:3]:
                            logger.info(f" - {addr_info[4][0]}")
                    else:
                        logger.warning(
                            f"✗ {family_name} resolution returned no results"
                        )
                except socket.gaierror:
                    logger.error(f"✗ {family_name} resolution failed")
                except Exception as e:
                    logger.error(f"✗ {family_name} resolution error: {e}")

        except Exception as e:
            logger.error(f"Alternative DNS test failed: {e}")

    def get_workunits_internal(
        self,
    ) -> Iterable[MetadataWorkUnit]:
        # Run the optional DNS probe; this source never yields work units.
        if self.config.dns_probe_url is not None:
            logger.info(f"Performing DNS probe for: {self.config.dns_probe_url}")
            self.perform_dns_probe(self.config.dns_probe_url)

        yield from []

    def get_report(self) -> SourceReport:
        return self.report
|
datahub/ingestion/source/nifi.py
CHANGED
|
@@ -72,7 +72,7 @@ NIFI = "nifi"
|
|
|
72
72
|
# and here - https://github.com/psf/requests/issues/1573
|
|
73
73
|
class SSLAdapter(HTTPAdapter):
|
|
74
74
|
def __init__(self, certfile, keyfile, password=None):
|
|
75
|
-
self.context = ssl.create_default_context(ssl.Purpose.
|
|
75
|
+
self.context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
|
|
76
76
|
self.context.load_cert_chain(
|
|
77
77
|
certfile=certfile, keyfile=keyfile, password=password
|
|
78
78
|
)
|
datahub/sdk/datajob.py
CHANGED
|
@@ -313,6 +313,7 @@ class DataJob(
|
|
|
313
313
|
# Set the browse path aspect
|
|
314
314
|
self._set_aspect(models.BrowsePathsV2Class(path=browse_path))
|
|
315
315
|
|
|
316
|
+
# TODO: support datajob input/output
|
|
316
317
|
@property
|
|
317
318
|
def inlets(self) -> List[DatasetUrn]:
|
|
318
319
|
"""Get the inlets of the data job."""
|
datahub/sdk/lineage_client.py
CHANGED
datahub/specific/chart.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|