acryl-datahub 1.3.0.1rc4__py3-none-any.whl → 1.3.0.1rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of acryl-datahub has been flagged as potentially problematic.
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc5.dist-info}/METADATA +2396 -2392
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc5.dist-info}/RECORD +18 -18
- datahub/_version.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +161 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +5 -3
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/usage.py +2 -2
- datahub/ingestion/source/sql/mysql.py +101 -4
- datahub/ingestion/source/sql/postgres.py +81 -4
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/metadata/_internal_schema_classes.py +547 -544
- datahub/metadata/_urns/urn_defs.py +1729 -1729
- datahub/metadata/schema.avsc +18384 -18382
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +3 -1
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc5.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc5.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc5.dist-info}/RECORD
CHANGED

@@ -1,7 +1,7 @@
-acryl_datahub-1.3.0.
+acryl_datahub-1.3.0.1rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=
+datahub/_version.py,sha256=GFe5nZs9PKs-LLDaT1H1D9udtmOwDf_NsyGlgBGOywE,323
 datahub/entrypoints.py,sha256=VcbU6Z47b_JKW1zI-WJMYIngm05FSogKLiuvFNtyNcI,9088
 datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -241,7 +241,7 @@ datahub/ingestion/source/abs/source.py,sha256=z86K5_P_gu8kTytLOAYyQqqD2g14JGSrv1
 datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/apply/datahub_apply.py,sha256=xTD-Iq3UHhxcz61RwNuI2kJjRrnQEfZFSgvS1X6loV4,7703
 datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/aws/aws_common.py,sha256=
+datahub/ingestion/source/aws/aws_common.py,sha256=Va9uxo5aKsAR7qIC625VpRO3XDqzNIg4SfK_eFg25Rw,23781
 datahub/ingestion/source/aws/glue.py,sha256=dUaMWcI5Ed-TzbbSrF6suT4L1vcRHoHfFCdTvAINc4w,67423
 datahub/ingestion/source/aws/platform_resource_repository.py,sha256=0eUfGy1FbaBltCSNTtXyLrkrdqTc1KkTgDJB1Gd-Ydk,853
 datahub/ingestion/source/aws/s3_boto_utils.py,sha256=rGlWAkKZpkeA1_wMvcJvSDvobvduShszowU-KcrQudg,7011
@@ -271,7 +271,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256
 datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=2syDMaRpYEbtGUVejVAK5d6g8HqM54ZyEM908uLJ55o,3393
 datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=zlTkqOmt5zxnO40rVTYHF3fclj4OVlLtqUXwW5WIIcM,7855
 datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=zbYb1EYnCJxgvsU8oT_76l0q_BW1exVjMWM1GAgd1nc,32600
-datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
+datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=PbSCMj5ACwEu_HQNe29IHs4y1bn15_nnz6ZW1Yt17wI,51796
 datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
 datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
 datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
@@ -456,9 +456,9 @@ datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwq
 datahub/ingestion/source/redshift/query.py,sha256=HKobQ-0crARgT8Mkfe-WBqVR9ZadYCZ9DGaUoEHHHww,48234
 datahub/ingestion/source/redshift/redshift.py,sha256=RN8rao3j7nocnnD6oPcEju09-8mOZTE4vFkgy_13Az8,41293
 datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
-datahub/ingestion/source/redshift/redshift_schema.py,sha256=
+datahub/ingestion/source/redshift/redshift_schema.py,sha256=2U8IIPRJkL-HWUeWswOzvcT1hdTBQgPMhr6tYCDuqrM,25226
 datahub/ingestion/source/redshift/report.py,sha256=aCFDFUbz5xde8b_eRIHSBiELoo9LZFtDpp2lSadiPHU,2937
-datahub/ingestion/source/redshift/usage.py,sha256=
+datahub/ingestion/source/redshift/usage.py,sha256=szdg3cUw4UpZ8rXMZufIAVTq2iiI1VsmrFgjAEH8Dv4,17521
 datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
 datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
 datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
@@ -523,9 +523,9 @@ datahub/ingestion/source/sql/hana.py,sha256=V6bGVLVjI1VL0deebg8VxIL8Ls-oxUvpSvX9
 datahub/ingestion/source/sql/hive.py,sha256=SPmAWlk63V-s-loBTU2hXsQA7xA4sa0iPK6pCbF-AJ8,31600
 datahub/ingestion/source/sql/hive_metastore.py,sha256=UBB7mV2eKuCxv3voi0F3tqF2MyRObSYxArAxETZfO4E,35997
 datahub/ingestion/source/sql/mariadb.py,sha256=om6QoG5UtDldt1N6AfIWp3T-HXNaaqFmpz2i0JAemfM,654
-datahub/ingestion/source/sql/mysql.py,sha256=
+datahub/ingestion/source/sql/mysql.py,sha256=h0kv86-8SxBTmaEhmcyqGcKoaWQ4peUdiE9sfhrTuqY,9734
 datahub/ingestion/source/sql/oracle.py,sha256=nKMM1O67SkxCgT781eENl5xXpIR8_p5joTSdAYzQwHY,29988
-datahub/ingestion/source/sql/postgres.py,sha256=
+datahub/ingestion/source/sql/postgres.py,sha256=Vk1NVI0zJPzMj4SKJ9jfyGu4DY3bow724BDn10BaxzU,17478
 datahub/ingestion/source/sql/presto.py,sha256=58py4M3UYxkGpbBFA1o96H154eUhD2dBm1hpxxYlYYM,4256
 datahub/ingestion/source/sql/sql_common.py,sha256=EZGoeGlOYZoOrXOiKDI-S1mw-sPVV33PZQ_mPJlEvRc,57759
 datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fxk9xei_CUIAXB0,8222
@@ -535,7 +535,7 @@ datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6Bpkwekx
 datahub/ingestion/source/sql/sql_types.py,sha256=AVeBBXw8aKB1_jw6Wtg58miu-YUfN_-7ZcXwSF-ESgA,16021
 datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
 datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
-datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=
+datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=wE_GX2TtkFEvscC_Epy5hUfBxtE6mUQoVPb7fUea0jk,1882
 datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
 datahub/ingestion/source/sql/teradata.py,sha256=YydlPGndFGZcpvlmim3T-1yaAmsFt08TZVOTo1R3GLo,66871
 datahub/ingestion/source/sql/trino.py,sha256=o5hm84iwRHO59TD2LaEqYgF2LYIcSUIKmlgu1VudGBY,19254
@@ -646,12 +646,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
 datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
 datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
 datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
-datahub/metadata/_internal_schema_classes.py,sha256=
-datahub/metadata/schema.avsc,sha256=
+datahub/metadata/_internal_schema_classes.py,sha256=M3j1TDrK43RYlhKUsaqeHgjbw5ERkAzKxdC4pD4dLEg,1077060
+datahub/metadata/schema.avsc,sha256=mUP8XLRosJg0WlBymK0-EAlGqrC-j7bQIDbOVtODrR8,775591
 datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
 datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
 datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
-datahub/metadata/_urns/urn_defs.py,sha256=
+datahub/metadata/_urns/urn_defs.py,sha256=_LgqKLHrmHHxpvrP-93NMJSLEnoFI8q72lkX17mK1XA,143257
 datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
 datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
 datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -782,7 +782,7 @@ datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjO
 datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
 datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
 datahub/metadata/schemas/DataHubPageModuleKey.avsc,sha256=NyFN8cVO6s6rtgoLGJJGfcPfpGr5PfmZlIhM6ajldfQ,460
-datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=
+datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=hbIEkpjQxVxSZOQmTVIjOGnGTNeaA0r0oYNKPpwuddg,10443
 datahub/metadata/schemas/DataHubPageTemplateKey.avsc,sha256=0sVqwL97Rp8YHPytp2RqUP5hIW048hmT2hPNP5k6arc,472
 datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=FyNcZIniQy9m6yN9DT4XsPkDrxUsU7tRTqmfdGoEtMU,8565
 datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
@@ -1128,8 +1128,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.3.0.
-acryl_datahub-1.3.0.
-acryl_datahub-1.3.0.
-acryl_datahub-1.3.0.
-acryl_datahub-1.3.0.
+acryl_datahub-1.3.0.1rc5.dist-info/METADATA,sha256=ckjfgTlPEUgZH1sYvZm3MLupBZvBbsC-zokD3Q2ekno,184688
+acryl_datahub-1.3.0.1rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+acryl_datahub-1.3.0.1rc5.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
+acryl_datahub-1.3.0.1rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.3.0.1rc5.dist-info/RECORD,,
datahub/_version.py
CHANGED
datahub/ingestion/source/aws/aws_common.py
CHANGED

@@ -3,11 +3,13 @@ from datetime import datetime, timedelta, timezone
 from enum import Enum
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
+from urllib.parse import parse_qs, urlparse
 
 import boto3
 import requests
 from boto3.session import Session
 from botocore.config import DEFAULT_TIMEOUT, Config
+from botocore.exceptions import ClientError, NoCredentialsError
 from botocore.utils import fix_s3_host
 from pydantic.fields import Field
 
@@ -465,6 +467,165 @@ class AwsConnectionConfig(ConfigModel):
     def get_lakeformation_client(self) -> "LakeFormationClient":
         return self.get_session().client("lakeformation", config=self._aws_config())
 
+    def get_rds_client(self):
+        """Get an RDS client for generating IAM auth tokens."""
+        return self.get_session().client("rds", config=self._aws_config())
+
+
+def generate_rds_iam_token(
+    endpoint: str,
+    username: str,
+    port: int,
+    aws_config: AwsConnectionConfig,
+) -> str:
+    """
+    Generate an AWS RDS IAM authentication token.
+
+    boto3's generate_db_auth_token() returns a presigned URL in the format:
+    "hostname:port/?Action=connect&DBUser=username&X-Amz-Date=...&X-Amz-Expires=..."
+
+    This token should be used as-is by pymysql/psycopg2 drivers.
+
+    Args:
+        endpoint: RDS endpoint hostname
+        username: Database username for IAM authentication
+        port: Database port (5432 for PostgreSQL, 3306 for MySQL)
+        aws_config: AwsConnectionConfig for session management and credentials
+
+    Returns:
+        Authentication token (presigned URL format)
+
+    Raises:
+        ValueError: If AWS credentials are not found or token generation fails
+
+    """
+    try:
+        client = aws_config.get_rds_client()
+        token = client.generate_db_auth_token(
+            DBHostname=endpoint, Port=port, DBUsername=username
+        )
+        logger.debug(f"Generated RDS IAM token for {username}@{endpoint}:{port}")
+        return token
+    except NoCredentialsError as e:
+        raise ValueError("AWS credentials not found") from e
+    except ClientError as e:
+        raise ValueError(f"Failed to generate RDS IAM token: {e}") from e
+
+
+class RDSIAMTokenManager:
+    """
+    Manages RDS IAM token lifecycle with automatic refresh.
+
+    RDS IAM tokens include expiration information in the URL parameters.
+    This manager parses the token expiry and refreshes before expiration
+    to ensure uninterrupted database access.
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        username: str,
+        port: int,
+        aws_config: AwsConnectionConfig,
+        refresh_threshold_minutes: int = 5,
+    ):
+        """
+        Initialize the token manager.
+
+        Args:
+            endpoint: RDS endpoint hostname
+            username: Database username for IAM authentication
+            port: Database port
+            aws_config: AwsConnectionConfig for session management and credentials
+            refresh_threshold_minutes: Refresh token when this many minutes remain before expiry
+        """
+        self.endpoint = endpoint
+        self.username = username
+        self.port = port
+        self.aws_config = aws_config
+        self.refresh_threshold = timedelta(minutes=refresh_threshold_minutes)
+
+        self._current_token: Optional[str] = None
+        self._token_expires_at: Optional[datetime] = None
+
+    def get_token(self) -> str:
+        """
+        Get current token, refreshing if necessary.
+
+        Returns:
+            Valid authentication token
+
+        Raises:
+            RuntimeError: If token generation or refresh fails
+        """
+        if self._needs_refresh():
+            self._refresh_token()
+
+        assert self._current_token is not None
+        return self._current_token
+
+    def _needs_refresh(self) -> bool:
+        """Check if token needs to be refreshed."""
+        if self._current_token is None or self._token_expires_at is None:
+            return True
+
+        time_until_expiry = self._token_expires_at - datetime.now(timezone.utc)
+        return time_until_expiry <= self.refresh_threshold
+
+    def _parse_token_expiry(self, token: str) -> datetime:
+        """
+        Parse token expiry from X-Amz-Date and X-Amz-Expires URL parameters.
+
+        Args:
+            token: RDS IAM authentication token (presigned URL)
+
+        Returns:
+            Expiration datetime in UTC
+
+        Raises:
+            ValueError: If token URL format is invalid or missing required parameters
+        """
+        try:
+            parsed_url = urlparse(token)
+            query_params = parse_qs(parsed_url.query)
+
+            # Extract X-Amz-Date (ISO 8601 format: YYYYMMDDTHHMMSSZ)
+            amz_date_list = query_params.get("X-Amz-Date")
+            if not amz_date_list:
+                raise ValueError("Missing X-Amz-Date parameter in RDS IAM token")
+            amz_date_str = amz_date_list[0]
+
+            # Extract X-Amz-Expires (duration in seconds)
+            amz_expires_list = query_params.get("X-Amz-Expires")
+            if not amz_expires_list:
+                raise ValueError("Missing X-Amz-Expires parameter in RDS IAM token")
+            amz_expires_seconds = int(amz_expires_list[0])
+
+            # Parse X-Amz-Date to datetime
+            token_issued_at = datetime.strptime(amz_date_str, "%Y%m%dT%H%M%SZ").replace(
+                tzinfo=timezone.utc
+            )
+
+            # Calculate expiration
+            return token_issued_at + timedelta(seconds=amz_expires_seconds)
+
+        except (ValueError, KeyError, IndexError) as e:
+            raise ValueError(
+                f"Failed to parse RDS IAM token expiry: {e}. Token format may be invalid."
+            ) from e
+
+    def _refresh_token(self) -> None:
+        """Generate and store a new token with parsed expiry."""
+        logger.info("Refreshing RDS IAM authentication token")
+        self._current_token = generate_rds_iam_token(
+            endpoint=self.endpoint,
+            username=self.username,
+            port=self.port,
+            aws_config=self.aws_config,
+        )
+        self._token_expires_at = self._parse_token_expiry(self._current_token)
+        logger.debug(f"Token will expire at {self._token_expires_at}")
+
 
 class AwsSourceConfig(EnvConfigMixin, AwsConnectionConfig):
     """
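For orientation, a minimal sketch (not part of the published diff) of how the token manager added above is driven. RDSIAMTokenManager, AwsConnectionConfig, and get_token() come from this hunk; the endpoint, username, and region values are placeholders, and aws_region is assumed to be one of AwsConnectionConfig's existing fields.

from datahub.ingestion.source.aws.aws_common import (
    AwsConnectionConfig,
    RDSIAMTokenManager,
)

# Placeholder endpoint/user; aws_region is assumed from AwsConnectionConfig.
manager = RDSIAMTokenManager(
    endpoint="mydb.abc123.us-east-1.rds.amazonaws.com",
    username="datahub_reader",
    port=3306,
    aws_config=AwsConnectionConfig(aws_region="us-east-1"),
)

# get_token() returns the cached presigned-URL token and regenerates it once it
# is within refresh_threshold_minutes (default 5) of the expiry parsed from the
# token's X-Amz-Date/X-Amz-Expires parameters.
token = manager.get_token()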
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
CHANGED

@@ -449,10 +449,12 @@ class BigQuerySchemaGenerator:
             ):
                 yield wu
         except Exception as e:
-
-
+            # If configuration indicates we need table data access (for profiling or use_tables_list_query_v2),
+            # include bigquery.tables.getData in the error message since that's likely the missing permission
+            if self.config.have_table_data_read_permission:
+                action_mesage = "Does your service account have bigquery.tables.list, bigquery.routines.get, bigquery.routines.list, bigquery.tables.getData permissions?"
             else:
-                action_mesage = "Does your service account have bigquery.tables.list, bigquery.routines.get, bigquery.routines.list
+                action_mesage = "Does your service account have bigquery.tables.list, bigquery.routines.get, bigquery.routines.list permissions?"
 
             self.report.failure(
                 title="Unable to get tables for dataset",
datahub/ingestion/source/redshift/redshift_schema.py
CHANGED

@@ -15,6 +15,7 @@ from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable
 from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
 from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
 from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column
+from datahub.utilities.perf_timer import PerfTimer
 
 logger: logging.Logger = logging.getLogger(__name__)
 
@@ -243,9 +244,13 @@ class RedshiftDataDictionary:
         conn: redshift_connector.Connection, query: str
     ) -> redshift_connector.Cursor:
         cursor: redshift_connector.Cursor = conn.cursor()
-
-
-
+        with PerfTimer() as timer:
+            query_hash_id = hash(query)
+            logger.info(f"Executing query [{query_hash_id}]\n{query}")
+            cursor.execute(query)
+            logger.info(
+                f"Time taken query [{query_hash_id}: {timer.elapsed_seconds():.3f} seconds"
+            )
         return cursor
 
     @staticmethod
@@ -545,8 +550,7 @@ class RedshiftDataDictionary:
         conn: redshift_connector.Connection,
         query: str,
     ) -> Iterable[LineageRow]:
-        cursor =
-        cursor.execute(query)
+        cursor = RedshiftDataDictionary.get_query_result(conn=conn, query=query)
         field_names = [i[0] for i in cursor.description]
 
         rows = cursor.fetchmany()
@@ -603,9 +607,7 @@ class RedshiftDataDictionary:
         conn: redshift_connector.Connection,
         query: str,
     ) -> Iterable[TempTableRow]:
-        cursor =
-
-        cursor.execute(query)
+        cursor = RedshiftDataDictionary.get_query_result(conn=conn, query=query)
 
         field_names = [i[0] for i in cursor.description]
 
@@ -662,8 +664,9 @@ class RedshiftDataDictionary:
     def get_outbound_datashares(
         conn: redshift_connector.Connection,
    ) -> Iterable[OutboundDatashare]:
-        cursor =
-
+        cursor = RedshiftDataDictionary.get_query_result(
+            conn=conn, query=RedshiftCommonQuery.list_outbound_datashares()
+        )
         for item in cursor.fetchall():
             yield OutboundDatashare(
                 share_name=item[1],
@@ -678,8 +681,10 @@ class RedshiftDataDictionary:
         conn: redshift_connector.Connection,
         database: str,
     ) -> Optional[InboundDatashare]:
-        cursor =
-
+        cursor = RedshiftDataDictionary.get_query_result(
+            conn=conn,
+            query=RedshiftCommonQuery.get_inbound_datashare(database),
+        )
         item = cursor.fetchone()
         if item:
             return InboundDatashare(
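For orientation, a minimal standalone sketch (not part of the published diff) of the PerfTimer pattern that the get_query_result hunk above introduces; time.sleep stands in for cursor.execute(query).

import time

from datahub.utilities.perf_timer import PerfTimer

with PerfTimer() as timer:
    time.sleep(0.1)  # stand-in for cursor.execute(query)
print(f"query took {timer.elapsed_seconds():.3f} seconds")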
datahub/ingestion/source/redshift/usage.py
CHANGED

@@ -25,6 +25,7 @@ from datahub.ingestion.source.redshift.query import (
     RedshiftServerlessQuery,
 )
 from datahub.ingestion.source.redshift.redshift_schema import (
+    RedshiftDataDictionary,
     RedshiftTable,
     RedshiftView,
 )
@@ -263,8 +264,7 @@ class RedshiftUsageExtractor:
         connection: redshift_connector.Connection,
         all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]],
     ) -> Iterable[RedshiftAccessEvent]:
-        cursor =
-        cursor.execute(query)
+        cursor = RedshiftDataDictionary.get_query_result(conn=connection, query=query)
         results = cursor.fetchmany()
         field_names = [i[0] for i in cursor.description]
         while results:
datahub/ingestion/source/sql/mysql.py
CHANGED

@@ -1,14 +1,17 @@
 # This import verifies that the dependencies are available.
-
-from typing import List
+import logging
+from typing import TYPE_CHECKING, Any, List, Optional
 
 import pymysql  # noqa: F401
 from pydantic.fields import Field
-from sqlalchemy import util
+from sqlalchemy import create_engine, event, inspect, util
 from sqlalchemy.dialects.mysql import BIT, base
 from sqlalchemy.dialects.mysql.enumerated import SET
 from sqlalchemy.engine.reflection import Inspector
 
+if TYPE_CHECKING:
+    from sqlalchemy.engine import Engine
+
 from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
 from datahub.ingestion.api.decorators import (
     SourceCapability,
@@ -18,11 +21,16 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
+from datahub.ingestion.source.aws.aws_common import (
+    AwsConnectionConfig,
+    RDSIAMTokenManager,
+)
 from datahub.ingestion.source.sql.sql_common import (
     make_sqlalchemy_type,
     register_custom_type,
 )
 from datahub.ingestion.source.sql.sql_config import SQLAlchemyConnectionConfig
+from datahub.ingestion.source.sql.sqlalchemy_uri import parse_host_port
 from datahub.ingestion.source.sql.stored_procedures.base import (
     BaseProcedure,
 )
@@ -31,6 +39,9 @@ from datahub.ingestion.source.sql.two_tier_sql_source import (
     TwoTierSQLAlchemySource,
 )
 from datahub.metadata.schema_classes import BytesTypeClass
+from datahub.utilities.str_enum import StrEnum
+
+logger = logging.getLogger(__name__)
 
 SET.__repr__ = util.generic_repr  # type:ignore
 
@@ -54,11 +65,33 @@ base.ischema_names["polygon"] = POLYGON
 base.ischema_names["decimal128"] = DECIMAL128
 
 
+class MySQLAuthMode(StrEnum):
+    """Authentication mode for MySQL connection."""
+
+    PASSWORD = "PASSWORD"
+    AWS_IAM = "AWS_IAM"
+
+
 class MySQLConnectionConfig(SQLAlchemyConnectionConfig):
     # defaults
     host_port: str = Field(default="localhost:3306", description="MySQL host URL.")
     scheme: HiddenFromDocs[str] = "mysql+pymysql"
 
+    # Authentication configuration
+    auth_mode: MySQLAuthMode = Field(
+        default=MySQLAuthMode.PASSWORD,
+        description="Authentication mode to use for the MySQL connection. "
+        "Options are 'PASSWORD' (default) for standard username/password authentication, "
+        "or 'AWS_IAM' for AWS RDS IAM authentication.",
+    )
+    aws_config: AwsConnectionConfig = Field(
+        default_factory=AwsConnectionConfig,
+        description="AWS configuration for RDS IAM authentication (only used when auth_mode is AWS_IAM). "
+        "Provides full control over AWS credentials, region, profiles, role assumption, retry logic, and proxy settings. "
+        "If not explicitly configured, boto3 will automatically use the default credential chain and region from "
+        "environment variables (AWS_DEFAULT_REGION, AWS_REGION), AWS config files (~/.aws/config), or IAM role metadata.",
+    )
+
 
 class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
     def get_identifier(self, *, schema: str, table: str) -> str:
@@ -91,9 +124,27 @@ class MySQLSource(TwoTierSQLAlchemySource):
     Table, row, and column statistics via optional SQL profiling
     """
 
-
+    config: MySQLConfig
+
+    def __init__(self, config: MySQLConfig, ctx: Any):
         super().__init__(config, ctx, self.get_platform())
 
+        self._rds_iam_token_manager: Optional[RDSIAMTokenManager] = None
+        if config.auth_mode == MySQLAuthMode.AWS_IAM:
+            hostname, port = parse_host_port(config.host_port, default_port=3306)
+            if port is None:
+                raise ValueError("Port must be specified for RDS IAM authentication")
+
+            if not config.username:
+                raise ValueError("username is required for RDS IAM authentication")
+
+            self._rds_iam_token_manager = RDSIAMTokenManager(
+                endpoint=hostname,
+                username=config.username,
+                port=port,
+                aws_config=config.aws_config,
+            )
+
     def get_platform(self):
         return "mysql"
 
@@ -102,6 +153,52 @@ class MySQLSource(TwoTierSQLAlchemySource):
         config = MySQLConfig.parse_obj(config_dict)
         return cls(config, ctx)
 
+    def _setup_rds_iam_event_listener(
+        self, engine: "Engine", database_name: Optional[str] = None
+    ) -> None:
+        """Setup SQLAlchemy event listener to inject RDS IAM tokens."""
+        if not (
+            self.config.auth_mode == MySQLAuthMode.AWS_IAM
+            and self._rds_iam_token_manager
+        ):
+            return
+
+        def do_connect_listener(_dialect, _conn_rec, _cargs, cparams):
+            if not self._rds_iam_token_manager:
+                raise RuntimeError("RDS IAM Token Manager is not initialized")
+            cparams["password"] = self._rds_iam_token_manager.get_token()
+            # PyMySQL requires SSL to be enabled for RDS IAM authentication.
+            # Preserve any existing SSL configuration, otherwise enable with default settings.
+            # The {"ssl": True} dict is a workaround to make PyMySQL recognize that SSL
+            # should be enabled, since the library requires a truthy value in the ssl parameter.
+            # See https://pymysql.readthedocs.io/en/latest/modules/connections.html#pymysql.connections.Connection
+            cparams["ssl"] = cparams.get("ssl") or {"ssl": True}
+
+        event.listen(engine, "do_connect", do_connect_listener)  # type: ignore[misc]
+
+    def get_inspectors(self):
+        url = self.config.get_sql_alchemy_url()
+        logger.debug(f"sql_alchemy_url={url}")
+
+        engine = create_engine(url, **self.config.options)
+        self._setup_rds_iam_event_listener(engine)
+
+        with engine.connect() as conn:
+            inspector = inspect(conn)
+            if self.config.database and self.config.database != "":
+                databases = [self.config.database]
+            else:
+                databases = inspector.get_schema_names()
+            for db in databases:
+                if self.config.database_pattern.allowed(db):
+                    url = self.config.get_sql_alchemy_url(current_db=db)
+                    db_engine = create_engine(url, **self.config.options)
+                    self._setup_rds_iam_event_listener(db_engine, database_name=db)
+
+                    with db_engine.connect() as conn:
+                        inspector = inspect(conn)
+                        yield inspector
+
     def add_profile_metadata(self, inspector: Inspector) -> None:
         if not self.config.is_profiling_enabled():
             return