acryl-datahub 0.15.0rc12__py3-none-any.whl → 0.15.0rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc12.dist-info → acryl_datahub-0.15.0rc13.dist-info}/METADATA +2311 -2311
- {acryl_datahub-0.15.0rc12.dist-info → acryl_datahub-0.15.0rc13.dist-info}/RECORD +11 -11
- datahub/__init__.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_connection.py +28 -0
- datahub/ingestion/source/sql/sql_common.py +2 -0
- datahub/ingestion/source/sql/sql_report.py +1 -0
- datahub/ingestion/source/unity/source.py +2 -0
- datahub/sql_parsing/sqlglot_lineage.py +7 -1
- {acryl_datahub-0.15.0rc12.dist-info → acryl_datahub-0.15.0rc13.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc12.dist-info → acryl_datahub-0.15.0rc13.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc12.dist-info → acryl_datahub-0.15.0rc13.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=uasr1AqGhDFOT97_MigdNOSRw0ewVjoBizWutajHjN4,575
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -424,7 +424,7 @@ datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQg
|
|
|
424
424
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
425
425
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
426
426
|
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=XkBDHH1BWGAp4FhBZCHB3LrPjBxJ28GcyeQgvv-uBxA,18704
|
|
427
|
-
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=
|
|
427
|
+
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
|
|
428
428
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
429
429
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=w2CPm5XEU-KMUSIpb58aKOaxTDHfM5NvghutCVRicy4,23247
|
|
430
430
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
@@ -452,11 +452,11 @@ datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_
|
|
|
452
452
|
datahub/ingestion/source/sql/oracle.py,sha256=ibBtjaneCFto-Rw3k2OxsbT3YHgux1aCtPtv5oA8St4,24533
|
|
453
453
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
454
454
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
455
|
-
datahub/ingestion/source/sql/sql_common.py,sha256=
|
|
455
|
+
datahub/ingestion/source/sql/sql_common.py,sha256=yvYUz7cAkzynxLiZht38TOca0hWPXotalQhZPYd54yc,51114
|
|
456
456
|
datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxvJ6cLbCIea7Mww,9449
|
|
457
457
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
458
458
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=6QbhkQH_F13GV1HsavVTq3BE9F7Pr_vfGOjCX2o2c60,11675
|
|
459
|
-
datahub/ingestion/source/sql/sql_report.py,sha256=
|
|
459
|
+
datahub/ingestion/source/sql/sql_report.py,sha256=Knh6KhmuO2jIgBI5ChEBoBrWYPJYOl7wNze03gCuI2Q,2539
|
|
460
460
|
datahub/ingestion/source/sql/sql_types.py,sha256=lrJpavRTE7aDVAKOrKZcrp4CsKydiiaza1wt2ieqWzs,15041
|
|
461
461
|
datahub/ingestion/source/sql/sql_utils.py,sha256=w9YFNm_qJNjOcWAWBI_lUoFMbd0wT8q0LoT7Ia71tIE,8100
|
|
462
462
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
@@ -498,7 +498,7 @@ datahub/ingestion/source/unity/proxy.py,sha256=2-pYQ-3B9UVUwO1yB9iTdi3DqgqZ2JrpQ
|
|
|
498
498
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
499
499
|
datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
|
|
500
500
|
datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
|
|
501
|
-
datahub/ingestion/source/unity/source.py,sha256=
|
|
501
|
+
datahub/ingestion/source/unity/source.py,sha256=h3oT68-PH3Ne3v44uw5s20JbmrV7nChcNcf1CUmJYVc,41607
|
|
502
502
|
datahub/ingestion/source/unity/usage.py,sha256=r91-ishhv9QTNLevVhQ9HPZ47CRvVeeAMBtWuRsONxk,11089
|
|
503
503
|
datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
504
504
|
datahub/ingestion/source/usage/clickhouse_usage.py,sha256=8nQqNAPKqivywjzsvqH0-HWFwjd4gECpw_xahLXk5ek,9970
|
|
@@ -870,7 +870,7 @@ datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgn
|
|
|
870
870
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=gLelf5l73EufB8qijb9ZDLANkt4o05schGg4DY-bOJs,69937
|
|
871
871
|
datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
|
|
872
872
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
873
|
-
datahub/sql_parsing/sqlglot_lineage.py,sha256=
|
|
873
|
+
datahub/sql_parsing/sqlglot_lineage.py,sha256=zvIkfBIT5LmdwT0KxSqpbPjJtiSoOpkZAT3mPaFZ2ko,46208
|
|
874
874
|
datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG8iVfFRY,14431
|
|
875
875
|
datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
|
|
876
876
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
974
974
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
975
975
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
976
976
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
979
|
-
acryl_datahub-0.15.
|
|
980
|
-
acryl_datahub-0.15.
|
|
981
|
-
acryl_datahub-0.15.
|
|
977
|
+
acryl_datahub-0.15.0rc13.dist-info/METADATA,sha256=ZDtXy_rVYg4PJ-roJ3o-vXLsdQl6snJmceHd8l03Qm4,174408
|
|
978
|
+
acryl_datahub-0.15.0rc13.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
979
|
+
acryl_datahub-0.15.0rc13.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
980
|
+
acryl_datahub-0.15.0rc13.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
981
|
+
acryl_datahub-0.15.0rc13.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -43,6 +43,7 @@ _VALID_AUTH_TYPES: Dict[str, str] = {
|
|
|
43
43
|
"EXTERNAL_BROWSER_AUTHENTICATOR": EXTERNAL_BROWSER_AUTHENTICATOR,
|
|
44
44
|
"KEY_PAIR_AUTHENTICATOR": KEY_PAIR_AUTHENTICATOR,
|
|
45
45
|
"OAUTH_AUTHENTICATOR": OAUTH_AUTHENTICATOR,
|
|
46
|
+
"OAUTH_AUTHENTICATOR_TOKEN": OAUTH_AUTHENTICATOR,
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
_SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com"
|
|
@@ -104,6 +105,10 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
104
105
|
description="Connect args to pass to Snowflake SqlAlchemy driver",
|
|
105
106
|
exclude=True,
|
|
106
107
|
)
|
|
108
|
+
token: Optional[str] = pydantic.Field(
|
|
109
|
+
default=None,
|
|
110
|
+
description="OAuth token from external identity provider. Not recommended for most use cases because it will not be able to refresh once expired.",
|
|
111
|
+
)
|
|
107
112
|
|
|
108
113
|
def get_account(self) -> str:
|
|
109
114
|
assert self.account_id
|
|
@@ -148,6 +153,18 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
148
153
|
logger.info(f"using authenticator type '{v}'")
|
|
149
154
|
return v
|
|
150
155
|
|
|
156
|
+
@pydantic.validator("token", always=True)
|
|
157
|
+
def validate_token_oauth_config(cls, v, values):
|
|
158
|
+
auth_type = values.get("authentication_type")
|
|
159
|
+
if auth_type == "OAUTH_AUTHENTICATOR_TOKEN":
|
|
160
|
+
if not v:
|
|
161
|
+
raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.")
|
|
162
|
+
elif v is not None:
|
|
163
|
+
raise ValueError(
|
|
164
|
+
"Token can only be provided when using OAUTH_AUTHENTICATOR_TOKEN"
|
|
165
|
+
)
|
|
166
|
+
return v
|
|
167
|
+
|
|
151
168
|
@staticmethod
|
|
152
169
|
def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None:
|
|
153
170
|
if oauth_config is None:
|
|
@@ -333,6 +350,17 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
333
350
|
application=_APPLICATION_NAME,
|
|
334
351
|
**connect_args,
|
|
335
352
|
)
|
|
353
|
+
elif self.authentication_type == "OAUTH_AUTHENTICATOR_TOKEN":
|
|
354
|
+
return snowflake.connector.connect(
|
|
355
|
+
user=self.username,
|
|
356
|
+
account=self.account_id,
|
|
357
|
+
authenticator="oauth",
|
|
358
|
+
token=self.token, # Token generated externally and provided directly to the recipe
|
|
359
|
+
warehouse=self.warehouse,
|
|
360
|
+
role=self.role,
|
|
361
|
+
application=_APPLICATION_NAME,
|
|
362
|
+
**connect_args,
|
|
363
|
+
)
|
|
336
364
|
elif self.authentication_type == "OAUTH_AUTHENTICATOR":
|
|
337
365
|
return self.get_oauth_connection()
|
|
338
366
|
elif self.authentication_type == "KEY_PAIR_AUTHENTICATOR":
|
|
@@ -1197,6 +1197,8 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1197
1197
|
)
|
|
1198
1198
|
else:
|
|
1199
1199
|
self.report.num_view_definitions_parsed += 1
|
|
1200
|
+
if raw_lineage.out_tables != [view_urn]:
|
|
1201
|
+
self.report.num_view_definitions_view_urn_mismatch += 1
|
|
1200
1202
|
return view_definition_lineage_helper(raw_lineage, view_urn)
|
|
1201
1203
|
|
|
1202
1204
|
def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]:
|
|
@@ -48,6 +48,7 @@ class SQLSourceReport(
|
|
|
48
48
|
query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None
|
|
49
49
|
|
|
50
50
|
num_view_definitions_parsed: int = 0
|
|
51
|
+
num_view_definitions_view_urn_mismatch: int = 0
|
|
51
52
|
num_view_definitions_failed_parsing: int = 0
|
|
52
53
|
num_view_definitions_failed_column_parsing: int = 0
|
|
53
54
|
view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
|
|
@@ -974,6 +974,8 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
974
974
|
)
|
|
975
975
|
else:
|
|
976
976
|
self.report.num_view_definitions_parsed += 1
|
|
977
|
+
if raw_lineage.out_tables != [view_urn]:
|
|
978
|
+
self.report.num_view_definitions_view_urn_mismatch += 1
|
|
977
979
|
return view_definition_lineage_helper(raw_lineage, view_urn)
|
|
978
980
|
|
|
979
981
|
def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
|
|
@@ -1243,13 +1243,19 @@ def infer_output_schema(result: SqlParsingResult) -> Optional[List[SchemaFieldCl
|
|
|
1243
1243
|
def view_definition_lineage_helper(
|
|
1244
1244
|
result: SqlParsingResult, view_urn: str
|
|
1245
1245
|
) -> SqlParsingResult:
|
|
1246
|
-
if result.query_type is QueryType.SELECT
|
|
1246
|
+
if result.query_type is QueryType.SELECT or (
|
|
1247
|
+
result.out_tables and result.out_tables != [view_urn]
|
|
1248
|
+
):
|
|
1247
1249
|
# Some platforms (e.g. postgres) store only <select statement> from view definition
|
|
1248
1250
|
# `create view V as <select statement>` . For such view definitions, `result.out_tables` and
|
|
1249
1251
|
# `result.column_lineage[].downstream` are empty in `sqlglot_lineage` response, whereas upstream
|
|
1250
1252
|
# details and downstream column details are extracted correctly.
|
|
1251
1253
|
# Here, we inject view V's urn in `result.out_tables` and `result.column_lineage[].downstream`
|
|
1252
1254
|
# to get complete lineage result.
|
|
1255
|
+
|
|
1256
|
+
# Some platforms (e.g. mssql) may have a slightly different view name in the view definition than
|
|
1257
|
+
# the actual view name used elsewhere. Therefore, we overwrite the downstream table for such cases as well.
|
|
1258
|
+
|
|
1253
1259
|
result.out_tables = [view_urn]
|
|
1254
1260
|
if result.column_lineage:
|
|
1255
1261
|
for col_result in result.column_lineage:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|