acryl-datahub 0.15.0.1rc9__py3-none-any.whl → 0.15.0.1rc10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc9.dist-info → acryl_datahub-0.15.0.1rc10.dist-info}/METADATA +2389 -2389
- {acryl_datahub-0.15.0.1rc9.dist-info → acryl_datahub-0.15.0.1rc10.dist-info}/RECORD +14 -14
- datahub/__init__.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -3
- datahub/ingestion/source/looker/looker_dataclasses.py +7 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +16 -15
- datahub/ingestion/source/snowflake/snowflake_config.py +10 -9
- datahub/ingestion/source/snowflake/snowflake_queries.py +38 -7
- datahub/ingestion/source/snowflake/snowflake_query.py +5 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +2 -5
- datahub/ingestion/source/snowflake/snowflake_utils.py +22 -18
- {acryl_datahub-0.15.0.1rc9.dist-info → acryl_datahub-0.15.0.1rc10.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc9.dist-info → acryl_datahub-0.15.0.1rc10.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc9.dist-info → acryl_datahub-0.15.0.1rc10.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=_-iwjV9mhNtK3Q_48sB1x7crxfllh3ay-QVv4WQ8458,577
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -273,7 +273,7 @@ datahub/ingestion/source/datahub/datahub_source.py,sha256=2jDnsHEzpGhr00qQI9unSU
|
|
|
273
273
|
datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vBCU0XxGcZR6Xxs,940
|
|
274
274
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
275
275
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
276
|
-
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=
|
|
276
|
+
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=Joh4AIjlu-UVJw_Hu32bPxT9w25RX4JfUnUhVpiJcJw,18005
|
|
277
277
|
datahub/ingestion/source/dbt/dbt_common.py,sha256=0ddiqNx9sUAGZYDQ8tSr5Qh5ti-kgC4saW1yRRNJXgg,80493
|
|
278
278
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=m6cA9vVd4Nh2arc-T2_xeQoxvreRbMhTDIJuYsx3wHc,22722
|
|
279
279
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
|
|
@@ -334,7 +334,7 @@ datahub/ingestion/source/looker/looker_common.py,sha256=KObx5ZTfldN2EO11eb1LrHI-
|
|
|
334
334
|
datahub/ingestion/source/looker/looker_config.py,sha256=87WAgdJ_QWdTq25RBwgIqfc2kq7dubSpzbEtXb2ihMw,13182
|
|
335
335
|
datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
|
|
336
336
|
datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
|
|
337
|
-
datahub/ingestion/source/looker/looker_dataclasses.py,sha256=
|
|
337
|
+
datahub/ingestion/source/looker/looker_dataclasses.py,sha256=LjrP5m_A4HV-XeFlSNGVYNuyF0ulxp_qwB82Ss4Iycs,12200
|
|
338
338
|
datahub/ingestion/source/looker/looker_file_loader.py,sha256=c1ewDrIb9VJg1o-asbwX9gL83kgL01vIETzzbmZIhmw,4267
|
|
339
339
|
datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=0gaYjBv4wkbbLWVgvaAV6JyWAFb0utTG6TCve2d9xss,11511
|
|
340
340
|
datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
|
|
@@ -370,7 +370,7 @@ datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg
|
|
|
370
370
|
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
371
371
|
datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
372
372
|
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
|
|
373
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=
|
|
373
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=8_IIYzcGQR5jcJ3NKg_tIa7VobUEBXzVpvFBaFPUToM,39598
|
|
374
374
|
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=3nGU-_KQe1WMIAPdxtuzulqpAreNsqi0vX0XdrddCU8,26184
|
|
375
375
|
datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
|
|
376
376
|
datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
|
|
@@ -429,21 +429,21 @@ datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_G
|
|
|
429
429
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
430
430
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
431
431
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
432
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
432
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=jQGSa7ZQs3EsXB9ANShZ4xv9RqrhRfVHRSLeFiDwwxc,17974
|
|
433
433
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
|
|
434
434
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
435
435
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=5Lpy_irZlbOFJbvVkgsZSBjdLCT3VZNjlEvttzSQAU4,21121
|
|
436
436
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
437
|
-
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=
|
|
438
|
-
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=
|
|
437
|
+
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=Lhc5FAx8pXiUyfODGNkQJhjThSCIjPqG2R82dHN-jg0,26889
|
|
438
|
+
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=5po2FWz41UVowykJYbTFGxsltbmlHBCPcHG20VOhdOE,38469
|
|
439
439
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=_-rD7Q4MzKY8fYzJHSBnGX4gurwujL3UoRzcP_TZURs,6468
|
|
440
440
|
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=z5ZPgh-TILAz0DeIwDxRCsj980CM2BbftXiFpM1dV_Y,21674
|
|
441
441
|
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=3AxvKfK9WV9x2f2XNuJ-Cmy4szmXKm1Ky0haRVvyC6w,42340
|
|
442
442
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
443
443
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
444
444
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
|
|
445
|
-
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=
|
|
446
|
-
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=
|
|
445
|
+
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=0rXgz8bvRiI9SYVMa0UGLeg_DcjqBy6kQsdq0Uq0HVk,24685
|
|
446
|
+
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=MoI8-DR9tuMuHMBQcpDo4GFjvcoQZWLNkdFZsTkgK-M,12786
|
|
447
447
|
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=hIWtzlxuSQ_3w48o4AF2l9CQOcWIe6AmD07I89sH2B0,31860
|
|
448
448
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
449
449
|
datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
|
|
@@ -982,8 +982,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
982
982
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
983
983
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
984
984
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
985
|
-
acryl_datahub-0.15.0.
|
|
986
|
-
acryl_datahub-0.15.0.
|
|
987
|
-
acryl_datahub-0.15.0.
|
|
988
|
-
acryl_datahub-0.15.0.
|
|
989
|
-
acryl_datahub-0.15.0.
|
|
985
|
+
acryl_datahub-0.15.0.1rc10.dist-info/METADATA,sha256=GCgEH25gXF0roGuAivBGRw1IyiBv_Xv4wbWj9jGlpIo,173645
|
|
986
|
+
acryl_datahub-0.15.0.1rc10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
987
|
+
acryl_datahub-0.15.0.1rc10.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
988
|
+
acryl_datahub-0.15.0.1rc10.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
989
|
+
acryl_datahub-0.15.0.1rc10.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from json import JSONDecodeError
|
|
4
|
-
from typing import Dict, List, Optional, Tuple
|
|
4
|
+
from typing import Dict, List, Literal, Optional, Tuple
|
|
5
5
|
from urllib.parse import urlparse
|
|
6
6
|
|
|
7
7
|
import dateutil.parser
|
|
@@ -62,6 +62,11 @@ class DBTCloudConfig(DBTCommonConfig):
|
|
|
62
62
|
description="The ID of the run to ingest metadata from. If not specified, we'll default to the latest run.",
|
|
63
63
|
)
|
|
64
64
|
|
|
65
|
+
external_url_mode: Literal["explore", "ide"] = Field(
|
|
66
|
+
default="explore",
|
|
67
|
+
description='Where should the "View in dbt" link point to - either the "Explore" UI or the dbt Cloud IDE',
|
|
68
|
+
)
|
|
69
|
+
|
|
65
70
|
@root_validator(pre=True)
|
|
66
71
|
def set_metadata_endpoint(cls, values: dict) -> dict:
|
|
67
72
|
if values.get("access_url") and not values.get("metadata_endpoint"):
|
|
@@ -527,5 +532,7 @@ class DBTCloudSource(DBTSourceBase, TestableSource):
|
|
|
527
532
|
)
|
|
528
533
|
|
|
529
534
|
def get_external_url(self, node: DBTNode) -> Optional[str]:
|
|
530
|
-
|
|
531
|
-
|
|
535
|
+
if self.config.external_url_mode == "explore":
|
|
536
|
+
return f"{self.config.access_url}/explore/{self.config.account_id}/projects/{self.config.project_id}/environments/production/details/{node.dbt_name}"
|
|
537
|
+
else:
|
|
538
|
+
return f"{self.config.access_url}/develop/{self.config.account_id}/projects/{self.config.project_id}"
|
|
@@ -186,16 +186,16 @@ class LookerModel:
|
|
|
186
186
|
f"traversal_path={traversal_path}, included_files = {included_files}, seen_so_far: {seen_so_far}"
|
|
187
187
|
)
|
|
188
188
|
if "*" not in inc and not included_files:
|
|
189
|
-
reporter.
|
|
189
|
+
reporter.warning(
|
|
190
190
|
title="Error Resolving Include",
|
|
191
|
-
message=
|
|
192
|
-
context=f"
|
|
191
|
+
message="Cannot resolve included file",
|
|
192
|
+
context=f"Include: {inc}, path: {path}, traversal_path: {traversal_path}",
|
|
193
193
|
)
|
|
194
194
|
elif not included_files:
|
|
195
|
-
reporter.
|
|
195
|
+
reporter.warning(
|
|
196
196
|
title="Error Resolving Include",
|
|
197
|
-
message=
|
|
198
|
-
context=f"
|
|
197
|
+
message="Did not find anything matching the wildcard include",
|
|
198
|
+
context=f"Include: {inc}, path: {path}, traversal_path: {traversal_path}",
|
|
199
199
|
)
|
|
200
200
|
# only load files that we haven't seen so far
|
|
201
201
|
included_files = [x for x in included_files if x not in seen_so_far]
|
|
@@ -231,9 +231,7 @@ class LookerModel:
|
|
|
231
231
|
source_config,
|
|
232
232
|
reporter,
|
|
233
233
|
seen_so_far,
|
|
234
|
-
traversal_path=traversal_path
|
|
235
|
-
+ "."
|
|
236
|
-
+ pathlib.Path(included_file).stem,
|
|
234
|
+
traversal_path=f"{traversal_path} -> {pathlib.Path(included_file).stem}",
|
|
237
235
|
)
|
|
238
236
|
)
|
|
239
237
|
except Exception as e:
|
|
@@ -84,13 +84,14 @@ class DataResolverBase(ABC):
|
|
|
84
84
|
tenant_id: str,
|
|
85
85
|
metadata_api_timeout: int,
|
|
86
86
|
):
|
|
87
|
-
self.
|
|
88
|
-
self.
|
|
89
|
-
|
|
87
|
+
self._access_token: Optional[str] = None
|
|
88
|
+
self._access_token_expiry_time: Optional[datetime] = None
|
|
89
|
+
|
|
90
|
+
self._tenant_id = tenant_id
|
|
90
91
|
# Test connection by generating access token
|
|
91
92
|
logger.info(f"Trying to connect to {self._get_authority_url()}")
|
|
92
93
|
# Power-Bi Auth (Service Principal Auth)
|
|
93
|
-
self.
|
|
94
|
+
self._msal_client = msal.ConfidentialClientApplication(
|
|
94
95
|
client_id,
|
|
95
96
|
client_credential=client_secret,
|
|
96
97
|
authority=DataResolverBase.AUTHORITY + tenant_id,
|
|
@@ -168,18 +169,18 @@ class DataResolverBase(ABC):
|
|
|
168
169
|
pass
|
|
169
170
|
|
|
170
171
|
def _get_authority_url(self):
|
|
171
|
-
return f"{DataResolverBase.AUTHORITY}{self.
|
|
172
|
+
return f"{DataResolverBase.AUTHORITY}{self._tenant_id}"
|
|
172
173
|
|
|
173
174
|
def get_authorization_header(self):
|
|
174
175
|
return {Constant.Authorization: self.get_access_token()}
|
|
175
176
|
|
|
176
|
-
def get_access_token(self):
|
|
177
|
-
if self.
|
|
178
|
-
return self.
|
|
177
|
+
def get_access_token(self) -> str:
|
|
178
|
+
if self._access_token is not None and not self._is_access_token_expired():
|
|
179
|
+
return self._access_token
|
|
179
180
|
|
|
180
181
|
logger.info("Generating PowerBi access token")
|
|
181
182
|
|
|
182
|
-
auth_response = self.
|
|
183
|
+
auth_response = self._msal_client.acquire_token_for_client(
|
|
183
184
|
scopes=[DataResolverBase.SCOPE]
|
|
184
185
|
)
|
|
185
186
|
|
|
@@ -193,24 +194,24 @@ class DataResolverBase(ABC):
|
|
|
193
194
|
|
|
194
195
|
logger.info("Generated PowerBi access token")
|
|
195
196
|
|
|
196
|
-
self.
|
|
197
|
+
self._access_token = "Bearer {}".format(
|
|
197
198
|
auth_response.get(Constant.ACCESS_TOKEN)
|
|
198
199
|
)
|
|
199
200
|
safety_gap = 300
|
|
200
|
-
self.
|
|
201
|
+
self._access_token_expiry_time = datetime.now() + timedelta(
|
|
201
202
|
seconds=(
|
|
202
203
|
max(auth_response.get(Constant.ACCESS_TOKEN_EXPIRY, 0) - safety_gap, 0)
|
|
203
204
|
)
|
|
204
205
|
)
|
|
205
206
|
|
|
206
|
-
logger.debug(f"{Constant.PBIAccessToken}={self.
|
|
207
|
+
logger.debug(f"{Constant.PBIAccessToken}={self._access_token}")
|
|
207
208
|
|
|
208
|
-
return self.
|
|
209
|
+
return self._access_token
|
|
209
210
|
|
|
210
211
|
def _is_access_token_expired(self) -> bool:
|
|
211
|
-
if not self.
|
|
212
|
+
if not self._access_token_expiry_time:
|
|
212
213
|
return True
|
|
213
|
-
return self.
|
|
214
|
+
return self._access_token_expiry_time < datetime.now()
|
|
214
215
|
|
|
215
216
|
def get_dashboards(self, workspace: Workspace) -> List[Dashboard]:
|
|
216
217
|
"""
|
|
@@ -138,12 +138,20 @@ class SnowflakeIdentifierConfig(
|
|
|
138
138
|
description="Whether to convert dataset urns to lowercase.",
|
|
139
139
|
)
|
|
140
140
|
|
|
141
|
-
|
|
142
|
-
class SnowflakeUsageConfig(BaseUsageConfig):
|
|
143
141
|
email_domain: Optional[str] = pydantic.Field(
|
|
144
142
|
default=None,
|
|
145
143
|
description="Email domain of your organization so users can be displayed on UI appropriately.",
|
|
146
144
|
)
|
|
145
|
+
|
|
146
|
+
email_as_user_identifier: bool = Field(
|
|
147
|
+
default=True,
|
|
148
|
+
description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
|
|
149
|
+
"provided, generates email addresses for snowflake users with unset emails, based on their "
|
|
150
|
+
"username.",
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class SnowflakeUsageConfig(BaseUsageConfig):
|
|
147
155
|
apply_view_usage_to_tables: bool = pydantic.Field(
|
|
148
156
|
default=False,
|
|
149
157
|
description="Whether to apply view's usage to its base tables. If set to True, usage is applied to base tables only.",
|
|
@@ -267,13 +275,6 @@ class SnowflakeV2Config(
|
|
|
267
275
|
" Map of share name -> details of share.",
|
|
268
276
|
)
|
|
269
277
|
|
|
270
|
-
email_as_user_identifier: bool = Field(
|
|
271
|
-
default=True,
|
|
272
|
-
description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
|
|
273
|
-
"provided, generates email addresses for snowflake users with unset emails, based on their "
|
|
274
|
-
"username.",
|
|
275
|
-
)
|
|
276
|
-
|
|
277
278
|
include_assertion_results: bool = Field(
|
|
278
279
|
default=False,
|
|
279
280
|
description="Whether to ingest assertion run results for assertions created using Datahub"
|
|
@@ -66,6 +66,11 @@ from datahub.utilities.perf_timer import PerfTimer
|
|
|
66
66
|
|
|
67
67
|
logger = logging.getLogger(__name__)
|
|
68
68
|
|
|
69
|
+
# Define a type alias
|
|
70
|
+
UserName = str
|
|
71
|
+
UserEmail = str
|
|
72
|
+
UsersMapping = Dict[UserName, UserEmail]
|
|
73
|
+
|
|
69
74
|
|
|
70
75
|
class SnowflakeQueriesExtractorConfig(ConfigModel):
|
|
71
76
|
# TODO: Support stateful ingestion for the time windows.
|
|
@@ -114,11 +119,13 @@ class SnowflakeQueriesSourceConfig(
|
|
|
114
119
|
class SnowflakeQueriesExtractorReport(Report):
|
|
115
120
|
copy_history_fetch_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer)
|
|
116
121
|
query_log_fetch_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer)
|
|
122
|
+
users_fetch_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer)
|
|
117
123
|
|
|
118
124
|
audit_log_load_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer)
|
|
119
125
|
sql_aggregator: Optional[SqlAggregatorReport] = None
|
|
120
126
|
|
|
121
127
|
num_ddl_queries_dropped: int = 0
|
|
128
|
+
num_users: int = 0
|
|
122
129
|
|
|
123
130
|
|
|
124
131
|
@dataclass
|
|
@@ -225,6 +232,9 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
225
232
|
def get_workunits_internal(
|
|
226
233
|
self,
|
|
227
234
|
) -> Iterable[MetadataWorkUnit]:
|
|
235
|
+
with self.report.users_fetch_timer:
|
|
236
|
+
users = self.fetch_users()
|
|
237
|
+
|
|
228
238
|
# TODO: Add some logic to check if the cached audit log is stale or not.
|
|
229
239
|
audit_log_file = self.local_temp_path / "audit_log.sqlite"
|
|
230
240
|
use_cached_audit_log = audit_log_file.exists()
|
|
@@ -248,7 +258,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
248
258
|
queries.append(entry)
|
|
249
259
|
|
|
250
260
|
with self.report.query_log_fetch_timer:
|
|
251
|
-
for entry in self.fetch_query_log():
|
|
261
|
+
for entry in self.fetch_query_log(users):
|
|
252
262
|
queries.append(entry)
|
|
253
263
|
|
|
254
264
|
with self.report.audit_log_load_timer:
|
|
@@ -263,6 +273,25 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
263
273
|
shared_connection.close()
|
|
264
274
|
audit_log_file.unlink(missing_ok=True)
|
|
265
275
|
|
|
276
|
+
def fetch_users(self) -> UsersMapping:
|
|
277
|
+
users: UsersMapping = dict()
|
|
278
|
+
with self.structured_reporter.report_exc("Error fetching users from Snowflake"):
|
|
279
|
+
logger.info("Fetching users from Snowflake")
|
|
280
|
+
query = SnowflakeQuery.get_all_users()
|
|
281
|
+
resp = self.connection.query(query)
|
|
282
|
+
|
|
283
|
+
for row in resp:
|
|
284
|
+
try:
|
|
285
|
+
users[row["NAME"]] = row["EMAIL"]
|
|
286
|
+
self.report.num_users += 1
|
|
287
|
+
except Exception as e:
|
|
288
|
+
self.structured_reporter.warning(
|
|
289
|
+
"Error parsing user row",
|
|
290
|
+
context=f"{row}",
|
|
291
|
+
exc=e,
|
|
292
|
+
)
|
|
293
|
+
return users
|
|
294
|
+
|
|
266
295
|
def fetch_copy_history(self) -> Iterable[KnownLineageMapping]:
|
|
267
296
|
# Derived from _populate_external_lineage_from_copy_history.
|
|
268
297
|
|
|
@@ -298,7 +327,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
298
327
|
yield result
|
|
299
328
|
|
|
300
329
|
def fetch_query_log(
|
|
301
|
-
self,
|
|
330
|
+
self, users: UsersMapping
|
|
302
331
|
) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap]]:
|
|
303
332
|
query_log_query = _build_enriched_query_log_query(
|
|
304
333
|
start_time=self.config.window.start_time,
|
|
@@ -319,7 +348,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
319
348
|
|
|
320
349
|
assert isinstance(row, dict)
|
|
321
350
|
try:
|
|
322
|
-
entry = self._parse_audit_log_row(row)
|
|
351
|
+
entry = self._parse_audit_log_row(row, users)
|
|
323
352
|
except Exception as e:
|
|
324
353
|
self.structured_reporter.warning(
|
|
325
354
|
"Error parsing query log row",
|
|
@@ -331,7 +360,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
331
360
|
yield entry
|
|
332
361
|
|
|
333
362
|
def _parse_audit_log_row(
|
|
334
|
-
self, row: Dict[str, Any]
|
|
363
|
+
self, row: Dict[str, Any], users: UsersMapping
|
|
335
364
|
) -> Optional[Union[TableRename, TableSwap, PreparsedQuery]]:
|
|
336
365
|
json_fields = {
|
|
337
366
|
"DIRECT_OBJECTS_ACCESSED",
|
|
@@ -430,9 +459,11 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
430
459
|
)
|
|
431
460
|
)
|
|
432
461
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
462
|
+
user = CorpUserUrn(
|
|
463
|
+
self.identifiers.get_user_identifier(
|
|
464
|
+
res["user_name"], users.get(res["user_name"])
|
|
465
|
+
)
|
|
466
|
+
)
|
|
436
467
|
|
|
437
468
|
timestamp: datetime = res["query_start_time"]
|
|
438
469
|
timestamp = timestamp.astimezone(timezone.utc)
|
|
@@ -947,4 +947,8 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
947
947
|
AND METRIC_NAME ilike '{pattern}' escape '{escape_pattern}'
|
|
948
948
|
ORDER BY MEASUREMENT_TIME ASC;
|
|
949
949
|
|
|
950
|
-
"""
|
|
950
|
+
"""
|
|
951
|
+
|
|
952
|
+
@staticmethod
|
|
953
|
+
def get_all_users() -> str:
|
|
954
|
+
return """SELECT name as "NAME", email as "EMAIL" FROM SNOWFLAKE.ACCOUNT_USAGE.USERS"""
|
|
@@ -342,10 +342,9 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
|
|
|
342
342
|
filtered_user_counts.append(
|
|
343
343
|
DatasetUserUsageCounts(
|
|
344
344
|
user=make_user_urn(
|
|
345
|
-
self.get_user_identifier(
|
|
345
|
+
self.identifiers.get_user_identifier(
|
|
346
346
|
user_count["user_name"],
|
|
347
347
|
user_email,
|
|
348
|
-
self.config.email_as_user_identifier,
|
|
349
348
|
)
|
|
350
349
|
),
|
|
351
350
|
count=user_count["total"],
|
|
@@ -453,9 +452,7 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
|
|
|
453
452
|
reported_time: int = int(time.time() * 1000)
|
|
454
453
|
last_updated_timestamp: int = int(start_time.timestamp() * 1000)
|
|
455
454
|
user_urn = make_user_urn(
|
|
456
|
-
self.get_user_identifier(
|
|
457
|
-
user_name, user_email, self.config.email_as_user_identifier
|
|
458
|
-
)
|
|
455
|
+
self.identifiers.get_user_identifier(user_name, user_email)
|
|
459
456
|
)
|
|
460
457
|
|
|
461
458
|
# NOTE: In earlier `snowflake-usage` connector this was base_objects_accessed, which is incorrect
|
|
@@ -300,6 +300,28 @@ class SnowflakeIdentifierBuilder:
|
|
|
300
300
|
def get_quoted_identifier_for_table(db_name, schema_name, table_name):
|
|
301
301
|
return f'"{db_name}"."{schema_name}"."{table_name}"'
|
|
302
302
|
|
|
303
|
+
# Note - decide how to construct user urns.
|
|
304
|
+
# Historically urns were created using part before @ from user's email.
|
|
305
|
+
# Users without email were skipped from both user entries as well as aggregates.
|
|
306
|
+
# However email is not mandatory field in snowflake user, user_name is always present.
|
|
307
|
+
def get_user_identifier(
|
|
308
|
+
self,
|
|
309
|
+
user_name: str,
|
|
310
|
+
user_email: Optional[str],
|
|
311
|
+
) -> str:
|
|
312
|
+
if user_email:
|
|
313
|
+
return self.snowflake_identifier(
|
|
314
|
+
user_email
|
|
315
|
+
if self.identifier_config.email_as_user_identifier is True
|
|
316
|
+
else user_email.split("@")[0]
|
|
317
|
+
)
|
|
318
|
+
return self.snowflake_identifier(
|
|
319
|
+
f"{user_name}@{self.identifier_config.email_domain}"
|
|
320
|
+
if self.identifier_config.email_as_user_identifier is True
|
|
321
|
+
and self.identifier_config.email_domain is not None
|
|
322
|
+
else user_name
|
|
323
|
+
)
|
|
324
|
+
|
|
303
325
|
|
|
304
326
|
class SnowflakeCommonMixin(SnowflakeStructuredReportMixin):
|
|
305
327
|
platform = "snowflake"
|
|
@@ -315,24 +337,6 @@ class SnowflakeCommonMixin(SnowflakeStructuredReportMixin):
|
|
|
315
337
|
def identifiers(self) -> SnowflakeIdentifierBuilder:
|
|
316
338
|
return SnowflakeIdentifierBuilder(self.config, self.report)
|
|
317
339
|
|
|
318
|
-
# Note - decide how to construct user urns.
|
|
319
|
-
# Historically urns were created using part before @ from user's email.
|
|
320
|
-
# Users without email were skipped from both user entries as well as aggregates.
|
|
321
|
-
# However email is not mandatory field in snowflake user, user_name is always present.
|
|
322
|
-
def get_user_identifier(
|
|
323
|
-
self,
|
|
324
|
-
user_name: str,
|
|
325
|
-
user_email: Optional[str],
|
|
326
|
-
email_as_user_identifier: bool,
|
|
327
|
-
) -> str:
|
|
328
|
-
if user_email:
|
|
329
|
-
return self.identifiers.snowflake_identifier(
|
|
330
|
-
user_email
|
|
331
|
-
if email_as_user_identifier is True
|
|
332
|
-
else user_email.split("@")[0]
|
|
333
|
-
)
|
|
334
|
-
return self.identifiers.snowflake_identifier(user_name)
|
|
335
|
-
|
|
336
340
|
# TODO: Revisit this after stateful ingestion can commit checkpoint
|
|
337
341
|
# for failures that do not affect the checkpoint
|
|
338
342
|
# TODO: Add additional parameters to match the signature of the .warning and .failure methods
|
|
File without changes
|
{acryl_datahub-0.15.0.1rc9.dist-info → acryl_datahub-0.15.0.1rc10.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|