acryl-datahub 0.15.0rc8__py3-none-any.whl → 0.15.0rc10__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc8.dist-info → acryl_datahub-0.15.0rc10.dist-info}/METADATA +2469 -2469
- {acryl_datahub-0.15.0rc8.dist-info → acryl_datahub-0.15.0rc10.dist-info}/RECORD +9 -9
- datahub/__init__.py +1 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +2 -2
- datahub/ingestion/source/looker/looker_liquid_tag.py +8 -1
- datahub/ingestion/source/tableau/tableau.py +53 -27
- {acryl_datahub-0.15.0rc8.dist-info → acryl_datahub-0.15.0rc10.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc8.dist-info → acryl_datahub-0.15.0rc10.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc8.dist-info → acryl_datahub-0.15.0rc10.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=vEQvZGPQ7hfoL7fm6XTBo6OY37wlMIITNNW1rpZuQCk,575
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -301,7 +301,7 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
|
|
|
301
301
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
302
302
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
303
|
datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
|
|
304
|
-
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
|
|
304
|
+
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=rGIwUKPlNu2XR0YT8DuJGg2pPGIr3MM-YDa5Slo2vNY,14470
|
|
305
305
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
|
|
306
306
|
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=_tms5AqNAJRDRzQmyN_VydzXbdME2lkvTwa5u1La5z8,7353
|
|
307
307
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -331,7 +331,7 @@ datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKf
|
|
|
331
331
|
datahub/ingestion/source/looker/looker_dataclasses.py,sha256=ULWLFWsV2cKmTuOFavD8QjEBmnXmvjyr8RbUB62DwJQ,12178
|
|
332
332
|
datahub/ingestion/source/looker/looker_file_loader.py,sha256=c1ewDrIb9VJg1o-asbwX9gL83kgL01vIETzzbmZIhmw,4267
|
|
333
333
|
datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=QTTCW-rPNUoazQG_sTJbCARXJzQ7NKS-XKURp2AAWls,11106
|
|
334
|
-
datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=
|
|
334
|
+
datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
|
|
335
335
|
datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
|
|
336
336
|
datahub/ingestion/source/looker/looker_source.py,sha256=AByQxWVfOBqOtZPaR_cw9SB-tFZtfppiKRkFSbcK1GA,65346
|
|
337
337
|
datahub/ingestion/source/looker/looker_template_language.py,sha256=EG4ZfVZ0x53lgaYh2ohzL4ZCy9KsX0TA51XqCmsCd2Q,14328
|
|
@@ -485,7 +485,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
485
485
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
|
|
486
486
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
487
487
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
488
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
488
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=AFlDng8EfvBvZL692hMf_sfzGwpHpUU6FW_ElR4uitQ,131551
|
|
489
489
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=Dy_2pvkPucZJsG_LvQZLlxNEkjh-yOXHlZ4jurq9opM,26069
|
|
490
490
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=nWElhtDo5kj5mWivZFmtVF_4Ugw0-EatBYWyDVzu5hE,2501
|
|
491
491
|
datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
974
974
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
975
975
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
976
976
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
979
|
-
acryl_datahub-0.15.
|
|
980
|
-
acryl_datahub-0.15.
|
|
981
|
-
acryl_datahub-0.15.
|
|
977
|
+
acryl_datahub-0.15.0rc10.dist-info/METADATA,sha256=oHQM6bvFxv1QboFDXeahyZEbfacayYoFOlOUcPacaeI,172487
|
|
978
|
+
acryl_datahub-0.15.0rc10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
979
|
+
acryl_datahub-0.15.0rc10.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
980
|
+
acryl_datahub-0.15.0rc10.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
981
|
+
acryl_datahub-0.15.0rc10.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -114,11 +114,11 @@ class DataProcessCleanupConfig(ConfigModel):
|
|
|
114
114
|
)
|
|
115
115
|
|
|
116
116
|
delete_empty_data_jobs: bool = Field(
|
|
117
|
-
|
|
117
|
+
False, description="Whether to delete Data Jobs without runs"
|
|
118
118
|
)
|
|
119
119
|
|
|
120
120
|
delete_empty_data_flows: bool = Field(
|
|
121
|
-
|
|
121
|
+
False, description="Whether to delete Data Flows without runs"
|
|
122
122
|
)
|
|
123
123
|
|
|
124
124
|
hard_delete_entities: bool = Field(
|
|
@@ -4,6 +4,7 @@ from typing import ClassVar, Optional, TextIO
|
|
|
4
4
|
from liquid import Environment
|
|
5
5
|
from liquid.ast import Node
|
|
6
6
|
from liquid.context import Context
|
|
7
|
+
from liquid.filter import string_filter
|
|
7
8
|
from liquid.parse import expect, get_parser
|
|
8
9
|
from liquid.stream import TokenStream
|
|
9
10
|
from liquid.tag import Tag
|
|
@@ -81,12 +82,18 @@ class ConditionTag(Tag):
|
|
|
81
82
|
custom_tags = [ConditionTag]
|
|
82
83
|
|
|
83
84
|
|
|
85
|
+
@string_filter
|
|
86
|
+
def sql_quote_filter(variable: str) -> str:
|
|
87
|
+
return f"'{variable}'"
|
|
88
|
+
|
|
89
|
+
|
|
84
90
|
@lru_cache(maxsize=1)
|
|
85
91
|
def _create_env() -> Environment:
|
|
86
|
-
env: Environment = Environment()
|
|
92
|
+
env: Environment = Environment(strict_filters=False)
|
|
87
93
|
# register tag. One time activity
|
|
88
94
|
for custom_tag in custom_tags:
|
|
89
95
|
env.add_tag(custom_tag)
|
|
96
|
+
env.add_filter("sql_quote", sql_quote_filter)
|
|
90
97
|
return env
|
|
91
98
|
|
|
92
99
|
|
|
@@ -68,6 +68,7 @@ from datahub.ingestion.api.source import (
|
|
|
68
68
|
CapabilityReport,
|
|
69
69
|
MetadataWorkUnitProcessor,
|
|
70
70
|
Source,
|
|
71
|
+
StructuredLogLevel,
|
|
71
72
|
TestableSource,
|
|
72
73
|
TestConnectionReport,
|
|
73
74
|
)
|
|
@@ -289,16 +290,12 @@ class TableauConnectionConfig(ConfigModel):
|
|
|
289
290
|
server.auth.sign_in(authentication)
|
|
290
291
|
return server
|
|
291
292
|
except ServerResponseError as e:
|
|
293
|
+
message = f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
|
|
292
294
|
if isinstance(authentication, PersonalAccessTokenAuth):
|
|
293
295
|
# Docs on token expiry in Tableau:
|
|
294
296
|
# https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm#token-expiry
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
"expire if not used for 15 days or if over 1 year old"
|
|
298
|
-
)
|
|
299
|
-
raise ValueError(
|
|
300
|
-
f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
|
|
301
|
-
) from e
|
|
297
|
+
message = f"Error authenticating with Tableau. Note that Tableau personal access tokens expire if not used for 15 days or if over 1 year old: {str(e)}"
|
|
298
|
+
raise ValueError(message) from e
|
|
302
299
|
except Exception as e:
|
|
303
300
|
raise ValueError(
|
|
304
301
|
f"Unable to login (check your Tableau connection and credentials): {str(e)}"
|
|
@@ -700,6 +697,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
700
697
|
config=self.config,
|
|
701
698
|
ctx=self.ctx,
|
|
702
699
|
site=site,
|
|
700
|
+
site_id=site.id,
|
|
703
701
|
report=self.report,
|
|
704
702
|
server=self.server,
|
|
705
703
|
platform=self.platform,
|
|
@@ -707,11 +705,19 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
707
705
|
logger.info(f"Ingesting assets of site '{site.content_url}'.")
|
|
708
706
|
yield from site_source.ingest_tableau_site()
|
|
709
707
|
else:
|
|
710
|
-
site =
|
|
708
|
+
site = None
|
|
709
|
+
with self.report.report_exc(
|
|
710
|
+
title="Unable to fetch site details. Site hierarchy may be incomplete and external urls may be missing.",
|
|
711
|
+
message="This usually indicates missing permissions. Ensure that you have all necessary permissions.",
|
|
712
|
+
level=StructuredLogLevel.WARN,
|
|
713
|
+
):
|
|
714
|
+
site = self.server.sites.get_by_id(self.server.site_id)
|
|
715
|
+
|
|
711
716
|
site_source = TableauSiteSource(
|
|
712
717
|
config=self.config,
|
|
713
718
|
ctx=self.ctx,
|
|
714
719
|
site=site,
|
|
720
|
+
site_id=self.server.site_id,
|
|
715
721
|
report=self.report,
|
|
716
722
|
server=self.server,
|
|
717
723
|
platform=self.platform,
|
|
@@ -722,6 +728,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
722
728
|
title="Failed to Retrieve Tableau Metadata",
|
|
723
729
|
message="Unable to retrieve metadata from tableau.",
|
|
724
730
|
context=str(md_exception),
|
|
731
|
+
exc=md_exception,
|
|
725
732
|
)
|
|
726
733
|
|
|
727
734
|
def close(self) -> None:
|
|
@@ -743,7 +750,8 @@ class TableauSiteSource:
|
|
|
743
750
|
self,
|
|
744
751
|
config: TableauConfig,
|
|
745
752
|
ctx: PipelineContext,
|
|
746
|
-
site: SiteItem,
|
|
753
|
+
site: Optional[SiteItem],
|
|
754
|
+
site_id: Optional[str],
|
|
747
755
|
report: TableauSourceReport,
|
|
748
756
|
server: Server,
|
|
749
757
|
platform: str,
|
|
@@ -752,9 +760,16 @@ class TableauSiteSource:
|
|
|
752
760
|
self.report = report
|
|
753
761
|
self.server: Server = server
|
|
754
762
|
self.ctx: PipelineContext = ctx
|
|
755
|
-
self.site: SiteItem = site
|
|
756
763
|
self.platform = platform
|
|
757
764
|
|
|
765
|
+
self.site: Optional[SiteItem] = site
|
|
766
|
+
if site_id is not None:
|
|
767
|
+
self.site_id: str = site_id
|
|
768
|
+
else:
|
|
769
|
+
assert self.site is not None, "site or site_id is required"
|
|
770
|
+
assert self.site.id is not None, "site_id is required when site is provided"
|
|
771
|
+
self.site_id = self.site.id
|
|
772
|
+
|
|
758
773
|
self.database_tables: Dict[str, DatabaseTable] = {}
|
|
759
774
|
self.tableau_stat_registry: Dict[str, UsageStat] = {}
|
|
760
775
|
self.tableau_project_registry: Dict[str, TableauProject] = {}
|
|
@@ -808,7 +823,7 @@ class TableauSiteSource:
|
|
|
808
823
|
def _re_authenticate(self):
|
|
809
824
|
tableau_auth: Union[
|
|
810
825
|
TableauAuth, PersonalAccessTokenAuth
|
|
811
|
-
] = self.config.get_tableau_auth(self.
|
|
826
|
+
] = self.config.get_tableau_auth(self.site_id)
|
|
812
827
|
self.server.auth.sign_in(tableau_auth)
|
|
813
828
|
|
|
814
829
|
@property
|
|
@@ -826,6 +841,7 @@ class TableauSiteSource:
|
|
|
826
841
|
if not view.id:
|
|
827
842
|
continue
|
|
828
843
|
self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views)
|
|
844
|
+
logger.info(f"Got Tableau stats for {len(self.tableau_stat_registry)} assets")
|
|
829
845
|
logger.debug("Tableau stats %s", self.tableau_stat_registry)
|
|
830
846
|
|
|
831
847
|
def _populate_database_server_hostname_map(self) -> None:
|
|
@@ -876,7 +892,7 @@ class TableauSiteSource:
|
|
|
876
892
|
ancestors = [cur_proj.name]
|
|
877
893
|
while cur_proj.parent_id is not None:
|
|
878
894
|
if cur_proj.parent_id not in all_project_map:
|
|
879
|
-
self.report.
|
|
895
|
+
self.report.warning(
|
|
880
896
|
"project-issue",
|
|
881
897
|
f"Parent project {cur_proj.parent_id} not found. We need Site Administrator Explorer permissions.",
|
|
882
898
|
)
|
|
@@ -974,8 +990,11 @@ class TableauSiteSource:
|
|
|
974
990
|
self.datasource_project_map[ds.id] = ds.project_id
|
|
975
991
|
except Exception as e:
|
|
976
992
|
self.report.get_all_datasources_query_failed = True
|
|
977
|
-
|
|
978
|
-
|
|
993
|
+
self.report.warning(
|
|
994
|
+
title="Unexpected Query Error",
|
|
995
|
+
message="Get all datasources query failed due to error",
|
|
996
|
+
exc=e,
|
|
997
|
+
)
|
|
979
998
|
|
|
980
999
|
def _init_workbook_registry(self) -> None:
|
|
981
1000
|
if self.server is None:
|
|
@@ -1141,7 +1160,6 @@ class TableauSiteSource:
|
|
|
1141
1160
|
)
|
|
1142
1161
|
|
|
1143
1162
|
if node_limit_errors:
|
|
1144
|
-
logger.debug(f"Node Limit Error. query_data {query_data}")
|
|
1145
1163
|
self.report.warning(
|
|
1146
1164
|
title="Tableau Data Exceed Predefined Limit",
|
|
1147
1165
|
message="The numbers of record in result set exceeds a predefined limit. Increase the tableau "
|
|
@@ -1257,9 +1275,10 @@ class TableauSiteSource:
|
|
|
1257
1275
|
wrk_id: Optional[str] = workbook.get(c.ID)
|
|
1258
1276
|
prj_name: Optional[str] = workbook.get(c.PROJECT_NAME)
|
|
1259
1277
|
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1278
|
+
self.report.warning(
|
|
1279
|
+
title="Skipping Missing Workbook",
|
|
1280
|
+
message="Skipping workbook as its project is not present in project registry",
|
|
1281
|
+
context=f"workbook={wrk_name}({wrk_id}), project={prj_name}({project_luid})",
|
|
1263
1282
|
)
|
|
1264
1283
|
continue
|
|
1265
1284
|
|
|
@@ -1453,7 +1472,7 @@ class TableauSiteSource:
|
|
|
1453
1472
|
c.COLUMNS_CONNECTION
|
|
1454
1473
|
].get("totalCount")
|
|
1455
1474
|
if not is_custom_sql and not num_tbl_cols:
|
|
1456
|
-
logger.
|
|
1475
|
+
logger.warning(
|
|
1457
1476
|
f"Skipping upstream table with id {table[c.ID]}, no columns: {table}"
|
|
1458
1477
|
)
|
|
1459
1478
|
continue
|
|
@@ -1469,7 +1488,12 @@ class TableauSiteSource:
|
|
|
1469
1488
|
table, default_schema_map=self.config.default_schema_map
|
|
1470
1489
|
)
|
|
1471
1490
|
except Exception as e:
|
|
1472
|
-
|
|
1491
|
+
self.report.warning(
|
|
1492
|
+
title="Potentially Missing Lineage Issue",
|
|
1493
|
+
message="Failed to generate upstream reference",
|
|
1494
|
+
exc=e,
|
|
1495
|
+
context=f"table={table}",
|
|
1496
|
+
)
|
|
1473
1497
|
continue
|
|
1474
1498
|
|
|
1475
1499
|
table_urn = ref.make_dataset_urn(
|
|
@@ -1917,10 +1941,12 @@ class TableauSiteSource:
|
|
|
1917
1941
|
self.datasource_project_map[ds_result.id] = ds_result.project_id
|
|
1918
1942
|
except Exception as e:
|
|
1919
1943
|
self.report.num_get_datasource_query_failures += 1
|
|
1920
|
-
|
|
1921
|
-
|
|
1944
|
+
self.report.warning(
|
|
1945
|
+
title="Unexpected Query Error",
|
|
1946
|
+
message="Failed to get datasource details",
|
|
1947
|
+
exc=e,
|
|
1948
|
+
context=f"ds_luid={ds_luid}",
|
|
1922
1949
|
)
|
|
1923
|
-
logger.debug("Error stack trace", exc_info=True)
|
|
1924
1950
|
|
|
1925
1951
|
def _get_workbook_project_luid(self, wb: dict) -> Optional[str]:
|
|
1926
1952
|
if wb.get(c.LUID) and self.workbook_project_map.get(wb[c.LUID]):
|
|
@@ -3181,10 +3207,10 @@ class TableauSiteSource:
|
|
|
3181
3207
|
else:
|
|
3182
3208
|
# This is a root Tableau project since the parent_project_id is None.
|
|
3183
3209
|
# For a root project, either the site is the parent, or the platform is the default parent.
|
|
3184
|
-
if self.config.add_site_container
|
|
3210
|
+
if self.config.add_site_container:
|
|
3185
3211
|
# The site containers have already been generated by emit_site_container, so we
|
|
3186
3212
|
# don't need to emit them again here.
|
|
3187
|
-
parent_project_key = self.gen_site_key(self.
|
|
3213
|
+
parent_project_key = self.gen_site_key(self.site_id)
|
|
3188
3214
|
|
|
3189
3215
|
yield from gen_containers(
|
|
3190
3216
|
container_key=project_key,
|
|
@@ -3201,12 +3227,12 @@ class TableauSiteSource:
|
|
|
3201
3227
|
yield from emit_project_in_topological_order(project)
|
|
3202
3228
|
|
|
3203
3229
|
def emit_site_container(self):
|
|
3204
|
-
if not self.site
|
|
3230
|
+
if not self.site:
|
|
3205
3231
|
logger.warning("Can not ingest site container. No site information found.")
|
|
3206
3232
|
return
|
|
3207
3233
|
|
|
3208
3234
|
yield from gen_containers(
|
|
3209
|
-
container_key=self.gen_site_key(self.
|
|
3235
|
+
container_key=self.gen_site_key(self.site_id),
|
|
3210
3236
|
name=self.site.name or "Default",
|
|
3211
3237
|
sub_types=[c.SITE],
|
|
3212
3238
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|