acryl-datahub 0.15.0.5rc3__py3-none-any.whl → 0.15.0.5rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/METADATA +2302 -2302
- {acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/RECORD +10 -10
- datahub/_version.py +1 -1
- datahub/ingestion/source/aws/glue.py +2 -0
- datahub/ingestion/source/looker/lookml_config.py +4 -1
- datahub/ingestion/source/looker/lookml_source.py +56 -0
- {acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=WfLwqZVZ8jnBAUkc1mxyo_REl_AjCDcwm9R97SuldaY,324
|
|
4
4
|
datahub/entrypoints.py,sha256=osv2ailvuW-HHlAE0fOtyblJI1X7HInZutd9DC66jqQ,8022
|
|
5
5
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -219,7 +219,7 @@ datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm
|
|
|
219
219
|
datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
|
|
220
220
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
221
221
|
datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
|
|
222
|
-
datahub/ingestion/source/aws/glue.py,sha256=
|
|
222
|
+
datahub/ingestion/source/aws/glue.py,sha256=DwROr923M01QnvImUbMoHS6TTTT9kBz2tEmQ3Sv4EoY,58019
|
|
223
223
|
datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD89RSFfsVNlFbRnSkQ,3901
|
|
224
224
|
datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
|
|
225
225
|
datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
|
|
@@ -346,9 +346,9 @@ datahub/ingestion/source/looker/looker_template_language.py,sha256=mfbU27NYs0mkZ
|
|
|
346
346
|
datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
|
|
347
347
|
datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
|
|
348
348
|
datahub/ingestion/source/looker/lookml_concept_context.py,sha256=eDaze9S7cgO5eFP7-0azUMEJyR3EfMjmfj5pMPjpm8c,18066
|
|
349
|
-
datahub/ingestion/source/looker/lookml_config.py,sha256=
|
|
349
|
+
datahub/ingestion/source/looker/lookml_config.py,sha256=Ub5Efgzb1bDId5nNcUhcZKEm2hp273wF5edip283U2g,10775
|
|
350
350
|
datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
|
|
351
|
-
datahub/ingestion/source/looker/lookml_source.py,sha256=
|
|
351
|
+
datahub/ingestion/source/looker/lookml_source.py,sha256=qkLVzs5AHSA5B518s7fCD99KKHh6_8-QR8KM0mySnFg,42757
|
|
352
352
|
datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
|
|
353
353
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
354
354
|
datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
|
|
@@ -993,9 +993,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
993
993
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
994
994
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
995
995
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
996
|
-
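acryl_datahub-0.15.0.5rc3.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
997
|
-
acryl_datahub-0.15.0.5rc3.dist-info/METADATA,sha256=
|
|
998
|
-
acryl_datahub-0.15.0.5rc3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
999
|
-
acryl_datahub-0.15.0.5rc3.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
1000
|
-
acryl_datahub-0.15.0.5rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1001
|
-
acryl_datahub-0.15.0.5rc3.dist-info/RECORD,,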
|
|
996
|
+
acryl_datahub-0.15.0.5rc4.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
997
|
+
acryl_datahub-0.15.0.5rc4.dist-info/METADATA,sha256=5_fFeSDo0RY3z5NeouX22pyoKVy8iM7I7a8KyYQz4Xg,173382
|
|
998
|
+
acryl_datahub-0.15.0.5rc4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
999
|
+
acryl_datahub-0.15.0.5rc4.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
1000
|
+
acryl_datahub-0.15.0.5rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1001
|
+
acryl_datahub-0.15.0.5rc4.dist-info/RECORD,,
|
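datahub/_version.py
CHANGED
|
(The hunk for this file was not captured in this extract; the file list above records it as +1 -1.)
|
datahub/ingestion/source/aws/glue.py
CHANGED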
|
@@ -218,6 +218,7 @@ class GlueSourceConfig(
|
|
|
218
218
|
|
|
219
219
|
@dataclass
|
|
220
220
|
class GlueSourceReport(StaleEntityRemovalSourceReport):
|
|
221
|
+
catalog_id: Optional[str] = None
|
|
221
222
|
tables_scanned = 0
|
|
222
223
|
filtered: List[str] = dataclass_field(default_factory=list)
|
|
223
224
|
databases: EntityFilterReport = EntityFilterReport.field(type="database")
|
|
@@ -315,6 +316,7 @@ class GlueSource(StatefulIngestionSourceBase):
|
|
|
315
316
|
self.extract_owners = config.extract_owners
|
|
316
317
|
self.source_config = config
|
|
317
318
|
self.report = GlueSourceReport()
|
|
319
|
+
self.report.catalog_id = self.source_config.catalog_id
|
|
318
320
|
self.glue_client = config.glue_client
|
|
319
321
|
self.s3_client = config.s3_client
|
|
320
322
|
self.extract_transforms = config.extract_transforms
|
|
datahub/ingestion/source/looker/lookml_config.py
CHANGED
|
@@ -139,7 +139,10 @@ class LookMLSourceConfig(
|
|
|
139
139
|
)
|
|
140
140
|
emit_reachable_views_only: bool = Field(
|
|
141
141
|
True,
|
|
142
|
-
description=
|
|
142
|
+
description=(
|
|
143
|
+
"When enabled, only views that are reachable from explores defined in the model files are emitted. "
|
|
144
|
+
"If set to False, all views imported in model files are emitted. Views that are unreachable i.e. not explicitly defined in the model files are currently not emitted however reported as warning for debugging purposes."
|
|
145
|
+
),
|
|
143
146
|
)
|
|
144
147
|
populate_sql_logic_for_missing_descriptions: bool = Field(
|
|
145
148
|
False,
|
|
datahub/ingestion/source/looker/lookml_source.py
CHANGED
|
@@ -59,6 +59,7 @@ from datahub.ingestion.source.looker.lookml_concept_context import (
|
|
|
59
59
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
60
60
|
BASE_PROJECT_NAME,
|
|
61
61
|
MODEL_FILE_EXTENSION,
|
|
62
|
+
VIEW_FILE_EXTENSION,
|
|
62
63
|
LookerConnectionDefinition,
|
|
63
64
|
LookMLSourceConfig,
|
|
64
65
|
LookMLSourceReport,
|
|
@@ -884,6 +885,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
884
885
|
view_urn = maybe_looker_view.id.get_urn(
|
|
885
886
|
self.source_config
|
|
886
887
|
)
|
|
888
|
+
|
|
887
889
|
view_connection_mapping = view_connection_map.get(
|
|
888
890
|
view_urn
|
|
889
891
|
)
|
|
@@ -939,6 +941,9 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
939
941
|
str(maybe_looker_view.id)
|
|
940
942
|
)
|
|
941
943
|
|
|
944
|
+
if not self.source_config.emit_reachable_views_only:
|
|
945
|
+
self.report_skipped_unreachable_views(viewfile_loader, processed_view_map)
|
|
946
|
+
|
|
942
947
|
if (
|
|
943
948
|
self.source_config.tag_measures_and_dimensions
|
|
944
949
|
and self.reporter.events_produced != 0
|
|
@@ -966,5 +971,56 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
966
971
|
),
|
|
967
972
|
).as_workunit()
|
|
968
973
|
|
|
974
|
+
def report_skipped_unreachable_views(
|
|
975
|
+
self,
|
|
976
|
+
viewfile_loader: LookerViewFileLoader,
|
|
977
|
+
processed_view_map: Dict[str, Set[str]] = {},
|
|
978
|
+
) -> None:
|
|
979
|
+
view_files: Dict[str, List[pathlib.Path]] = {}
|
|
980
|
+
for project, folder_path in self.base_projects_folder.items():
|
|
981
|
+
folder = pathlib.Path(folder_path)
|
|
982
|
+
view_files[project] = list(folder.glob(f"**/*{VIEW_FILE_EXTENSION}"))
|
|
983
|
+
|
|
984
|
+
skipped_view_paths: Dict[str, List[str]] = {}
|
|
985
|
+
for project, views in view_files.items():
|
|
986
|
+
skipped_paths: Set[str] = set()
|
|
987
|
+
|
|
988
|
+
for view_path in views:
|
|
989
|
+
# Check if the view is already in processed_view_map
|
|
990
|
+
if not any(
|
|
991
|
+
str(view_path) in view_set
|
|
992
|
+
for view_set in processed_view_map.values()
|
|
993
|
+
):
|
|
994
|
+
looker_viewfile = viewfile_loader.load_viewfile(
|
|
995
|
+
path=str(view_path),
|
|
996
|
+
project_name=project,
|
|
997
|
+
connection=None,
|
|
998
|
+
reporter=self.reporter,
|
|
999
|
+
)
|
|
1000
|
+
|
|
1001
|
+
if looker_viewfile is not None:
|
|
1002
|
+
for raw_view in looker_viewfile.views:
|
|
1003
|
+
raw_view_name = raw_view.get("name", "")
|
|
1004
|
+
|
|
1005
|
+
if (
|
|
1006
|
+
raw_view_name
|
|
1007
|
+
and self.source_config.view_pattern.allowed(
|
|
1008
|
+
raw_view_name
|
|
1009
|
+
)
|
|
1010
|
+
):
|
|
1011
|
+
skipped_paths.add(str(view_path))
|
|
1012
|
+
|
|
1013
|
+
skipped_view_paths[project] = list(skipped_paths)
|
|
1014
|
+
|
|
1015
|
+
for project, view_paths in skipped_view_paths.items():
|
|
1016
|
+
for path in view_paths:
|
|
1017
|
+
self.reporter.report_warning(
|
|
1018
|
+
title="Skipped View File",
|
|
1019
|
+
message=(
|
|
1020
|
+
"The Looker view file was skipped because it may not be referenced by any models."
|
|
1021
|
+
),
|
|
1022
|
+
context=(f"Project: {project}, View File Path: {path}"),
|
|
1023
|
+
)
|
|
1024
|
+
|
|
969
1025
|
def get_report(self):
|
|
970
1026
|
return self.reporter
|
|
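{acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/LICENSE
RENAMED
|
File without changes
|
|
{acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/WHEEL
RENAMED
|
File without changes
|
{acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
{acryl_datahub-0.15.0.5rc3.dist-info → acryl_datahub-0.15.0.5rc4.dist-info}/top_level.txt
RENAMED
|
File without changes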
|