acryl-datahub 0.15.0.5rc3__py3-none-any.whl → 0.15.0.5rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=y-G1yv1bSuxJLKtiCJEKjyAqf2KI3SupqeNUfVB1MUg,324
3
+ datahub/_version.py,sha256=WfLwqZVZ8jnBAUkc1mxyo_REl_AjCDcwm9R97SuldaY,324
4
4
  datahub/entrypoints.py,sha256=osv2ailvuW-HHlAE0fOtyblJI1X7HInZutd9DC66jqQ,8022
5
5
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -219,7 +219,7 @@ datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm
219
219
  datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
220
220
  datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
221
  datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
222
- datahub/ingestion/source/aws/glue.py,sha256=qwkZMcbBlHIdhhuRj-gHNYMeuMADrvaHcN3gik0n_08,57919
222
+ datahub/ingestion/source/aws/glue.py,sha256=DwROr923M01QnvImUbMoHS6TTTT9kBz2tEmQ3Sv4EoY,58019
223
223
  datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD89RSFfsVNlFbRnSkQ,3901
224
224
  datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
225
225
  datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
@@ -346,9 +346,9 @@ datahub/ingestion/source/looker/looker_template_language.py,sha256=mfbU27NYs0mkZ
346
346
  datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
347
347
  datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
348
348
  datahub/ingestion/source/looker/lookml_concept_context.py,sha256=eDaze9S7cgO5eFP7-0azUMEJyR3EfMjmfj5pMPjpm8c,18066
349
- datahub/ingestion/source/looker/lookml_config.py,sha256=Q0fMsu_Cvm8807R6VB14VJDLqjoLTyGF-WsiUD6xEk8,10519
349
+ datahub/ingestion/source/looker/lookml_config.py,sha256=Ub5Efgzb1bDId5nNcUhcZKEm2hp273wF5edip283U2g,10775
350
350
  datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
351
- datahub/ingestion/source/looker/lookml_source.py,sha256=jp58gSrWXITwxd-C5UfVoLJXpxBe5smFjdJyYza-Aek,40436
351
+ datahub/ingestion/source/looker/lookml_source.py,sha256=qkLVzs5AHSA5B518s7fCD99KKHh6_8-QR8KM0mySnFg,42757
352
352
  datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
353
353
  datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
354
354
  datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
@@ -993,9 +993,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
993
993
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
994
994
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
995
995
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
996
- acryl_datahub-0.15.0.5rc3.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
997
- acryl_datahub-0.15.0.5rc3.dist-info/METADATA,sha256=iA_FeNAFTGBRzDr-rvVZXYUJtYerY57GdmWkuX4XCIg,173382
998
- acryl_datahub-0.15.0.5rc3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
999
- acryl_datahub-0.15.0.5rc3.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
1000
- acryl_datahub-0.15.0.5rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1001
- acryl_datahub-0.15.0.5rc3.dist-info/RECORD,,
996
+ acryl_datahub-0.15.0.5rc4.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
997
+ acryl_datahub-0.15.0.5rc4.dist-info/METADATA,sha256=5_fFeSDo0RY3z5NeouX22pyoKVy8iM7I7a8KyYQz4Xg,173382
998
+ acryl_datahub-0.15.0.5rc4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
999
+ acryl_datahub-0.15.0.5rc4.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
1000
+ acryl_datahub-0.15.0.5rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1001
+ acryl_datahub-0.15.0.5rc4.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "0.15.0.5rc3"
3
+ __version__ = "0.15.0.5rc4"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -218,6 +218,7 @@ class GlueSourceConfig(
218
218
 
219
219
  @dataclass
220
220
  class GlueSourceReport(StaleEntityRemovalSourceReport):
221
+ catalog_id: Optional[str] = None
221
222
  tables_scanned = 0
222
223
  filtered: List[str] = dataclass_field(default_factory=list)
223
224
  databases: EntityFilterReport = EntityFilterReport.field(type="database")
@@ -315,6 +316,7 @@ class GlueSource(StatefulIngestionSourceBase):
315
316
  self.extract_owners = config.extract_owners
316
317
  self.source_config = config
317
318
  self.report = GlueSourceReport()
319
+ self.report.catalog_id = self.source_config.catalog_id
318
320
  self.glue_client = config.glue_client
319
321
  self.s3_client = config.s3_client
320
322
  self.extract_transforms = config.extract_transforms
@@ -139,7 +139,10 @@ class LookMLSourceConfig(
139
139
  )
140
140
  emit_reachable_views_only: bool = Field(
141
141
  True,
142
- description="When enabled, only views that are reachable from explores defined in the model files are emitted",
142
+ description=(
143
+ "When enabled, only views that are reachable from explores defined in the model files are emitted. "
144
+ "If set to False, all views imported in model files are emitted. Views that are unreachable i.e. not explicitly defined in the model files are currently not emitted however reported as warning for debugging purposes."
145
+ ),
143
146
  )
144
147
  populate_sql_logic_for_missing_descriptions: bool = Field(
145
148
  False,
@@ -59,6 +59,7 @@ from datahub.ingestion.source.looker.lookml_concept_context import (
59
59
  from datahub.ingestion.source.looker.lookml_config import (
60
60
  BASE_PROJECT_NAME,
61
61
  MODEL_FILE_EXTENSION,
62
+ VIEW_FILE_EXTENSION,
62
63
  LookerConnectionDefinition,
63
64
  LookMLSourceConfig,
64
65
  LookMLSourceReport,
@@ -884,6 +885,7 @@ class LookMLSource(StatefulIngestionSourceBase):
884
885
  view_urn = maybe_looker_view.id.get_urn(
885
886
  self.source_config
886
887
  )
888
+
887
889
  view_connection_mapping = view_connection_map.get(
888
890
  view_urn
889
891
  )
@@ -939,6 +941,9 @@ class LookMLSource(StatefulIngestionSourceBase):
939
941
  str(maybe_looker_view.id)
940
942
  )
941
943
 
944
+ if not self.source_config.emit_reachable_views_only:
945
+ self.report_skipped_unreachable_views(viewfile_loader, processed_view_map)
946
+
942
947
  if (
943
948
  self.source_config.tag_measures_and_dimensions
944
949
  and self.reporter.events_produced != 0
@@ -966,5 +971,56 @@ class LookMLSource(StatefulIngestionSourceBase):
966
971
  ),
967
972
  ).as_workunit()
968
973
 
974
+ def report_skipped_unreachable_views(
975
+ self,
976
+ viewfile_loader: LookerViewFileLoader,
977
+ processed_view_map: Dict[str, Set[str]] = {},
978
+ ) -> None:
979
+ view_files: Dict[str, List[pathlib.Path]] = {}
980
+ for project, folder_path in self.base_projects_folder.items():
981
+ folder = pathlib.Path(folder_path)
982
+ view_files[project] = list(folder.glob(f"**/*{VIEW_FILE_EXTENSION}"))
983
+
984
+ skipped_view_paths: Dict[str, List[str]] = {}
985
+ for project, views in view_files.items():
986
+ skipped_paths: Set[str] = set()
987
+
988
+ for view_path in views:
989
+ # Check if the view is already in processed_view_map
990
+ if not any(
991
+ str(view_path) in view_set
992
+ for view_set in processed_view_map.values()
993
+ ):
994
+ looker_viewfile = viewfile_loader.load_viewfile(
995
+ path=str(view_path),
996
+ project_name=project,
997
+ connection=None,
998
+ reporter=self.reporter,
999
+ )
1000
+
1001
+ if looker_viewfile is not None:
1002
+ for raw_view in looker_viewfile.views:
1003
+ raw_view_name = raw_view.get("name", "")
1004
+
1005
+ if (
1006
+ raw_view_name
1007
+ and self.source_config.view_pattern.allowed(
1008
+ raw_view_name
1009
+ )
1010
+ ):
1011
+ skipped_paths.add(str(view_path))
1012
+
1013
+ skipped_view_paths[project] = list(skipped_paths)
1014
+
1015
+ for project, view_paths in skipped_view_paths.items():
1016
+ for path in view_paths:
1017
+ self.reporter.report_warning(
1018
+ title="Skipped View File",
1019
+ message=(
1020
+ "The Looker view file was skipped because it may not be referenced by any models."
1021
+ ),
1022
+ context=(f"Project: {project}, View File Path: {path}"),
1023
+ )
1024
+
969
1025
  def get_report(self):
970
1026
  return self.reporter