acryl-datahub 0.15.0.4rc3__py3-none-any.whl → 0.15.0.5__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub has been flagged for review.

Files changed (95)
  1. {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/METADATA +2507 -2470
  2. {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/RECORD +95 -86
  3. {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/entry_points.txt +1 -0
  4. datahub/__init__.py +1 -25
  5. datahub/_version.py +13 -0
  6. datahub/api/entities/dataprocess/dataprocess_instance.py +104 -11
  7. datahub/cli/check_cli.py +1 -1
  8. datahub/cli/cli_utils.py +3 -3
  9. datahub/cli/container_cli.py +1 -64
  10. datahub/cli/iceberg_cli.py +707 -0
  11. datahub/cli/ingest_cli.py +2 -2
  12. datahub/emitter/composite_emitter.py +36 -0
  13. datahub/emitter/rest_emitter.py +1 -1
  14. datahub/entrypoints.py +26 -5
  15. datahub/ingestion/api/incremental_lineage_helper.py +4 -0
  16. datahub/ingestion/api/registry.py +1 -1
  17. datahub/ingestion/glossary/classification_mixin.py +6 -0
  18. datahub/ingestion/glossary/classifier.py +3 -2
  19. datahub/ingestion/graph/client.py +2 -1
  20. datahub/ingestion/graph/entity_versioning.py +201 -0
  21. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  22. datahub/ingestion/run/connection.py +1 -1
  23. datahub/ingestion/run/pipeline.py +3 -3
  24. datahub/ingestion/source/abs/report.py +2 -2
  25. datahub/ingestion/source/apply/__init__.py +0 -0
  26. datahub/ingestion/source/apply/datahub_apply.py +223 -0
  27. datahub/ingestion/source/aws/glue.py +5 -2
  28. datahub/ingestion/source/aws/sagemaker_processors/common.py +3 -2
  29. datahub/ingestion/source/bigquery_v2/bigquery_report.py +1 -1
  30. datahub/ingestion/source/dbt/dbt_core.py +1 -1
  31. datahub/ingestion/source/delta_lake/report.py +2 -2
  32. datahub/ingestion/source/dynamodb/dynamodb.py +2 -1
  33. datahub/ingestion/source/elastic_search.py +2 -1
  34. datahub/ingestion/source/ge_profiling_config.py +11 -7
  35. datahub/ingestion/source/iceberg/iceberg_common.py +3 -2
  36. datahub/ingestion/source/identity/azure_ad.py +6 -14
  37. datahub/ingestion/source/identity/okta.py +2 -1
  38. datahub/ingestion/source/kafka/kafka.py +2 -1
  39. datahub/ingestion/source/kafka_connect/common.py +2 -1
  40. datahub/ingestion/source/ldap.py +2 -1
  41. datahub/ingestion/source/looker/looker_config.py +3 -1
  42. datahub/ingestion/source/looker/looker_dataclasses.py +8 -0
  43. datahub/ingestion/source/looker/looker_file_loader.py +14 -3
  44. datahub/ingestion/source/looker/looker_template_language.py +104 -14
  45. datahub/ingestion/source/looker/lookml_config.py +29 -8
  46. datahub/ingestion/source/looker/lookml_source.py +110 -22
  47. datahub/ingestion/source/mode.py +2 -4
  48. datahub/ingestion/source/mongodb.py +2 -1
  49. datahub/ingestion/source/nifi.py +2 -1
  50. datahub/ingestion/source/powerbi/config.py +2 -2
  51. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -1
  52. datahub/ingestion/source/redash.py +5 -5
  53. datahub/ingestion/source/salesforce.py +4 -1
  54. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  55. datahub/ingestion/source/snowflake/snowflake_query.py +11 -0
  56. datahub/ingestion/source/snowflake/snowflake_report.py +3 -1
  57. datahub/ingestion/source/snowflake/snowflake_schema.py +17 -0
  58. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +35 -43
  59. datahub/ingestion/source/snowflake/snowflake_tag.py +57 -3
  60. datahub/ingestion/source/snowflake/snowflake_v2.py +42 -4
  61. datahub/ingestion/source/sql/clickhouse.py +5 -43
  62. datahub/ingestion/source/sql/mssql/job_models.py +37 -8
  63. datahub/ingestion/source/sql/mssql/source.py +17 -0
  64. datahub/ingestion/source/sql/sql_config.py +0 -10
  65. datahub/ingestion/source/tableau/tableau.py +16 -13
  66. datahub/ingestion/source/tableau/tableau_common.py +1 -1
  67. datahub/ingestion/source/unity/ge_profiler.py +55 -4
  68. datahub/ingestion/source/unity/proxy.py +2 -2
  69. datahub/ingestion/source/unity/report.py +1 -0
  70. datahub/ingestion/source_config/operation_config.py +9 -0
  71. datahub/ingestion/source_report/pulsar.py +5 -4
  72. datahub/metadata/_schema_classes.py +304 -6
  73. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  74. datahub/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
  75. datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
  76. datahub/metadata/schema.avsc +211 -12
  77. datahub/metadata/schemas/AssertionInfo.avsc +2 -2
  78. datahub/metadata/schemas/CorpUserSettings.avsc +9 -0
  79. datahub/metadata/schemas/DashboardInfo.avsc +5 -5
  80. datahub/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
  81. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  82. datahub/metadata/schemas/Deprecation.avsc +12 -0
  83. datahub/metadata/schemas/DisplayProperties.avsc +62 -0
  84. datahub/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
  85. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +92 -0
  86. datahub/metadata/schemas/MetadataChangeEvent.avsc +17 -5
  87. datahub/metadata/schemas/PostInfo.avsc +28 -2
  88. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  89. datahub/specific/dashboard.py +43 -1
  90. datahub/telemetry/telemetry.py +4 -4
  91. datahub/testing/check_imports.py +28 -0
  92. datahub/upgrade/upgrade.py +17 -9
  93. {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/LICENSE +0 -0
  94. {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/WHEEL +0 -0
  95. {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/looker/looker_template_language.py

@@ -2,7 +2,7 @@ import logging
 import pathlib
 import re
 from abc import ABC, abstractmethod
-from typing import Any, ClassVar, Dict, List, Optional, Set, Union
+from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union
 
 from deepmerge import always_merger
 from liquid import Undefined
@@ -27,8 +27,12 @@ from datahub.ingestion.source.looker.looker_liquid_tag import (
 from datahub.ingestion.source.looker.lookml_config import (
     DERIVED_VIEW_PATTERN,
     LookMLSourceConfig,
+    LookMLSourceReport,
 )
 
+if TYPE_CHECKING:
+    from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
+
 logger = logging.getLogger(__name__)
 
@@ -82,7 +86,12 @@ class SpecialVariable:
         return self._create_new_liquid_variables_with_default(variables=variables)
 
 
-def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
+def resolve_liquid_variable(
+    text: str,
+    view_name: str,
+    liquid_variable: Dict[Any, Any],
+    report: LookMLSourceReport,
+) -> str:
     # Set variable value to NULL if not present in liquid_variable dictionary
     Undefined.__str__ = lambda instance: "NULL"  # type: ignore
     try:
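
The Undefined.__str__ override above is what makes unresolved Liquid variables render as NULL instead of raising or silently disappearing. A minimal standalone sketch of that behavior with python-liquid (create_template in the source presumably wraps a configured Environment; the SQL string here is hypothetical):

import re  # noqa: F401  (re is used later in this module)
from liquid import Environment, Undefined

# Mirror the override from resolve_liquid_variable: undefined variables print as NULL
Undefined.__str__ = lambda instance: "NULL"  # type: ignore

env = Environment()
sql = "SELECT * FROM orders WHERE region = '{{ region }}' AND tier = '{{ tier }}'"
print(env.from_string(sql).render(region="emea"))
# SELECT * FROM orders WHERE region = 'emea' AND tier = 'NULL'
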
@@ -96,6 +105,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
         # Resolve liquid template
         return create_template(text).render(liquid_variable)
     except LiquidSyntaxError as e:
+        # TODO: Will add warning once we get rid of duplicate warning messages for the same view
         logger.warning(f"Unsupported liquid template encountered. error [{e.message}]")
         # TODO: There are some tags specific to Looker that the python-liquid library does not understand; currently
         # we do not parse such liquid templates.
@@ -103,6 +113,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
         # See doc: https://cloud.google.com/looker/docs/templated-filters and look for { % condition region %}
         # order.region { % endcondition %}
     except CustomTagException as e:
+        # TODO: Will add warning once we get rid of duplicate warning messages for the same view
         logger.warning(e)
         logger.debug(e, exc_info=e)
 
@@ -192,15 +203,20 @@ class LookMLViewTransformer(ABC):
 
     source_config: LookMLSourceConfig
 
-    def __init__(self, source_config: LookMLSourceConfig):
+    def __init__(
+        self,
+        source_config: LookMLSourceConfig,
+        reporter: LookMLSourceReport,
+    ):
         self.source_config = source_config
+        self.reporter = reporter
 
     def transform(self, view: dict) -> dict:
         value_to_transform: Optional[str] = None
 
-        # is_attribute_supported check is required because not all transformer works on all attributes in current
-        # case mostly all transformer works on sql_table_name and derived.sql attributes,
-        # however IncompleteSqlTransformer only transform the derived.sql attribute
+        # The is_attribute_supported check is required because not all transformers work on all attributes. Currently,
+        # most transformers work on the sql_table_name and derived.sql attributes;
+        # however, IncompleteSqlTransformer only transforms the derived.sql attribute.
         if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME):
             # Give precedence to already processed transformed view.sql_table_name to apply more transformation
             value_to_transform = view.get(
@@ -252,7 +268,9 @@ class LiquidVariableTransformer(LookMLViewTransformer):
     def _apply_transformation(self, value: str, view: dict) -> str:
         return resolve_liquid_variable(
             text=value,
-            liquid_variable=self.source_config.liquid_variable,
+            liquid_variable=self.source_config.liquid_variables,
+            view_name=view["name"],
+            report=self.reporter,
         )
 
 
@@ -287,7 +305,7 @@ class IncompleteSqlTransformer(LookMLViewTransformer):
 
 class DropDerivedViewPatternTransformer(LookMLViewTransformer):
     """
-    drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.
+    drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.
 
     Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME
     """
@@ -308,8 +326,8 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
     evaluate_to_true_regx: str
     remove_if_comment_line_regx: str
 
-    def __init__(self, source_config: LookMLSourceConfig):
-        super().__init__(source_config=source_config)
+    def __init__(self, source_config: LookMLSourceConfig, reporter: LookMLSourceReport):
+        super().__init__(source_config=source_config, reporter=reporter)
 
         # This regx will keep whatever after -- if looker_environment --
         self.evaluate_to_true_regx = r"-- if {} --".format(
@@ -335,6 +353,61 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
         return self._apply_regx(value)
 
 
+class LookmlConstantTransformer(LookMLViewTransformer):
+    """
+    Replace LookML constants @{constant} from the manifest/configuration.
+    """
+
+    CONSTANT_PATTERN = r"@{(\w+)}"  # Matches @{constant}
+
+    def __init__(
+        self,
+        source_config: LookMLSourceConfig,
+        reporter: LookMLSourceReport,
+        manifest_constants: Dict[str, "LookerConstant"],
+    ):
+        super().__init__(source_config=source_config, reporter=reporter)
+        self.manifest_constants = manifest_constants
+
+    def resolve_lookml_constant(self, text: str, view_name: Optional[str]) -> str:
+        """
+        Resolves LookML constants (@{ }) from the manifest or the config.
+        Logs warnings for misplaced or missing constants.
+        """
+
+        def replace_constants(match):
+            key = match.group(1)
+            # Resolve constant from config
+            if key in self.source_config.lookml_constants:
+                return str(self.source_config.lookml_constants.get(key))
+
+            # Resolve constant from manifest
+            if key in self.manifest_constants:
+                return self.manifest_constants[key].value
+
+            # Check if it's a misplaced lookml constant
+            if key in self.source_config.liquid_variables:
+                self.reporter.warning(
+                    title="Misplaced lookml constant",
+                    message="Use 'lookml_constants' instead of 'liquid_variables'.",
+                    context=f"Key {key}",
+                )
+                return f"@{{{key}}}"
+
+            self.reporter.warning(
+                title="LookML constant not found",
+                message="The constant is missing. Either add it under 'lookml_constants' in the config or define it in `manifest.lkml`.",
+                context=f"view-name: {view_name}, constant: {key}",
+            )
+            return f"@{{{key}}}"
+
+        # Resolve @{} (constant)
+        return re.sub(self.CONSTANT_PATTERN, replace_constants, text)
+
+    def _apply_transformation(self, value: str, view: dict) -> str:
+        return self.resolve_lookml_constant(text=value, view_name=view.get("name"))
+
+
 class TransformedLookMlView:
     """
     TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view.
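
To make the CONSTANT_PATTERN substitution concrete: re.sub calls the replacement function once per @{...} token, and unknown keys fall through to the literal token, exactly as replace_constants does above. A self-contained sketch with a hypothetical constants dict:

import re

CONSTANT_PATTERN = r"@{(\w+)}"  # same pattern as LookmlConstantTransformer
constants = {"schema_name": "analytics"}  # hypothetical resolved constants

def replace_constants(match: "re.Match[str]") -> str:
    key = match.group(1)
    # Unknown keys are left as a literal @{key}, mirroring the transformer's fallback
    return constants.get(key, f"@{{{key}}}")

print(re.sub(CONSTANT_PATTERN, replace_constants, "SELECT * FROM @{schema_name}.orders"))
# SELECT * FROM analytics.orders
print(re.sub(CONSTANT_PATTERN, replace_constants, "SELECT * FROM @{missing}.orders"))
# SELECT * FROM @{missing}.orders
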
@@ -390,24 +463,35 @@ class TransformedLookMlView:
 def process_lookml_template_language(
     source_config: LookMLSourceConfig,
     view_lkml_file_dict: dict,
+    reporter: LookMLSourceReport,
+    manifest_constants: Dict[str, "LookerConstant"] = {},
+    resolve_constants: bool = False,
 ) -> None:
     if "views" not in view_lkml_file_dict:
         return
 
     transformers: List[LookMLViewTransformer] = [
         LookMlIfCommentTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # First evaluate the -- if -- comments. Looker does the same
         LiquidVariableTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # Now resolve liquid variables
         DropDerivedViewPatternTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # Remove any ${} symbol
         IncompleteSqlTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # complete any incomplete sql
     ]
+    if resolve_constants:
+        transformers.append(
+            LookmlConstantTransformer(
+                source_config=source_config,
+                manifest_constants=manifest_constants,
+                reporter=reporter,
+            ),  # Resolve @{} constants with their corresponding values
+        )
 
     transformed_views: List[dict] = []
@@ -422,12 +506,18 @@ def process_lookml_template_language(
 def load_and_preprocess_file(
     path: Union[str, pathlib.Path],
     source_config: LookMLSourceConfig,
+    reporter: LookMLSourceReport,
+    manifest_constants: Dict[str, "LookerConstant"] = {},
+    resolve_constants: bool = False,
 ) -> dict:
     parsed = load_lkml(path)
 
     process_lookml_template_language(
         view_lkml_file_dict=parsed,
+        reporter=reporter,
         source_config=source_config,
+        manifest_constants=manifest_constants,
+        resolve_constants=resolve_constants,
     )
 
     return parsed

datahub/ingestion/source/looker/lookml_config.py

@@ -1,7 +1,7 @@
 import logging
 from dataclasses import dataclass, field as dataclass_field
 from datetime import timedelta
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Dict, Literal, Optional, Union
 
 import pydantic
 from pydantic import root_validator, validator
@@ -48,13 +48,17 @@ DERIVED_VIEW_PATTERN: str = r"\$\{([^}]*)\}"
 class LookMLSourceReport(StaleEntityRemovalSourceReport):
     git_clone_latency: Optional[timedelta] = None
     models_discovered: int = 0
-    models_dropped: List[str] = dataclass_field(default_factory=LossyList)
+    models_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
     views_discovered: int = 0
-    views_dropped: List[str] = dataclass_field(default_factory=LossyList)
-    views_dropped_unreachable: List[str] = dataclass_field(default_factory=LossyList)
+    views_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
+    views_dropped_unreachable: LossyList[str] = dataclass_field(
+        default_factory=LossyList
+    )
     query_parse_attempts: int = 0
     query_parse_failures: int = 0
-    query_parse_failure_views: List[str] = dataclass_field(default_factory=LossyList)
+    query_parse_failure_views: LossyList[str] = dataclass_field(
+        default_factory=LossyList
+    )
     _looker_api: Optional[LookerAPI] = None
 
     def report_models_scanned(self) -> None:
@@ -139,7 +143,10 @@ class LookMLSourceConfig(
     )
     emit_reachable_views_only: bool = Field(
         True,
-        description="When enabled, only views that are reachable from explores defined in the model files are emitted",
+        description=(
+            "When enabled, only views that are reachable from explores defined in the model files are emitted. "
+            "If set to False, all views imported in model files are emitted. Views that are unreachable (i.e. not explicitly defined in the model files) are currently not emitted, but are reported as warnings for debugging purposes."
+        ),
     )
     populate_sql_logic_for_missing_descriptions: bool = Field(
         False,
@@ -158,13 +165,27 @@ class LookMLSourceConfig(
         description="When enabled, looker refinement will be processed to adapt an existing view.",
     )
 
-    liquid_variable: Dict[Any, Any] = Field(
+    liquid_variables: Dict[Any, Any] = Field(
         {},
-        description="A dictionary containing Liquid variables and their corresponding values, utilized in SQL-defined "
+        description="A dictionary containing Liquid variables with their corresponding values, utilized in SQL-defined "
         "derived views. The Liquid template will be resolved in view.derived_table.sql and "
        "view.sql_table_name. Defaults to an empty dictionary.",
     )
 
+    _liquid_variable_deprecated = pydantic_renamed_field(
+        old_name="liquid_variable", new_name="liquid_variables", print_warning=True
+    )
+
+    lookml_constants: Dict[str, str] = Field(
+        {},
+        description=(
+            "A dictionary containing LookML constants (`@{constant_name}`) and their values. "
+            "If a constant is defined in the `manifest.lkml` file, its value will be used. "
+            "If not found in the manifest, the value from this config will be used instead. "
+            "Defaults to an empty dictionary."
+        ),
+    )
+
     looker_environment: Literal["prod", "dev"] = Field(
         "prod",
         description="A looker prod or dev environment. "

datahub/ingestion/source/looker/lookml_source.py

@@ -43,6 +43,7 @@ from datahub.ingestion.source.looker.looker_common import (
 from datahub.ingestion.source.looker.looker_connection import (
     get_connection_def_based_on_connection_string,
 )
+from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
 from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
 from datahub.ingestion.source.looker.looker_template_language import (
     load_and_preprocess_file,
@@ -59,6 +60,7 @@ from datahub.ingestion.source.looker.lookml_concept_context import (
 from datahub.ingestion.source.looker.lookml_config import (
     BASE_PROJECT_NAME,
     MODEL_FILE_EXTENSION,
+    VIEW_FILE_EXTENSION,
     LookerConnectionDefinition,
     LookMLSourceConfig,
     LookMLSourceReport,
@@ -253,6 +255,7 @@ class LookerManifest:
     # This must be set if the manifest has local_dependency entries.
     # See https://cloud.google.com/looker/docs/reference/param-manifest-project-name
     project_name: Optional[str]
+    constants: Optional[List[Dict[str, str]]]
 
     local_dependencies: List[str]
     remote_dependencies: List[LookerRemoteDependency]
@@ -309,11 +312,14 @@ class LookMLSource(StatefulIngestionSourceBase):
                 "manage_models permission enabled on this API key."
             ) from err
 
+        self.manifest_constants: Dict[str, "LookerConstant"] = {}
+
     def _load_model(self, path: str) -> LookerModel:
         logger.debug(f"Loading model from file {path}")
 
         parsed = load_and_preprocess_file(
             path=path,
+            reporter=self.reporter,
             source_config=self.source_config,
         )
@@ -499,27 +505,33 @@ class LookMLSource(StatefulIngestionSourceBase):
 
     def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
         manifest_file = folder / "manifest.lkml"
-        if manifest_file.exists():
-            manifest_dict = load_and_preprocess_file(
-                path=manifest_file, source_config=self.source_config
-            )
 
-            manifest = LookerManifest(
-                project_name=manifest_dict.get("project_name"),
-                local_dependencies=[
-                    x["project"] for x in manifest_dict.get("local_dependencys", [])
-                ],
-                remote_dependencies=[
-                    LookerRemoteDependency(
-                        name=x["name"], url=x["url"], ref=x.get("ref")
-                    )
-                    for x in manifest_dict.get("remote_dependencys", [])
-                ],
+        if not manifest_file.exists():
+            self.reporter.info(
+                message="manifest.lkml file missing from project",
+                context=str(manifest_file),
             )
-            return manifest
-        else:
             return None
 
+        manifest_dict = load_and_preprocess_file(
+            path=manifest_file,
+            source_config=self.source_config,
+            reporter=self.reporter,
+        )
+
+        manifest = LookerManifest(
+            project_name=manifest_dict.get("project_name"),
+            constants=manifest_dict.get("constants", []),
+            local_dependencies=[
+                x["project"] for x in manifest_dict.get("local_dependencys", [])
+            ],
+            remote_dependencies=[
+                LookerRemoteDependency(name=x["name"], url=x["url"], ref=x.get("ref"))
+                for x in manifest_dict.get("remote_dependencys", [])
+            ],
+        )
+        return manifest
+
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
             *super().get_workunit_processors(),
@@ -574,7 +586,10 @@ class LookMLSource(StatefulIngestionSourceBase):
                 self.base_projects_folder[project] = p_ref
 
         self._recursively_check_manifests(
-            tmp_dir, BASE_PROJECT_NAME, visited_projects
+            tmp_dir,
+            BASE_PROJECT_NAME,
+            visited_projects,
+            self.manifest_constants,
         )
 
         yield from self.get_internal_workunits()
@@ -587,7 +602,11 @@ class LookMLSource(StatefulIngestionSourceBase):
         )
 
     def _recursively_check_manifests(
-        self, tmp_dir: str, project_name: str, project_visited: Set[str]
+        self,
+        tmp_dir: str,
+        project_name: str,
+        project_visited: Set[str],
+        manifest_constants: Dict[str, "LookerConstant"],
     ) -> None:
         if project_name in project_visited:
             return
@@ -604,6 +623,14 @@ class LookMLSource(StatefulIngestionSourceBase):
         if not manifest:
             return
 
+        if manifest.constants:
+            for constant in manifest.constants:
+                if constant.get("name") and constant.get("value"):
+                    manifest_constants[constant["name"]] = LookerConstant(
+                        name=constant["name"],
+                        value=constant["value"],
+                    )
+
         # Special case handling if the root project has a name in the manifest file.
         if project_name == BASE_PROJECT_NAME and manifest.project_name:
             if (
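
For orientation: a manifest.lkml entry such as constant: schema_name { value: "analytics" } is parsed into a list of name/value dicts, which the loop above folds into manifest_constants. A standalone sketch, with LookerConstant replaced by a stand-in dataclass:

from dataclasses import dataclass
from typing import Dict, List

@dataclass
class LookerConstant:  # stand-in for the real looker_dataclasses.LookerConstant
    name: str
    value: str

# Roughly the shape the lkml parser produces for two manifest constants
constants: List[Dict[str, str]] = [
    {"name": "schema_name", "value": "analytics"},
    {"name": "connection", "value": "warehouse"},
]

manifest_constants: Dict[str, LookerConstant] = {}
for constant in constants:
    if constant.get("name") and constant.get("value"):
        manifest_constants[constant["name"]] = LookerConstant(
            name=constant["name"], value=constant["value"]
        )

print(manifest_constants["schema_name"].value)  # analytics
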
@@ -663,21 +690,27 @@ class LookMLSource(StatefulIngestionSourceBase):
                     project_visited.add(project_name)
                 else:
                     self._recursively_check_manifests(
-                        tmp_dir, remote_project.name, project_visited
+                        tmp_dir,
+                        remote_project.name,
+                        project_visited,
+                        manifest_constants,
                     )
 
         for project in manifest.local_dependencies:
-            self._recursively_check_manifests(tmp_dir, project, project_visited)
+            self._recursively_check_manifests(
+                tmp_dir, project, project_visited, manifest_constants
+            )
 
     def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
         assert self.source_config.base_folder
-
         viewfile_loader = LookerViewFileLoader(
             self.source_config.project_name,
             self.base_projects_folder,
             self.reporter,
             self.source_config,
+            self.manifest_constants,
         )
+        logger.debug(f"LookML Constants : {', '.join(self.manifest_constants.keys())}")
 
         # Some views can be mentioned by multiple 'include' statements and can be included via different connections.
@@ -884,6 +917,7 @@ class LookMLSource(StatefulIngestionSourceBase):
                         view_urn = maybe_looker_view.id.get_urn(
                             self.source_config
                         )
+
                         view_connection_mapping = view_connection_map.get(
                             view_urn
                         )
@@ -939,6 +973,9 @@ class LookMLSource(StatefulIngestionSourceBase):
                         str(maybe_looker_view.id)
                     )
 
+        if not self.source_config.emit_reachable_views_only:
+            self.report_skipped_unreachable_views(viewfile_loader, processed_view_map)
+
         if (
             self.source_config.tag_measures_and_dimensions
             and self.reporter.events_produced != 0
@@ -966,5 +1003,56 @@ class LookMLSource(StatefulIngestionSourceBase):
             ),
         ).as_workunit()
 
+    def report_skipped_unreachable_views(
+        self,
+        viewfile_loader: LookerViewFileLoader,
+        processed_view_map: Dict[str, Set[str]] = {},
+    ) -> None:
+        view_files: Dict[str, List[pathlib.Path]] = {}
+        for project, folder_path in self.base_projects_folder.items():
+            folder = pathlib.Path(folder_path)
+            view_files[project] = list(folder.glob(f"**/*{VIEW_FILE_EXTENSION}"))
+
+        skipped_view_paths: Dict[str, List[str]] = {}
+        for project, views in view_files.items():
+            skipped_paths: Set[str] = set()
+
+            for view_path in views:
+                # Check if the view is already in processed_view_map
+                if not any(
+                    str(view_path) in view_set
+                    for view_set in processed_view_map.values()
+                ):
+                    looker_viewfile = viewfile_loader.load_viewfile(
+                        path=str(view_path),
+                        project_name=project,
+                        connection=None,
+                        reporter=self.reporter,
+                    )
+
+                    if looker_viewfile is not None:
+                        for raw_view in looker_viewfile.views:
+                            raw_view_name = raw_view.get("name", "")
+
+                            if (
+                                raw_view_name
+                                and self.source_config.view_pattern.allowed(
+                                    raw_view_name
+                                )
+                            ):
+                                skipped_paths.add(str(view_path))
+
+            skipped_view_paths[project] = list(skipped_paths)
+
+        for project, view_paths in skipped_view_paths.items():
+            for path in view_paths:
+                self.reporter.report_warning(
+                    title="Skipped View File",
+                    message=(
+                        "The Looker view file was skipped because it may not be referenced by any models."
+                    ),
+                    context=f"Project: {project}, View File Path: {path}",
+                )
+
     def get_report(self):
         return self.reporter

datahub/ingestion/source/mode.py

@@ -24,6 +24,7 @@ from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.configuration.source_common import DatasetLineageProviderConfigBase
+from datahub.configuration.validate_field_removal import pydantic_removed_field
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import (
     ContainerKey,
@@ -155,10 +156,7 @@ class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase):
     workspace: str = Field(
         description="The Mode workspace name. Find it in Settings > Workspace > Details."
     )
-    default_schema: str = Field(
-        default="public",
-        description="Default schema to use when schema is not provided in an SQL query",
-    )
+    _default_schema = pydantic_removed_field("default_schema")
 
     space_pattern: AllowDenyPattern = Field(
         default=AllowDenyPattern(
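
pydantic_removed_field is the removal-side counterpart of the rename helper used in lookml_config.py: old recipes that still set default_schema keep validating, but the value is dropped with a warning. A simplified stand-in for the pattern, not the actual helper:

import warnings

from pydantic import BaseModel, root_validator

class ExampleModeConfig(BaseModel):
    workspace: str

    @root_validator(pre=True)
    def _drop_default_schema(cls, values: dict) -> dict:
        # Accept and discard the retired key instead of failing validation.
        if values.pop("default_schema", None) is not None:
            warnings.warn("default_schema is no longer used and is ignored")
        return values

print(ExampleModeConfig.parse_obj({"workspace": "acme", "default_schema": "public"}))
# workspace='acme'
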

datahub/ingestion/source/mongodb.py

@@ -68,6 +68,7 @@ from datahub.metadata.schema_classes import (
     UnionTypeClass,
 )
 from datahub.metadata.urns import DatasetUrn
+from datahub.utilities.lossy_collections import LossyList
 
 logger = logging.getLogger(__name__)
 
@@ -143,7 +144,7 @@ class MongoDBConfig(
 
 @dataclass
 class MongoDBSourceReport(StaleEntityRemovalSourceReport):
-    filtered: List[str] = field(default_factory=list)
+    filtered: LossyList[str] = field(default_factory=LossyList)
 
     def report_dropped(self, name: str) -> None:
         self.filtered.append(name)
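
This List-to-LossyList swap, repeated across the report classes below, bounds report size: a LossyList records only a sample of appended items plus a running total, so a source that drops millions of entities cannot bloat the ingestion report. A rough stand-in illustrating the idea (the real class in datahub.utilities.lossy_collections differs in details):

from typing import Generic, List, TypeVar

T = TypeVar("T")

class BoundedList(Generic[T]):
    """Keeps at most max_elements items plus a count of everything appended."""

    def __init__(self, max_elements: int = 10) -> None:
        self.items: List[T] = []
        self.max_elements = max_elements
        self.total = 0

    def append(self, item: T) -> None:
        self.total += 1
        if len(self.items) < self.max_elements:
            self.items.append(item)

    def __repr__(self) -> str:
        return f"{self.items!r} (showing {len(self.items)} of {self.total})"

report_filtered: BoundedList[str] = BoundedList()
for i in range(1000):
    report_filtered.append(f"dropped_collection_{i}")
print(report_filtered)  # first 10 items, "(showing 10 of 1000)"
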

datahub/ingestion/source/nifi.py

@@ -46,6 +46,7 @@ from datahub.metadata.schema_classes import (
     DatasetPropertiesClass,
 )
 from datahub.specific.datajob import DataJobPatchBuilder
+from datahub.utilities.lossy_collections import LossyList
 
 logger = logging.getLogger(__name__)
 NIFI = "nifi"
@@ -452,7 +453,7 @@ def get_attribute_value(attr_lst: List[dict], attr_name: str) -> Optional[str]:
 
 @dataclass
 class NifiSourceReport(SourceReport):
-    filtered: List[str] = field(default_factory=list)
+    filtered: LossyList[str] = field(default_factory=LossyList)
 
     def report_dropped(self, ent_name: str) -> None:
         self.filtered.append(ent_name)

datahub/ingestion/source/powerbi/config.py

@@ -195,8 +195,8 @@ class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
 
     dashboards_scanned: int = 0
     charts_scanned: int = 0
-    filtered_dashboards: List[str] = dataclass_field(default_factory=list)
-    filtered_charts: List[str] = dataclass_field(default_factory=list)
+    filtered_dashboards: LossyList[str] = dataclass_field(default_factory=LossyList)
+    filtered_charts: LossyList[str] = dataclass_field(default_factory=LossyList)
 
     m_query_parse_timer: PerfTimer = dataclass_field(default_factory=PerfTimer)
     m_query_parse_attempts: int = 0

datahub/ingestion/source/powerbi_report_server/report_server.py

@@ -53,6 +53,7 @@ from datahub.metadata.schema_classes import (
     StatusClass,
 )
 from datahub.utilities.dedup_list import deduplicate_list
+from datahub.utilities.lossy_collections import LossyList
 
 LOGGER = logging.getLogger(__name__)
 
@@ -476,7 +477,7 @@ class Mapper:
 
 @dataclass
 class PowerBiReportServerDashboardSourceReport(SourceReport):
     scanned_report: int = 0
-    filtered_reports: List[str] = dataclass_field(default_factory=list)
+    filtered_reports: LossyList[str] = dataclass_field(default_factory=LossyList)
 
     def report_scanned(self, count: int = 1) -> None:
         self.scanned_report += count

datahub/ingestion/source/redash.py

@@ -2,7 +2,7 @@ import logging
 import math
 import sys
 from dataclasses import dataclass, field
-from typing import Dict, Iterable, List, Optional, Set
+from typing import Dict, Iterable, List, Optional
 
 import dateutil.parser as dp
 from packaging import version
@@ -39,7 +39,7 @@ from datahub.metadata.schema_classes import (
     DashboardInfoClass,
 )
 from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
-from datahub.utilities.lossy_collections import LossyDict, LossyList
+from datahub.utilities.lossy_collections import LossyDict, LossyList, LossySet
 from datahub.utilities.perf_timer import PerfTimer
 from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor
 
@@ -280,9 +280,9 @@ class RedashConfig(ConfigModel):
 class RedashSourceReport(SourceReport):
     items_scanned: int = 0
     filtered: LossyList[str] = field(default_factory=LossyList)
-    queries_problem_parsing: Set[str] = field(default_factory=set)
-    queries_no_dataset: Set[str] = field(default_factory=set)
-    charts_no_input: Set[str] = field(default_factory=set)
+    queries_problem_parsing: LossySet[str] = field(default_factory=LossySet)
+    queries_no_dataset: LossySet[str] = field(default_factory=LossySet)
+    charts_no_input: LossySet[str] = field(default_factory=LossySet)
     total_queries: Optional[int] = field(
         default=None,
     )

datahub/ingestion/source/salesforce.py

@@ -1,6 +1,7 @@
 import json
 import logging
 import time
+from dataclasses import dataclass, field as dataclass_field
 from datetime import datetime
 from enum import Enum
 from typing import Any, Dict, Iterable, List, Optional
@@ -60,6 +61,7 @@ from datahub.metadata.schema_classes import (
     TagAssociationClass,
 )
 from datahub.utilities import config_clean
+from datahub.utilities.lossy_collections import LossyList
 
 logger = logging.getLogger(__name__)
 
@@ -146,8 +148,9 @@ class SalesforceConfig(DatasetSourceConfigMixin):
         return config_clean.remove_trailing_slashes(v)
 
 
+@dataclass
 class SalesforceSourceReport(SourceReport):
-    filtered: List[str] = []
+    filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
 
     def report_dropped(self, ent_name: str) -> None:
         self.filtered.append(ent_name)
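
The @dataclass decorator added here fixes a real bug, not just the annotation: on a plain class, filtered: List[str] = [] is a single list object shared by every instance in the process. A minimal demonstration of the difference:

from dataclasses import dataclass, field
from typing import List

class SharedBug:
    filtered: List[str] = []  # one list, shared across all instances

a, b = SharedBug(), SharedBug()
a.filtered.append("dropped_object")
print(b.filtered)  # ['dropped_object'] -- b sees a's data

@dataclass
class Fixed:
    filtered: List[str] = field(default_factory=list)  # fresh list per instance

c, d = Fixed(), Fixed()
c.filtered.append("dropped_object")
print(d.filtered)  # []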