acryl-datahub 0.15.0.5rc4__py3-none-any.whl → 0.15.0.5rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -2,7 +2,7 @@ import logging
2
2
  import pathlib
3
3
  import re
4
4
  from abc import ABC, abstractmethod
5
- from typing import Any, ClassVar, Dict, List, Optional, Set, Union
5
+ from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union
6
6
 
7
7
  from deepmerge import always_merger
8
8
  from liquid import Undefined
@@ -27,8 +27,12 @@ from datahub.ingestion.source.looker.looker_liquid_tag import (
27
27
  from datahub.ingestion.source.looker.lookml_config import (
28
28
  DERIVED_VIEW_PATTERN,
29
29
  LookMLSourceConfig,
30
+ LookMLSourceReport,
30
31
  )
31
32
 
33
+ if TYPE_CHECKING:
34
+ from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
35
+
32
36
  logger = logging.getLogger(__name__)
33
37
 
34
38
 
@@ -82,7 +86,12 @@ class SpecialVariable:
82
86
  return self._create_new_liquid_variables_with_default(variables=variables)
83
87
 
84
88
 
85
- def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
89
+ def resolve_liquid_variable(
90
+ text: str,
91
+ view_name: str,
92
+ liquid_variable: Dict[Any, Any],
93
+ report: LookMLSourceReport,
94
+ ) -> str:
86
95
  # Set variable value to NULL if not present in liquid_variable dictionary
87
96
  Undefined.__str__ = lambda instance: "NULL" # type: ignore
88
97
  try:
@@ -96,6 +105,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
96
105
  # Resolve liquid template
97
106
  return create_template(text).render(liquid_variable)
98
107
  except LiquidSyntaxError as e:
108
+ # TODO: Will add warning once we get rid of duplicate warning messages for the same view
99
109
  logger.warning(f"Unsupported liquid template encountered. error [{e.message}]")
100
110
  # TODO: There are some tag specific to looker and python-liquid library does not understand them. currently
101
111
  # we are not parsing such liquid template.
@@ -103,6 +113,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
103
113
  # See doc: https://cloud.google.com/looker/docs/templated-filters and look for { % condition region %}
104
114
  # order.region { % endcondition %}
105
115
  except CustomTagException as e:
116
+ # TODO: Will add warning once we get rid of duplicate warning messages for the same view
106
117
  logger.warning(e)
107
118
  logger.debug(e, exc_info=e)
108
119
 
@@ -192,15 +203,20 @@ class LookMLViewTransformer(ABC):
192
203
 
193
204
  source_config: LookMLSourceConfig
194
205
 
195
- def __init__(self, source_config: LookMLSourceConfig):
206
+ def __init__(
207
+ self,
208
+ source_config: LookMLSourceConfig,
209
+ reporter: LookMLSourceReport,
210
+ ):
196
211
  self.source_config = source_config
212
+ self.reporter = reporter
197
213
 
198
214
  def transform(self, view: dict) -> dict:
199
215
  value_to_transform: Optional[str] = None
200
216
 
201
- # is_attribute_supported check is required because not all transformer works on all attributes in current
202
- # case mostly all transformer works on sql_table_name and derived.sql attributes,
203
- # however IncompleteSqlTransformer only transform the derived.sql attribute
217
+ # is_attribute_supported check is required because not all transformers work on all attributes in the current
218
+ # case, mostly all transformers work on sql_table_name and derived.sql attributes;
219
+ # however, IncompleteSqlTransformer only transforms the derived.sql attribute
204
220
  if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME):
205
221
  # Give precedence to already processed transformed view.sql_table_name to apply more transformation
206
222
  value_to_transform = view.get(
@@ -252,7 +268,9 @@ class LiquidVariableTransformer(LookMLViewTransformer):
252
268
  def _apply_transformation(self, value: str, view: dict) -> str:
253
269
  return resolve_liquid_variable(
254
270
  text=value,
255
- liquid_variable=self.source_config.liquid_variable,
271
+ liquid_variable=self.source_config.liquid_variables,
272
+ view_name=view["name"],
273
+ report=self.reporter,
256
274
  )
257
275
 
258
276
 
@@ -287,7 +305,7 @@ class IncompleteSqlTransformer(LookMLViewTransformer):
287
305
 
288
306
  class DropDerivedViewPatternTransformer(LookMLViewTransformer):
289
307
  """
290
- drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.
308
+ drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.
291
309
 
292
310
  Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME
293
311
  """
@@ -308,8 +326,8 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
308
326
  evaluate_to_true_regx: str
309
327
  remove_if_comment_line_regx: str
310
328
 
311
- def __init__(self, source_config: LookMLSourceConfig):
312
- super().__init__(source_config=source_config)
329
+ def __init__(self, source_config: LookMLSourceConfig, reporter: LookMLSourceReport):
330
+ super().__init__(source_config=source_config, reporter=reporter)
313
331
 
314
332
  # This regx will keep whatever after -- if looker_environment --
315
333
  self.evaluate_to_true_regx = r"-- if {} --".format(
@@ -335,6 +353,61 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
335
353
  return self._apply_regx(value)
336
354
 
337
355
 
356
+ class LookmlConstantTransformer(LookMLViewTransformer):
357
+ """
358
+ Replace LookML constants @{constant} from the manifest/configuration.
359
+ """
360
+
361
+ CONSTANT_PATTERN = r"@{(\w+)}" # Matches @{constant}
362
+
363
+ def __init__(
364
+ self,
365
+ source_config: LookMLSourceConfig,
366
+ reporter: LookMLSourceReport,
367
+ manifest_constants: Dict[str, "LookerConstant"],
368
+ ):
369
+ super().__init__(source_config=source_config, reporter=reporter)
370
+ self.manifest_constants = manifest_constants
371
+
372
+ def resolve_lookml_constant(self, text: str, view_name: Optional[str]) -> str:
373
+ """
374
+ Resolves LookML constants (@{ }) from manifest or config.
375
+ Logs warnings for misplaced or missing variables.
376
+ """
377
+
378
+ def replace_constants(match):
379
+ key = match.group(1)
380
+ # Resolve constant from config
381
+ if key in self.source_config.lookml_constants:
382
+ return str(self.source_config.lookml_constants.get(key))
383
+
384
+ # Resolve constant from manifest
385
+ if key in self.manifest_constants:
386
+ return self.manifest_constants[key].value
387
+
388
+ # Check if it's a misplaced lookml constant
389
+ if key in self.source_config.liquid_variables:
390
+ self.reporter.warning(
391
+ title="Misplaced lookml constant",
392
+ message="Use 'lookml_constants' instead of 'liquid_variables'.",
393
+ context=f"Key {key}",
394
+ )
395
+ return f"@{{{key}}}"
396
+
397
+ self.reporter.warning(
398
+ title="LookML constant not found",
399
+ message="The constant is missing. Either add it under 'lookml_constants' in the config or define it in `manifest.lkml`.",
400
+ context=f"view-name: {view_name}, constant: {key}",
401
+ )
402
+ return f"@{{{key}}}"
403
+
404
+ # Resolve @{} (constant)
405
+ return re.sub(self.CONSTANT_PATTERN, replace_constants, text)
406
+
407
+ def _apply_transformation(self, value: str, view: dict) -> str:
408
+ return self.resolve_lookml_constant(text=value, view_name=view.get("name"))
409
+
410
+
338
411
  class TransformedLookMlView:
339
412
  """
340
413
  TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view.
@@ -390,24 +463,35 @@ class TransformedLookMlView:
390
463
  def process_lookml_template_language(
391
464
  source_config: LookMLSourceConfig,
392
465
  view_lkml_file_dict: dict,
466
+ reporter: LookMLSourceReport,
467
+ manifest_constants: Dict[str, "LookerConstant"] = {},
468
+ resolve_constants: bool = False,
393
469
  ) -> None:
394
470
  if "views" not in view_lkml_file_dict:
395
471
  return
396
472
 
397
473
  transformers: List[LookMLViewTransformer] = [
398
474
  LookMlIfCommentTransformer(
399
- source_config=source_config
475
+ source_config=source_config, reporter=reporter
400
476
  ), # First evaluate the -- if -- comments. Looker does the same
401
477
  LiquidVariableTransformer(
402
- source_config=source_config
478
+ source_config=source_config, reporter=reporter
403
479
  ), # Now resolve liquid variables
404
480
  DropDerivedViewPatternTransformer(
405
- source_config=source_config
481
+ source_config=source_config, reporter=reporter
406
482
  ), # Remove any ${} symbol
407
483
  IncompleteSqlTransformer(
408
- source_config=source_config
484
+ source_config=source_config, reporter=reporter
409
485
  ), # complete any incomplete sql
410
486
  ]
487
+ if resolve_constants:
488
+ transformers.append(
489
+ LookmlConstantTransformer(
490
+ source_config=source_config,
491
+ manifest_constants=manifest_constants,
492
+ reporter=reporter,
493
+ ), # Resolve @{} constant with its corresponding value
494
+ )
411
495
 
412
496
  transformed_views: List[dict] = []
413
497
 
@@ -422,12 +506,18 @@ def process_lookml_template_language(
422
506
  def load_and_preprocess_file(
423
507
  path: Union[str, pathlib.Path],
424
508
  source_config: LookMLSourceConfig,
509
+ reporter: LookMLSourceReport,
510
+ manifest_constants: Dict[str, "LookerConstant"] = {},
511
+ resolve_constants: bool = False,
425
512
  ) -> dict:
426
513
  parsed = load_lkml(path)
427
514
 
428
515
  process_lookml_template_language(
429
516
  view_lkml_file_dict=parsed,
517
+ reporter=reporter,
430
518
  source_config=source_config,
519
+ manifest_constants=manifest_constants,
520
+ resolve_constants=resolve_constants,
431
521
  )
432
522
 
433
523
  return parsed
@@ -161,13 +161,27 @@ class LookMLSourceConfig(
161
161
  description="When enabled, looker refinement will be processed to adapt an existing view.",
162
162
  )
163
163
 
164
- liquid_variable: Dict[Any, Any] = Field(
164
+ liquid_variables: Dict[Any, Any] = Field(
165
165
  {},
166
- description="A dictionary containing Liquid variables and their corresponding values, utilized in SQL-defined "
166
+ description="A dictionary containing Liquid variables with their corresponding values, utilized in SQL-defined "
167
167
  "derived views. The Liquid template will be resolved in view.derived_table.sql and "
168
168
  "view.sql_table_name. Defaults to an empty dictionary.",
169
169
  )
170
170
 
171
+ _liquid_variable_deprecated = pydantic_renamed_field(
172
+ old_name="liquid_variable", new_name="liquid_variables", print_warning=True
173
+ )
174
+
175
+ lookml_constants: Dict[str, str] = Field(
176
+ {},
177
+ description=(
178
+ "A dictionary containing LookML constants (`@{constant_name}`) and their values. "
179
+ "If a constant is defined in the `manifest.lkml` file, its value will be used. "
180
+ "If not found in the manifest, the value from this config will be used instead. "
181
+ "Defaults to an empty dictionary."
182
+ ),
183
+ )
184
+
171
185
  looker_environment: Literal["prod", "dev"] = Field(
172
186
  "prod",
173
187
  description="A looker prod or dev environment. "
@@ -43,6 +43,7 @@ from datahub.ingestion.source.looker.looker_common import (
43
43
  from datahub.ingestion.source.looker.looker_connection import (
44
44
  get_connection_def_based_on_connection_string,
45
45
  )
46
+ from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
46
47
  from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
47
48
  from datahub.ingestion.source.looker.looker_template_language import (
48
49
  load_and_preprocess_file,
@@ -254,6 +255,7 @@ class LookerManifest:
254
255
  # This must be set if the manifest has local_dependency entries.
255
256
  # See https://cloud.google.com/looker/docs/reference/param-manifest-project-name
256
257
  project_name: Optional[str]
258
+ constants: Optional[List[Dict[str, str]]]
257
259
 
258
260
  local_dependencies: List[str]
259
261
  remote_dependencies: List[LookerRemoteDependency]
@@ -310,11 +312,14 @@ class LookMLSource(StatefulIngestionSourceBase):
310
312
  "manage_models permission enabled on this API key."
311
313
  ) from err
312
314
 
315
+ self.manifest_constants: Dict[str, "LookerConstant"] = {}
316
+
313
317
  def _load_model(self, path: str) -> LookerModel:
314
318
  logger.debug(f"Loading model from file {path}")
315
319
 
316
320
  parsed = load_and_preprocess_file(
317
321
  path=path,
322
+ reporter=self.reporter,
318
323
  source_config=self.source_config,
319
324
  )
320
325
 
@@ -500,27 +505,33 @@ class LookMLSource(StatefulIngestionSourceBase):
500
505
 
501
506
  def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
502
507
  manifest_file = folder / "manifest.lkml"
503
- if manifest_file.exists():
504
- manifest_dict = load_and_preprocess_file(
505
- path=manifest_file, source_config=self.source_config
506
- )
507
508
 
508
- manifest = LookerManifest(
509
- project_name=manifest_dict.get("project_name"),
510
- local_dependencies=[
511
- x["project"] for x in manifest_dict.get("local_dependencys", [])
512
- ],
513
- remote_dependencies=[
514
- LookerRemoteDependency(
515
- name=x["name"], url=x["url"], ref=x.get("ref")
516
- )
517
- for x in manifest_dict.get("remote_dependencys", [])
518
- ],
509
+ if not manifest_file.exists():
510
+ self.reporter.info(
511
+ message="manifest.lkml file missing from project",
512
+ context=str(manifest_file),
519
513
  )
520
- return manifest
521
- else:
522
514
  return None
523
515
 
516
+ manifest_dict = load_and_preprocess_file(
517
+ path=manifest_file,
518
+ source_config=self.source_config,
519
+ reporter=self.reporter,
520
+ )
521
+
522
+ manifest = LookerManifest(
523
+ project_name=manifest_dict.get("project_name"),
524
+ constants=manifest_dict.get("constants", []),
525
+ local_dependencies=[
526
+ x["project"] for x in manifest_dict.get("local_dependencys", [])
527
+ ],
528
+ remote_dependencies=[
529
+ LookerRemoteDependency(name=x["name"], url=x["url"], ref=x.get("ref"))
530
+ for x in manifest_dict.get("remote_dependencys", [])
531
+ ],
532
+ )
533
+ return manifest
534
+
524
535
  def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
525
536
  return [
526
537
  *super().get_workunit_processors(),
@@ -575,7 +586,10 @@ class LookMLSource(StatefulIngestionSourceBase):
575
586
  self.base_projects_folder[project] = p_ref
576
587
 
577
588
  self._recursively_check_manifests(
578
- tmp_dir, BASE_PROJECT_NAME, visited_projects
589
+ tmp_dir,
590
+ BASE_PROJECT_NAME,
591
+ visited_projects,
592
+ self.manifest_constants,
579
593
  )
580
594
 
581
595
  yield from self.get_internal_workunits()
@@ -588,7 +602,11 @@ class LookMLSource(StatefulIngestionSourceBase):
588
602
  )
589
603
 
590
604
  def _recursively_check_manifests(
591
- self, tmp_dir: str, project_name: str, project_visited: Set[str]
605
+ self,
606
+ tmp_dir: str,
607
+ project_name: str,
608
+ project_visited: Set[str],
609
+ manifest_constants: Dict[str, "LookerConstant"],
592
610
  ) -> None:
593
611
  if project_name in project_visited:
594
612
  return
@@ -605,6 +623,14 @@ class LookMLSource(StatefulIngestionSourceBase):
605
623
  if not manifest:
606
624
  return
607
625
 
626
+ if manifest.constants:
627
+ for constant in manifest.constants:
628
+ if constant.get("name") and constant.get("value"):
629
+ manifest_constants[constant["name"]] = LookerConstant(
630
+ name=constant["name"],
631
+ value=constant["value"],
632
+ )
633
+
608
634
  # Special case handling if the root project has a name in the manifest file.
609
635
  if project_name == BASE_PROJECT_NAME and manifest.project_name:
610
636
  if (
@@ -664,21 +690,27 @@ class LookMLSource(StatefulIngestionSourceBase):
664
690
  project_visited.add(project_name)
665
691
  else:
666
692
  self._recursively_check_manifests(
667
- tmp_dir, remote_project.name, project_visited
693
+ tmp_dir,
694
+ remote_project.name,
695
+ project_visited,
696
+ manifest_constants,
668
697
  )
669
698
 
670
699
  for project in manifest.local_dependencies:
671
- self._recursively_check_manifests(tmp_dir, project, project_visited)
700
+ self._recursively_check_manifests(
701
+ tmp_dir, project, project_visited, manifest_constants
702
+ )
672
703
 
673
704
  def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
674
705
  assert self.source_config.base_folder
675
-
676
706
  viewfile_loader = LookerViewFileLoader(
677
707
  self.source_config.project_name,
678
708
  self.base_projects_folder,
679
709
  self.reporter,
680
710
  self.source_config,
711
+ self.manifest_constants,
681
712
  )
713
+ logger.debug(f"LookML Constants : {', '.join(self.manifest_constants.keys())}")
682
714
 
683
715
  # Some views can be mentioned by multiple 'include' statements and can be included via different connections.
684
716
 
@@ -5,6 +5,7 @@ import logging
5
5
  import os
6
6
  import os.path
7
7
  import platform
8
+ import re
8
9
  from dataclasses import dataclass
9
10
  from typing import Dict, Iterable, List, Optional, Union
10
11
 
@@ -33,6 +34,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
33
34
  from datahub.ingestion.source.snowflake.constants import (
34
35
  GENERIC_PERMISSION_ERROR_KEY,
35
36
  SnowflakeEdition,
37
+ SnowflakeObjectDomain,
36
38
  )
37
39
  from datahub.ingestion.source.snowflake.snowflake_assertion import (
38
40
  SnowflakeAssertionsHandler,
@@ -162,6 +164,8 @@ class SnowflakeV2Source(
162
164
  self.data_dictionary = SnowflakeDataDictionary(connection=self.connection)
163
165
  self.lineage_extractor: Optional[SnowflakeLineageExtractor] = None
164
166
 
167
+ self.discovered_datasets: Optional[List[str]] = None
168
+
165
169
  self.aggregator: SqlParsingAggregator = self._exit_stack.enter_context(
166
170
  SqlParsingAggregator(
167
171
  platform=self.identifiers.platform,
@@ -182,6 +186,8 @@ class SnowflakeV2Source(
182
186
  generate_usage_statistics=False,
183
187
  generate_operations=False,
184
188
  format_queries=self.config.format_sql_queries,
189
+ is_temp_table=self._is_temp_table,
190
+ is_allowed_table=self._is_allowed_table,
185
191
  )
186
192
  )
187
193
  self.report.sql_aggregator = self.aggregator.report
@@ -444,6 +450,34 @@ class SnowflakeV2Source(
444
450
 
445
451
  return _report
446
452
 
453
+ def _is_temp_table(self, name: str) -> bool:
454
+ if any(
455
+ re.match(pattern, name, flags=re.IGNORECASE)
456
+ for pattern in self.config.temporary_tables_pattern
457
+ ):
458
+ return True
459
+
460
+ # This is also a temp table if
461
+ # 1. this name would be allowed by the dataset patterns, and
462
+ # 2. we have a list of discovered tables, and
463
+ # 3. it's not in the discovered tables list
464
+ if (
465
+ self.filters.is_dataset_pattern_allowed(name, SnowflakeObjectDomain.TABLE)
466
+ and self.discovered_datasets
467
+ and name not in self.discovered_datasets
468
+ ):
469
+ return True
470
+
471
+ return False
472
+
473
+ def _is_allowed_table(self, name: str) -> bool:
474
+ if self.discovered_datasets and name not in self.discovered_datasets:
475
+ return False
476
+
477
+ return self.filters.is_dataset_pattern_allowed(
478
+ name, SnowflakeObjectDomain.TABLE
479
+ )
480
+
447
481
  def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
448
482
  return [
449
483
  *super().get_workunit_processors(),
@@ -513,7 +547,7 @@ class SnowflakeV2Source(
513
547
  )
514
548
  return
515
549
 
516
- discovered_datasets = discovered_tables + discovered_views
550
+ self.discovered_datasets = discovered_tables + discovered_views
517
551
 
518
552
  if self.config.use_queries_v2:
519
553
  with self.report.new_stage(f"*: {VIEW_PARSING}"):
@@ -538,13 +572,14 @@ class SnowflakeV2Source(
538
572
  filters=self.filters,
539
573
  identifiers=self.identifiers,
540
574
  schema_resolver=schema_resolver,
541
- discovered_tables=discovered_datasets,
575
+ discovered_tables=self.discovered_datasets,
542
576
  graph=self.ctx.graph,
543
577
  )
544
578
 
545
579
  # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs
546
580
  # but a shared schema resolver. That's fine for now though - once we remove the old lineage/usage extractors,
547
581
  # it should be pretty straightforward to refactor this and only initialize the aggregator once.
582
+ # This also applies to the _is_temp_table and _is_allowed_table methods above, which are duplicated from SnowflakeQueriesExtractor.
548
583
  self.report.queries_extractor = queries_extractor.report
549
584
  yield from queries_extractor.get_workunits_internal()
550
585
  queries_extractor.close()
@@ -568,12 +603,14 @@ class SnowflakeV2Source(
568
603
  if (
569
604
  self.config.include_usage_stats or self.config.include_operational_stats
570
605
  ) and self.usage_extractor:
571
- yield from self.usage_extractor.get_usage_workunits(discovered_datasets)
606
+ yield from self.usage_extractor.get_usage_workunits(
607
+ self.discovered_datasets
608
+ )
572
609
 
573
610
  if self.config.include_assertion_results:
574
611
  yield from SnowflakeAssertionsHandler(
575
612
  self.config, self.report, self.connection, self.identifiers
576
- ).get_assertion_workunits(discovered_datasets)
613
+ ).get_assertion_workunits(self.discovered_datasets)
577
614
 
578
615
  self.connection.close()
579
616