acryl-datahub 0.15.0.5rc4__py3-none-any.whl → 0.15.0.5rc6__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/METADATA +2355 -2355
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/RECORD +18 -15
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/cli/container_cli.py +1 -64
- datahub/emitter/composite_emitter.py +36 -0
- datahub/ingestion/source/apply/__init__.py +0 -0
- datahub/ingestion/source/apply/datahub_apply.py +223 -0
- datahub/ingestion/source/looker/looker_config.py +3 -1
- datahub/ingestion/source/looker/looker_dataclasses.py +8 -0
- datahub/ingestion/source/looker/looker_file_loader.py +14 -3
- datahub/ingestion/source/looker/looker_template_language.py +104 -14
- datahub/ingestion/source/looker/lookml_config.py +16 -2
- datahub/ingestion/source/looker/lookml_source.py +54 -22
- datahub/ingestion/source/snowflake/snowflake_v2.py +41 -4
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/looker/looker_template_language.py (+104 -14)

@@ -2,7 +2,7 @@ import logging
 import pathlib
 import re
 from abc import ABC, abstractmethod
-from typing import Any, ClassVar, Dict, List, Optional, Set, Union
+from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union

 from deepmerge import always_merger
 from liquid import Undefined
@@ -27,8 +27,12 @@ from datahub.ingestion.source.looker.looker_liquid_tag import (
 from datahub.ingestion.source.looker.lookml_config import (
     DERIVED_VIEW_PATTERN,
     LookMLSourceConfig,
+    LookMLSourceReport,
 )

+if TYPE_CHECKING:
+    from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
+
 logger = logging.getLogger(__name__)


@@ -82,7 +86,12 @@ class SpecialVariable:
         return self._create_new_liquid_variables_with_default(variables=variables)


-def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
+def resolve_liquid_variable(
+    text: str,
+    view_name: str,
+    liquid_variable: Dict[Any, Any],
+    report: LookMLSourceReport,
+) -> str:
     # Set variable value to NULL if not present in liquid_variable dictionary
     Undefined.__str__ = lambda instance: "NULL"  # type: ignore
     try:
@@ -96,6 +105,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
         # Resolve liquid template
         return create_template(text).render(liquid_variable)
     except LiquidSyntaxError as e:
+        # TODO: Will add warning once we get rid of duplcate warning message for same view
         logger.warning(f"Unsupported liquid template encountered. error [{e.message}]")
         # TODO: There are some tag specific to looker and python-liquid library does not understand them. currently
         # we are not parsing such liquid template.
@@ -103,6 +113,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
         # See doc: https://cloud.google.com/looker/docs/templated-filters and look for { % condition region %}
         # order.region { % endcondition %}
     except CustomTagException as e:
+        # TODO: Will add warning once we get rid of duplcate warning message for same view
         logger.warning(e)
         logger.debug(e, exc_info=e)

@@ -192,15 +203,20 @@ class LookMLViewTransformer(ABC):

     source_config: LookMLSourceConfig

-    def __init__(self, source_config: LookMLSourceConfig):
+    def __init__(
+        self,
+        source_config: LookMLSourceConfig,
+        reporter: LookMLSourceReport,
+    ):
         self.source_config = source_config
+        self.reporter = reporter

     def transform(self, view: dict) -> dict:
         value_to_transform: Optional[str] = None

-        # is_attribute_supported check is required because not all
-        #
-        # however IncompleteSqlTransformer only transform the derived.sql attribute
+        # is_attribute_supported check is required because not all transformers work on all attributes in the current
+        # case, mostly all transformers work on sql_table_name and derived.sql attributes;
+        # however, IncompleteSqlTransformer only transform the derived.sql attribute
         if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME):
             # Give precedence to already processed transformed view.sql_table_name to apply more transformation
             value_to_transform = view.get(
@@ -252,7 +268,9 @@ class LiquidVariableTransformer(LookMLViewTransformer):
     def _apply_transformation(self, value: str, view: dict) -> str:
         return resolve_liquid_variable(
             text=value,
-            liquid_variable=self.source_config.liquid_variable,
+            liquid_variable=self.source_config.liquid_variables,
+            view_name=view["name"],
+            report=self.reporter,
         )


@@ -287,7 +305,7 @@ class IncompleteSqlTransformer(LookMLViewTransformer):

 class DropDerivedViewPatternTransformer(LookMLViewTransformer):
     """
-    drop ${} from datahub_transformed_sql_table_name and
+    drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.

     Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME
     """
@@ -308,8 +326,8 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
     evaluate_to_true_regx: str
     remove_if_comment_line_regx: str

-    def __init__(self, source_config: LookMLSourceConfig):
-        super().__init__(source_config=source_config)
+    def __init__(self, source_config: LookMLSourceConfig, reporter: LookMLSourceReport):
+        super().__init__(source_config=source_config, reporter=reporter)

         # This regx will keep whatever after -- if looker_environment --
         self.evaluate_to_true_regx = r"-- if {} --".format(
@@ -335,6 +353,61 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
         return self._apply_regx(value)


+class LookmlConstantTransformer(LookMLViewTransformer):
+    """
+    Replace LookML constants @{constant} from the manifest/configuration.
+    """
+
+    CONSTANT_PATTERN = r"@{(\w+)}"  # Matches @{constant}
+
+    def __init__(
+        self,
+        source_config: LookMLSourceConfig,
+        reporter: LookMLSourceReport,
+        manifest_constants: Dict[str, "LookerConstant"],
+    ):
+        super().__init__(source_config=source_config, reporter=reporter)
+        self.manifest_constants = manifest_constants
+
+    def resolve_lookml_constant(self, text: str, view_name: Optional[str]) -> str:
+        """
+        Resolves LookML constants (@{ }) from manifest or config.
+        Logs warnings for misplaced or missing variables.
+        """
+
+        def replace_constants(match):
+            key = match.group(1)
+            # Resolve constant from config
+            if key in self.source_config.lookml_constants:
+                return str(self.source_config.lookml_constants.get(key))
+
+            # Resolve constant from manifest
+            if key in self.manifest_constants:
+                return self.manifest_constants[key].value
+
+            # Check if it's a misplaced lookml constant
+            if key in self.source_config.liquid_variables:
+                self.reporter.warning(
+                    title="Misplaced lookml constant",
+                    message="Use 'lookml_constants' instead of 'liquid_variables'.",
+                    context=f"Key {key}",
+                )
+                return f"@{{{key}}}"
+
+            self.reporter.warning(
+                title="LookML constant not found",
+                message="The constant is missing. Either add it under 'lookml_constants' in the config or define it in `manifest.lkml`.",
+                context=f"view-name: {view_name}, constant: {key}",
+            )
+            return f"@{{{key}}}"
+
+        # Resolve @{} (constant)
+        return re.sub(self.CONSTANT_PATTERN, replace_constants, text)
+
+    def _apply_transformation(self, value: str, view: dict) -> str:
+        return self.resolve_lookml_constant(text=value, view_name=view.get("name"))
+
+
 class TransformedLookMlView:
     """
     TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view.
@@ -390,24 +463,35 @@ class TransformedLookMlView:
 def process_lookml_template_language(
     source_config: LookMLSourceConfig,
     view_lkml_file_dict: dict,
+    reporter: LookMLSourceReport,
+    manifest_constants: Dict[str, "LookerConstant"] = {},
+    resolve_constants: bool = False,
 ) -> None:
     if "views" not in view_lkml_file_dict:
         return

     transformers: List[LookMLViewTransformer] = [
         LookMlIfCommentTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # First evaluate the -- if -- comments. Looker does the same
         LiquidVariableTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # Now resolve liquid variables
         DropDerivedViewPatternTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # Remove any ${} symbol
         IncompleteSqlTransformer(
-            source_config=source_config
+            source_config=source_config, reporter=reporter
         ),  # complete any incomplete sql
     ]
+    if resolve_constants:
+        transformers.append(
+            LookmlConstantTransformer(
+                source_config=source_config,
+                manifest_constants=manifest_constants,
+                reporter=reporter,
+            ),  # Resolve @{} constant with its corresponding value
+        )

     transformed_views: List[dict] = []

@@ -422,12 +506,18 @@ def process_lookml_template_language(
 def load_and_preprocess_file(
     path: Union[str, pathlib.Path],
     source_config: LookMLSourceConfig,
+    reporter: LookMLSourceReport,
+    manifest_constants: Dict[str, "LookerConstant"] = {},
+    resolve_constants: bool = False,
 ) -> dict:
     parsed = load_lkml(path)

     process_lookml_template_language(
         view_lkml_file_dict=parsed,
+        reporter=reporter,
         source_config=source_config,
+        manifest_constants=manifest_constants,
+        resolve_constants=resolve_constants,
     )

     return parsed
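For orientation, the new LookmlConstantTransformer above boils down to a regex substitution over view SQL. A minimal standalone sketch of the same lookup order (recipe constants first, then manifest constants, otherwise leave the token untouched and let the reporter warn) could look like this; the helper name and sample values are illustrative and not part of the DataHub API:

```python
import re
from typing import Dict

CONSTANT_PATTERN = r"@{(\w+)}"  # same pattern the transformer uses

def resolve_constants(
    text: str,
    config_constants: Dict[str, str],
    manifest_constants: Dict[str, str],
) -> str:
    """Replace @{name} tokens, checking recipe constants before manifest constants."""

    def _replace(match: "re.Match") -> str:
        key = match.group(1)
        if key in config_constants:
            return config_constants[key]
        if key in manifest_constants:
            return manifest_constants[key]
        return match.group(0)  # unknown constants stay as-is (reported upstream)

    return re.sub(CONSTANT_PATTERN, _replace, text)

print(resolve_constants("select * from @{schema}.orders", {"schema": "analytics"}, {}))
# -> select * from analytics.orders
```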
datahub/ingestion/source/looker/lookml_config.py (+16 -2)

@@ -161,13 +161,27 @@ class LookMLSourceConfig(
         description="When enabled, looker refinement will be processed to adapt an existing view.",
     )

-    liquid_variable: Dict[Any, Any] = Field(
+    liquid_variables: Dict[Any, Any] = Field(
         {},
-        description="A dictionary containing Liquid variables
+        description="A dictionary containing Liquid variables with their corresponding values, utilized in SQL-defined "
         "derived views. The Liquid template will be resolved in view.derived_table.sql and "
         "view.sql_table_name. Defaults to an empty dictionary.",
     )

+    _liquid_variable_deprecated = pydantic_renamed_field(
+        old_name="liquid_variable", new_name="liquid_variables", print_warning=True
+    )
+
+    lookml_constants: Dict[str, str] = Field(
+        {},
+        description=(
+            "A dictionary containing LookML constants (`@{constant_name}`) and their values. "
+            "If a constant is defined in the `manifest.lkml` file, its value will be used. "
+            "If not found in the manifest, the value from this config will be used instead. "
+            "Defaults to an empty dictionary."
+        ),
+    )
+
     looker_environment: Literal["prod", "dev"] = Field(
         "prod",
         description="A looker prod or dev environment. "
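To illustrate the configuration change: `liquid_variable` is renamed to `liquid_variables` (the old key still parses but triggers the rename warning), and `lookml_constants` is new. A hypothetical excerpt of a recipe's source config, written as a Python dict with placeholder values and other required settings omitted:

```python
# Hypothetical LookML source config excerpt; paths and values are placeholders.
lookml_source_config = {
    "base_folder": "/path/to/lookml/checkout",
    "liquid_variables": {        # formerly `liquid_variable`
        "order_region": "ap-south-1",
    },
    "lookml_constants": {        # values for @{constant_name} tokens
        "warehouse_schema": "analytics",
    },
}
```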
datahub/ingestion/source/looker/lookml_source.py (+54 -22)

@@ -43,6 +43,7 @@ from datahub.ingestion.source.looker.looker_common import (
 from datahub.ingestion.source.looker.looker_connection import (
     get_connection_def_based_on_connection_string,
 )
+from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
 from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
 from datahub.ingestion.source.looker.looker_template_language import (
     load_and_preprocess_file,
@@ -254,6 +255,7 @@ class LookerManifest:
     # This must be set if the manifest has local_dependency entries.
     # See https://cloud.google.com/looker/docs/reference/param-manifest-project-name
     project_name: Optional[str]
+    constants: Optional[List[Dict[str, str]]]

     local_dependencies: List[str]
     remote_dependencies: List[LookerRemoteDependency]
@@ -310,11 +312,14 @@ class LookMLSource(StatefulIngestionSourceBase):
                 "manage_models permission enabled on this API key."
             ) from err

+        self.manifest_constants: Dict[str, "LookerConstant"] = {}
+
     def _load_model(self, path: str) -> LookerModel:
         logger.debug(f"Loading model from file {path}")

         parsed = load_and_preprocess_file(
             path=path,
+            reporter=self.reporter,
             source_config=self.source_config,
         )

@@ -500,27 +505,33 @@ class LookMLSource(StatefulIngestionSourceBase):

     def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
         manifest_file = folder / "manifest.lkml"
-        if manifest_file.exists():
-            manifest_dict = load_and_preprocess_file(
-                path=manifest_file, source_config=self.source_config
-            )

-            manifest = LookerManifest(
-                project_name=manifest_dict.get("project_name"),
-                local_dependencies=[
-                    x["project"] for x in manifest_dict.get("local_dependencys", [])
-                ],
-                remote_dependencies=[
-                    LookerRemoteDependency(
-                        name=x["name"], url=x["url"], ref=x.get("ref")
-                    )
-                    for x in manifest_dict.get("remote_dependencys", [])
-                ],
+        if not manifest_file.exists():
+            self.reporter.info(
+                message="manifest.lkml file missing from project",
+                context=str(manifest_file),
             )
-            return manifest
-        else:
             return None

+        manifest_dict = load_and_preprocess_file(
+            path=manifest_file,
+            source_config=self.source_config,
+            reporter=self.reporter,
+        )
+
+        manifest = LookerManifest(
+            project_name=manifest_dict.get("project_name"),
+            constants=manifest_dict.get("constants", []),
+            local_dependencies=[
+                x["project"] for x in manifest_dict.get("local_dependencys", [])
+            ],
+            remote_dependencies=[
+                LookerRemoteDependency(name=x["name"], url=x["url"], ref=x.get("ref"))
+                for x in manifest_dict.get("remote_dependencys", [])
+            ],
+        )
+        return manifest
+
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
             *super().get_workunit_processors(),
@@ -575,7 +586,10 @@ class LookMLSource(StatefulIngestionSourceBase):
                 self.base_projects_folder[project] = p_ref

             self._recursively_check_manifests(
-                tmp_dir, BASE_PROJECT_NAME, visited_projects
+                tmp_dir,
+                BASE_PROJECT_NAME,
+                visited_projects,
+                self.manifest_constants,
             )

         yield from self.get_internal_workunits()
@@ -588,7 +602,11 @@ class LookMLSource(StatefulIngestionSourceBase):
         )

     def _recursively_check_manifests(
-        self, tmp_dir: str, project_name: str, project_visited: Set[str]
+        self,
+        tmp_dir: str,
+        project_name: str,
+        project_visited: Set[str],
+        manifest_constants: Dict[str, "LookerConstant"],
     ) -> None:
         if project_name in project_visited:
             return
@@ -605,6 +623,14 @@ class LookMLSource(StatefulIngestionSourceBase):
         if not manifest:
             return

+        if manifest.constants:
+            for constant in manifest.constants:
+                if constant.get("name") and constant.get("value"):
+                    manifest_constants[constant["name"]] = LookerConstant(
+                        name=constant["name"],
+                        value=constant["value"],
+                    )
+
         # Special case handling if the root project has a name in the manifest file.
         if project_name == BASE_PROJECT_NAME and manifest.project_name:
             if (
@@ -664,21 +690,27 @@ class LookMLSource(StatefulIngestionSourceBase):
             project_visited.add(project_name)
         else:
             self._recursively_check_manifests(
-                tmp_dir, remote_project.name, project_visited
+                tmp_dir,
+                remote_project.name,
+                project_visited,
+                manifest_constants,
             )

         for project in manifest.local_dependencies:
-            self._recursively_check_manifests(tmp_dir, project, project_visited)
+            self._recursively_check_manifests(
+                tmp_dir, project, project_visited, manifest_constants
+            )

     def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
         assert self.source_config.base_folder
-
         viewfile_loader = LookerViewFileLoader(
             self.source_config.project_name,
             self.base_projects_folder,
             self.reporter,
             self.source_config,
+            self.manifest_constants,
         )
+        logger.debug(f"LookML Constants : {', '.join(self.manifest_constants.keys())}")

         # Some views can be mentioned by multiple 'include' statements and can be included via different connections.
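The new manifest handling registers `constant` entries (parsed from `manifest.lkml` as a list of name/value dicts) while recursing through local and remote dependencies. A standalone sketch of that collection step, using a stand-in dataclass instead of the real `LookerConstant`:

```python
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class Constant:  # stand-in for looker_dataclasses.LookerConstant
    name: str
    value: str

def collect_manifest_constants(
    constants_section: Optional[List[Dict[str, str]]],
    registry: Dict[str, Constant],
) -> None:
    """Register name/value pairs parsed from a manifest.lkml constant block."""
    for entry in constants_section or []:
        if entry.get("name") and entry.get("value"):
            registry[entry["name"]] = Constant(name=entry["name"], value=entry["value"])

registry: Dict[str, Constant] = {}
collect_manifest_constants([{"name": "schema", "value": "analytics"}], registry)
print(registry["schema"].value)  # analytics
```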
datahub/ingestion/source/snowflake/snowflake_v2.py (+41 -4)

@@ -5,6 +5,7 @@ import logging
 import os
 import os.path
 import platform
+import re
 from dataclasses import dataclass
 from typing import Dict, Iterable, List, Optional, Union

@@ -33,6 +34,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.snowflake.constants import (
     GENERIC_PERMISSION_ERROR_KEY,
     SnowflakeEdition,
+    SnowflakeObjectDomain,
 )
 from datahub.ingestion.source.snowflake.snowflake_assertion import (
     SnowflakeAssertionsHandler,
@@ -162,6 +164,8 @@ class SnowflakeV2Source(
         self.data_dictionary = SnowflakeDataDictionary(connection=self.connection)
         self.lineage_extractor: Optional[SnowflakeLineageExtractor] = None

+        self.discovered_datasets: Optional[List[str]] = None
+
         self.aggregator: SqlParsingAggregator = self._exit_stack.enter_context(
             SqlParsingAggregator(
                 platform=self.identifiers.platform,
@@ -182,6 +186,8 @@ class SnowflakeV2Source(
                 generate_usage_statistics=False,
                 generate_operations=False,
                 format_queries=self.config.format_sql_queries,
+                is_temp_table=self._is_temp_table,
+                is_allowed_table=self._is_allowed_table,
             )
         )
         self.report.sql_aggregator = self.aggregator.report
@@ -444,6 +450,34 @@ class SnowflakeV2Source(

         return _report

+    def _is_temp_table(self, name: str) -> bool:
+        if any(
+            re.match(pattern, name, flags=re.IGNORECASE)
+            for pattern in self.config.temporary_tables_pattern
+        ):
+            return True
+
+        # This is also a temp table if
+        # 1. this name would be allowed by the dataset patterns, and
+        # 2. we have a list of discovered tables, and
+        # 3. it's not in the discovered tables list
+        if (
+            self.filters.is_dataset_pattern_allowed(name, SnowflakeObjectDomain.TABLE)
+            and self.discovered_datasets
+            and name not in self.discovered_datasets
+        ):
+            return True
+
+        return False
+
+    def _is_allowed_table(self, name: str) -> bool:
+        if self.discovered_datasets and name not in self.discovered_datasets:
+            return False
+
+        return self.filters.is_dataset_pattern_allowed(
+            name, SnowflakeObjectDomain.TABLE
+        )
+
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
             *super().get_workunit_processors(),
@@ -513,7 +547,7 @@ class SnowflakeV2Source(
             )
             return

-        discovered_datasets = discovered_tables + discovered_views
+        self.discovered_datasets = discovered_tables + discovered_views

         if self.config.use_queries_v2:
             with self.report.new_stage(f"*: {VIEW_PARSING}"):
@@ -538,13 +572,14 @@ class SnowflakeV2Source(
                     filters=self.filters,
                     identifiers=self.identifiers,
                     schema_resolver=schema_resolver,
-                    discovered_tables=discovered_datasets,
+                    discovered_tables=self.discovered_datasets,
                     graph=self.ctx.graph,
                 )

                 # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs
                 # but a shared schema resolver. That's fine for now though - once we remove the old lineage/usage extractors,
                 # it should be pretty straightforward to refactor this and only initialize the aggregator once.
+                # This also applies for the _is_temp_table and _is_allowed_table methods above, duplicated from SnowflakeQueriesExtractor.
                 self.report.queries_extractor = queries_extractor.report
                 yield from queries_extractor.get_workunits_internal()
                 queries_extractor.close()
@@ -568,12 +603,14 @@ class SnowflakeV2Source(
         if (
             self.config.include_usage_stats or self.config.include_operational_stats
         ) and self.usage_extractor:
-            yield from self.usage_extractor.get_usage_workunits(discovered_datasets)
+            yield from self.usage_extractor.get_usage_workunits(
+                self.discovered_datasets
+            )

         if self.config.include_assertion_results:
             yield from SnowflakeAssertionsHandler(
                 self.config, self.report, self.connection, self.identifiers
-            ).get_assertion_workunits(discovered_datasets)
+            ).get_assertion_workunits(self.discovered_datasets)

         self.connection.close()
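The two predicates handed to `SqlParsingAggregator` encode a simple rule: a name is treated as temporary if it matches a temporary-table pattern, or if it would pass the dataset patterns yet never appeared in the discovered inventory; it is allowed only if it is pattern-allowed and, when an inventory exists, actually discovered. A standalone approximation, with a plain callable standing in for the Snowflake filter object (illustrative only, not the DataHub API):

```python
import re
from typing import Callable, List, Optional

def is_temp_table(
    name: str,
    temp_patterns: List[str],
    pattern_allowed: Callable[[str], bool],  # stand-in for filters.is_dataset_pattern_allowed
    discovered: Optional[List[str]],
) -> bool:
    if any(re.match(p, name, flags=re.IGNORECASE) for p in temp_patterns):
        return True
    # Pattern-allowed but never discovered -> likely a transient/temp object.
    return bool(pattern_allowed(name) and discovered and name not in discovered)

def is_allowed_table(
    name: str,
    pattern_allowed: Callable[[str], bool],
    discovered: Optional[List[str]],
) -> bool:
    if discovered and name not in discovered:
        return False
    return pattern_allowed(name)

discovered = ["db.public.orders"]
print(is_temp_table("db.public.ge_tmp_01", [r".*\.ge_tmp_.*"], lambda _: True, discovered))  # True
print(is_allowed_table("db.public.orders", lambda _: True, discovered))                      # True
```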