acryl-datahub 0.15.0.4rc2__py3-none-any.whl → 0.15.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- acryl_datahub-0.15.0.5.dist-info/LICENSE +202 -0
- {acryl_datahub-0.15.0.4rc2.dist-info → acryl_datahub-0.15.0.5.dist-info}/METADATA +2444 -2404
- {acryl_datahub-0.15.0.4rc2.dist-info → acryl_datahub-0.15.0.5.dist-info}/RECORD +96 -86
- {acryl_datahub-0.15.0.4rc2.dist-info → acryl_datahub-0.15.0.5.dist-info}/entry_points.txt +1 -0
- datahub/__init__.py +1 -25
- datahub/_version.py +13 -0
- datahub/api/entities/dataprocess/dataprocess_instance.py +104 -11
- datahub/cli/check_cli.py +1 -1
- datahub/cli/cli_utils.py +3 -3
- datahub/cli/container_cli.py +1 -64
- datahub/cli/iceberg_cli.py +707 -0
- datahub/cli/ingest_cli.py +2 -2
- datahub/emitter/composite_emitter.py +36 -0
- datahub/emitter/rest_emitter.py +1 -1
- datahub/entrypoints.py +26 -5
- datahub/ingestion/api/incremental_lineage_helper.py +4 -0
- datahub/ingestion/api/registry.py +4 -2
- datahub/ingestion/glossary/classification_mixin.py +6 -0
- datahub/ingestion/glossary/classifier.py +3 -2
- datahub/ingestion/graph/client.py +2 -1
- datahub/ingestion/graph/entity_versioning.py +201 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/run/connection.py +1 -1
- datahub/ingestion/run/pipeline.py +3 -3
- datahub/ingestion/source/abs/report.py +2 -2
- datahub/ingestion/source/apply/__init__.py +0 -0
- datahub/ingestion/source/apply/datahub_apply.py +223 -0
- datahub/ingestion/source/aws/glue.py +15 -6
- datahub/ingestion/source/aws/sagemaker_processors/common.py +3 -2
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +1 -1
- datahub/ingestion/source/dbt/dbt_core.py +1 -1
- datahub/ingestion/source/delta_lake/report.py +2 -2
- datahub/ingestion/source/dynamodb/dynamodb.py +2 -1
- datahub/ingestion/source/elastic_search.py +2 -1
- datahub/ingestion/source/ge_profiling_config.py +11 -7
- datahub/ingestion/source/iceberg/iceberg_common.py +3 -2
- datahub/ingestion/source/identity/azure_ad.py +6 -14
- datahub/ingestion/source/identity/okta.py +2 -1
- datahub/ingestion/source/kafka/kafka.py +2 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -1
- datahub/ingestion/source/ldap.py +2 -1
- datahub/ingestion/source/looker/looker_config.py +3 -1
- datahub/ingestion/source/looker/looker_dataclasses.py +8 -0
- datahub/ingestion/source/looker/looker_file_loader.py +14 -3
- datahub/ingestion/source/looker/looker_template_language.py +104 -14
- datahub/ingestion/source/looker/lookml_config.py +29 -8
- datahub/ingestion/source/looker/lookml_source.py +110 -22
- datahub/ingestion/source/mode.py +2 -4
- datahub/ingestion/source/mongodb.py +2 -1
- datahub/ingestion/source/nifi.py +2 -1
- datahub/ingestion/source/powerbi/config.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -1
- datahub/ingestion/source/redash.py +5 -5
- datahub/ingestion/source/salesforce.py +4 -1
- datahub/ingestion/source/slack/slack.py +6 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_query.py +11 -0
- datahub/ingestion/source/snowflake/snowflake_report.py +3 -1
- datahub/ingestion/source/snowflake/snowflake_schema.py +17 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +35 -43
- datahub/ingestion/source/snowflake/snowflake_tag.py +57 -3
- datahub/ingestion/source/snowflake/snowflake_v2.py +42 -4
- datahub/ingestion/source/sql/clickhouse.py +5 -43
- datahub/ingestion/source/sql/mssql/job_models.py +37 -8
- datahub/ingestion/source/sql/mssql/source.py +17 -0
- datahub/ingestion/source/sql/sql_config.py +0 -10
- datahub/ingestion/source/tableau/tableau.py +16 -13
- datahub/ingestion/source/tableau/tableau_common.py +1 -1
- datahub/ingestion/source/unity/ge_profiler.py +55 -4
- datahub/ingestion/source/unity/proxy.py +2 -2
- datahub/ingestion/source/unity/report.py +1 -0
- datahub/ingestion/source_config/operation_config.py +9 -0
- datahub/ingestion/source_report/pulsar.py +5 -4
- datahub/metadata/_schema_classes.py +304 -6
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
- datahub/metadata/schema.avsc +211 -12
- datahub/metadata/schemas/AssertionInfo.avsc +2 -2
- datahub/metadata/schemas/CorpUserSettings.avsc +9 -0
- datahub/metadata/schemas/DashboardInfo.avsc +5 -5
- datahub/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/Deprecation.avsc +12 -0
- datahub/metadata/schemas/DisplayProperties.avsc +62 -0
- datahub/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +92 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +17 -5
- datahub/metadata/schemas/PostInfo.avsc +28 -2
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/specific/dashboard.py +43 -1
- datahub/telemetry/telemetry.py +4 -4
- datahub/testing/check_imports.py +28 -0
- datahub/upgrade/upgrade.py +17 -9
- {acryl_datahub-0.15.0.4rc2.dist-info → acryl_datahub-0.15.0.5.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.4rc2.dist-info → acryl_datahub-0.15.0.5.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
import pathlib
|
|
3
3
|
import re
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
5
|
-
from typing import Any, ClassVar, Dict, List, Optional, Set, Union
|
|
5
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union
|
|
6
6
|
|
|
7
7
|
from deepmerge import always_merger
|
|
8
8
|
from liquid import Undefined
|
|
@@ -27,8 +27,12 @@ from datahub.ingestion.source.looker.looker_liquid_tag import (
|
|
|
27
27
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
28
28
|
DERIVED_VIEW_PATTERN,
|
|
29
29
|
LookMLSourceConfig,
|
|
30
|
+
LookMLSourceReport,
|
|
30
31
|
)
|
|
31
32
|
|
|
33
|
+
if TYPE_CHECKING:
|
|
34
|
+
from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
|
|
35
|
+
|
|
32
36
|
logger = logging.getLogger(__name__)
|
|
33
37
|
|
|
34
38
|
|
|
@@ -82,7 +86,12 @@ class SpecialVariable:
|
|
|
82
86
|
return self._create_new_liquid_variables_with_default(variables=variables)
|
|
83
87
|
|
|
84
88
|
|
|
85
|
-
def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
|
|
89
|
+
def resolve_liquid_variable(
|
|
90
|
+
text: str,
|
|
91
|
+
view_name: str,
|
|
92
|
+
liquid_variable: Dict[Any, Any],
|
|
93
|
+
report: LookMLSourceReport,
|
|
94
|
+
) -> str:
|
|
86
95
|
# Set variable value to NULL if not present in liquid_variable dictionary
|
|
87
96
|
Undefined.__str__ = lambda instance: "NULL" # type: ignore
|
|
88
97
|
try:
|
|
@@ -96,6 +105,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
|
|
|
96
105
|
# Resolve liquid template
|
|
97
106
|
return create_template(text).render(liquid_variable)
|
|
98
107
|
except LiquidSyntaxError as e:
|
|
108
|
+
# TODO: Add a report warning here once duplicate warning messages for the same view are eliminated
|
|
99
109
|
logger.warning(f"Unsupported liquid template encountered. error [{e.message}]")
|
|
100
110
|
# TODO: There are some tag specific to looker and python-liquid library does not understand them. currently
|
|
101
111
|
# we are not parsing such liquid template.
|
|
@@ -103,6 +113,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
|
|
|
103
113
|
# See doc: https://cloud.google.com/looker/docs/templated-filters and look for { % condition region %}
|
|
104
114
|
# order.region { % endcondition %}
|
|
105
115
|
except CustomTagException as e:
|
|
116
|
+
# TODO: Add a report warning here once duplicate warning messages for the same view are eliminated
|
|
106
117
|
logger.warning(e)
|
|
107
118
|
logger.debug(e, exc_info=e)
|
|
108
119
|
|
|
@@ -192,15 +203,20 @@ class LookMLViewTransformer(ABC):
|
|
|
192
203
|
|
|
193
204
|
source_config: LookMLSourceConfig
|
|
194
205
|
|
|
195
|
-
def __init__(self, source_config: LookMLSourceConfig):
|
|
206
|
+
def __init__(
|
|
207
|
+
self,
|
|
208
|
+
source_config: LookMLSourceConfig,
|
|
209
|
+
reporter: LookMLSourceReport,
|
|
210
|
+
):
|
|
196
211
|
self.source_config = source_config
|
|
212
|
+
self.reporter = reporter
|
|
197
213
|
|
|
198
214
|
def transform(self, view: dict) -> dict:
|
|
199
215
|
value_to_transform: Optional[str] = None
|
|
200
216
|
|
|
201
|
-
# is_attribute_supported check is required because not all
|
|
202
|
-
#
|
|
203
|
-
# however IncompleteSqlTransformer only transform the derived.sql attribute
|
|
217
|
+
# is_attribute_supported check is required because not all transformers work on all attributes in the current
|
|
218
|
+
# case, mostly all transformers work on sql_table_name and derived.sql attributes;
|
|
219
|
+
# however, IncompleteSqlTransformer only transforms the derived.sql attribute
|
|
204
220
|
if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME):
|
|
205
221
|
# Give precedence to already processed transformed view.sql_table_name to apply more transformation
|
|
206
222
|
value_to_transform = view.get(
|
|
@@ -252,7 +268,9 @@ class LiquidVariableTransformer(LookMLViewTransformer):
|
|
|
252
268
|
def _apply_transformation(self, value: str, view: dict) -> str:
|
|
253
269
|
return resolve_liquid_variable(
|
|
254
270
|
text=value,
|
|
255
|
-
liquid_variable=self.source_config.liquid_variable,
|
|
271
|
+
liquid_variable=self.source_config.liquid_variables,
|
|
272
|
+
view_name=view["name"],
|
|
273
|
+
report=self.reporter,
|
|
256
274
|
)
|
|
257
275
|
|
|
258
276
|
|
|
@@ -287,7 +305,7 @@ class IncompleteSqlTransformer(LookMLViewTransformer):
|
|
|
287
305
|
|
|
288
306
|
class DropDerivedViewPatternTransformer(LookMLViewTransformer):
|
|
289
307
|
"""
|
|
290
|
-
drop ${} from datahub_transformed_sql_table_name and
|
|
308
|
+
drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.
|
|
291
309
|
|
|
292
310
|
Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME
|
|
293
311
|
"""
|
|
@@ -308,8 +326,8 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
|
|
|
308
326
|
evaluate_to_true_regx: str
|
|
309
327
|
remove_if_comment_line_regx: str
|
|
310
328
|
|
|
311
|
-
def __init__(self, source_config: LookMLSourceConfig):
|
|
312
|
-
super().__init__(source_config=source_config)
|
|
329
|
+
def __init__(self, source_config: LookMLSourceConfig, reporter: LookMLSourceReport):
|
|
330
|
+
super().__init__(source_config=source_config, reporter=reporter)
|
|
313
331
|
|
|
314
332
|
# This regx will keep whatever after -- if looker_environment --
|
|
315
333
|
self.evaluate_to_true_regx = r"-- if {} --".format(
|
|
@@ -335,6 +353,61 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
|
|
|
335
353
|
return self._apply_regx(value)
|
|
336
354
|
|
|
337
355
|
|
|
356
|
+
class LookmlConstantTransformer(LookMLViewTransformer):
|
|
357
|
+
"""
|
|
358
|
+
Replace LookML constants @{constant} from the manifest/configuration.
|
|
359
|
+
"""
|
|
360
|
+
|
|
361
|
+
CONSTANT_PATTERN = r"@{(\w+)}" # Matches @{constant}
|
|
362
|
+
|
|
363
|
+
def __init__(
|
|
364
|
+
self,
|
|
365
|
+
source_config: LookMLSourceConfig,
|
|
366
|
+
reporter: LookMLSourceReport,
|
|
367
|
+
manifest_constants: Dict[str, "LookerConstant"],
|
|
368
|
+
):
|
|
369
|
+
super().__init__(source_config=source_config, reporter=reporter)
|
|
370
|
+
self.manifest_constants = manifest_constants
|
|
371
|
+
|
|
372
|
+
def resolve_lookml_constant(self, text: str, view_name: Optional[str]) -> str:
|
|
373
|
+
"""
|
|
374
|
+
Resolves LookML constants (@{ }) from manifest or config.
|
|
375
|
+
Logs warnings for misplaced or missing variables.
|
|
376
|
+
"""
|
|
377
|
+
|
|
378
|
+
def replace_constants(match):
|
|
379
|
+
key = match.group(1)
|
|
380
|
+
# Resolve constant from config
|
|
381
|
+
if key in self.source_config.lookml_constants:
|
|
382
|
+
return str(self.source_config.lookml_constants.get(key))
|
|
383
|
+
|
|
384
|
+
# Resolve constant from manifest
|
|
385
|
+
if key in self.manifest_constants:
|
|
386
|
+
return self.manifest_constants[key].value
|
|
387
|
+
|
|
388
|
+
# Check if it's a misplaced lookml constant
|
|
389
|
+
if key in self.source_config.liquid_variables:
|
|
390
|
+
self.reporter.warning(
|
|
391
|
+
title="Misplaced lookml constant",
|
|
392
|
+
message="Use 'lookml_constants' instead of 'liquid_variables'.",
|
|
393
|
+
context=f"Key {key}",
|
|
394
|
+
)
|
|
395
|
+
return f"@{{{key}}}"
|
|
396
|
+
|
|
397
|
+
self.reporter.warning(
|
|
398
|
+
title="LookML constant not found",
|
|
399
|
+
message="The constant is missing. Either add it under 'lookml_constants' in the config or define it in `manifest.lkml`.",
|
|
400
|
+
context=f"view-name: {view_name}, constant: {key}",
|
|
401
|
+
)
|
|
402
|
+
return f"@{{{key}}}"
|
|
403
|
+
|
|
404
|
+
# Resolve @{} (constant)
|
|
405
|
+
return re.sub(self.CONSTANT_PATTERN, replace_constants, text)
|
|
406
|
+
|
|
407
|
+
def _apply_transformation(self, value: str, view: dict) -> str:
|
|
408
|
+
return self.resolve_lookml_constant(text=value, view_name=view.get("name"))
|
|
409
|
+
|
|
410
|
+
|
|
338
411
|
class TransformedLookMlView:
|
|
339
412
|
"""
|
|
340
413
|
TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view.
|
|
@@ -390,24 +463,35 @@ class TransformedLookMlView:
|
|
|
390
463
|
def process_lookml_template_language(
|
|
391
464
|
source_config: LookMLSourceConfig,
|
|
392
465
|
view_lkml_file_dict: dict,
|
|
466
|
+
reporter: LookMLSourceReport,
|
|
467
|
+
manifest_constants: Dict[str, "LookerConstant"] = {},
|
|
468
|
+
resolve_constants: bool = False,
|
|
393
469
|
) -> None:
|
|
394
470
|
if "views" not in view_lkml_file_dict:
|
|
395
471
|
return
|
|
396
472
|
|
|
397
473
|
transformers: List[LookMLViewTransformer] = [
|
|
398
474
|
LookMlIfCommentTransformer(
|
|
399
|
-
source_config=source_config
|
|
475
|
+
source_config=source_config, reporter=reporter
|
|
400
476
|
), # First evaluate the -- if -- comments. Looker does the same
|
|
401
477
|
LiquidVariableTransformer(
|
|
402
|
-
source_config=source_config
|
|
478
|
+
source_config=source_config, reporter=reporter
|
|
403
479
|
), # Now resolve liquid variables
|
|
404
480
|
DropDerivedViewPatternTransformer(
|
|
405
|
-
source_config=source_config
|
|
481
|
+
source_config=source_config, reporter=reporter
|
|
406
482
|
), # Remove any ${} symbol
|
|
407
483
|
IncompleteSqlTransformer(
|
|
408
|
-
source_config=source_config
|
|
484
|
+
source_config=source_config, reporter=reporter
|
|
409
485
|
), # complete any incomplete sql
|
|
410
486
|
]
|
|
487
|
+
if resolve_constants:
|
|
488
|
+
transformers.append(
|
|
489
|
+
LookmlConstantTransformer(
|
|
490
|
+
source_config=source_config,
|
|
491
|
+
manifest_constants=manifest_constants,
|
|
492
|
+
reporter=reporter,
|
|
493
|
+
), # Resolve @{} constant with its corresponding value
|
|
494
|
+
)
|
|
411
495
|
|
|
412
496
|
transformed_views: List[dict] = []
|
|
413
497
|
|
|
@@ -422,12 +506,18 @@ def process_lookml_template_language(
|
|
|
422
506
|
def load_and_preprocess_file(
|
|
423
507
|
path: Union[str, pathlib.Path],
|
|
424
508
|
source_config: LookMLSourceConfig,
|
|
509
|
+
reporter: LookMLSourceReport,
|
|
510
|
+
manifest_constants: Dict[str, "LookerConstant"] = {},
|
|
511
|
+
resolve_constants: bool = False,
|
|
425
512
|
) -> dict:
|
|
426
513
|
parsed = load_lkml(path)
|
|
427
514
|
|
|
428
515
|
process_lookml_template_language(
|
|
429
516
|
view_lkml_file_dict=parsed,
|
|
517
|
+
reporter=reporter,
|
|
430
518
|
source_config=source_config,
|
|
519
|
+
manifest_constants=manifest_constants,
|
|
520
|
+
resolve_constants=resolve_constants,
|
|
431
521
|
)
|
|
432
522
|
|
|
433
523
|
return parsed
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from dataclasses import dataclass, field as dataclass_field
|
|
3
3
|
from datetime import timedelta
|
|
4
|
-
from typing import Any, Dict,
|
|
4
|
+
from typing import Any, Dict, Literal, Optional, Union
|
|
5
5
|
|
|
6
6
|
import pydantic
|
|
7
7
|
from pydantic import root_validator, validator
|
|
@@ -48,13 +48,17 @@ DERIVED_VIEW_PATTERN: str = r"\$\{([^}]*)\}"
|
|
|
48
48
|
class LookMLSourceReport(StaleEntityRemovalSourceReport):
|
|
49
49
|
git_clone_latency: Optional[timedelta] = None
|
|
50
50
|
models_discovered: int = 0
|
|
51
|
-
models_dropped:
|
|
51
|
+
models_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
52
52
|
views_discovered: int = 0
|
|
53
|
-
views_dropped:
|
|
54
|
-
views_dropped_unreachable:
|
|
53
|
+
views_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
54
|
+
views_dropped_unreachable: LossyList[str] = dataclass_field(
|
|
55
|
+
default_factory=LossyList
|
|
56
|
+
)
|
|
55
57
|
query_parse_attempts: int = 0
|
|
56
58
|
query_parse_failures: int = 0
|
|
57
|
-
query_parse_failure_views:
|
|
59
|
+
query_parse_failure_views: LossyList[str] = dataclass_field(
|
|
60
|
+
default_factory=LossyList
|
|
61
|
+
)
|
|
58
62
|
_looker_api: Optional[LookerAPI] = None
|
|
59
63
|
|
|
60
64
|
def report_models_scanned(self) -> None:
|
|
@@ -139,7 +143,10 @@ class LookMLSourceConfig(
|
|
|
139
143
|
)
|
|
140
144
|
emit_reachable_views_only: bool = Field(
|
|
141
145
|
True,
|
|
142
|
-
description=
|
|
146
|
+
description=(
|
|
147
|
+
"When enabled, only views that are reachable from explores defined in the model files are emitted. "
|
|
148
|
+
"If set to False, all views imported in model files are emitted. Views that are unreachable i.e. not explicitly defined in the model files are currently not emitted however reported as warning for debugging purposes."
|
|
149
|
+
),
|
|
143
150
|
)
|
|
144
151
|
populate_sql_logic_for_missing_descriptions: bool = Field(
|
|
145
152
|
False,
|
|
@@ -158,13 +165,27 @@ class LookMLSourceConfig(
|
|
|
158
165
|
description="When enabled, looker refinement will be processed to adapt an existing view.",
|
|
159
166
|
)
|
|
160
167
|
|
|
161
|
-
|
|
168
|
+
liquid_variables: Dict[Any, Any] = Field(
|
|
162
169
|
{},
|
|
163
|
-
description="A dictionary containing Liquid variables
|
|
170
|
+
description="A dictionary containing Liquid variables with their corresponding values, utilized in SQL-defined "
|
|
164
171
|
"derived views. The Liquid template will be resolved in view.derived_table.sql and "
|
|
165
172
|
"view.sql_table_name. Defaults to an empty dictionary.",
|
|
166
173
|
)
|
|
167
174
|
|
|
175
|
+
_liquid_variable_deprecated = pydantic_renamed_field(
|
|
176
|
+
old_name="liquid_variable", new_name="liquid_variables", print_warning=True
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
lookml_constants: Dict[str, str] = Field(
|
|
180
|
+
{},
|
|
181
|
+
description=(
|
|
182
|
+
"A dictionary containing LookML constants (`@{constant_name}`) and their values. "
|
|
183
|
+
"If a constant is defined in the `manifest.lkml` file, its value will be used. "
|
|
184
|
+
"If not found in the manifest, the value from this config will be used instead. "
|
|
185
|
+
"Defaults to an empty dictionary."
|
|
186
|
+
),
|
|
187
|
+
)
|
|
188
|
+
|
|
168
189
|
looker_environment: Literal["prod", "dev"] = Field(
|
|
169
190
|
"prod",
|
|
170
191
|
description="A looker prod or dev environment. "
|
|
@@ -43,6 +43,7 @@ from datahub.ingestion.source.looker.looker_common import (
|
|
|
43
43
|
from datahub.ingestion.source.looker.looker_connection import (
|
|
44
44
|
get_connection_def_based_on_connection_string,
|
|
45
45
|
)
|
|
46
|
+
from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant
|
|
46
47
|
from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
|
|
47
48
|
from datahub.ingestion.source.looker.looker_template_language import (
|
|
48
49
|
load_and_preprocess_file,
|
|
@@ -59,6 +60,7 @@ from datahub.ingestion.source.looker.lookml_concept_context import (
|
|
|
59
60
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
60
61
|
BASE_PROJECT_NAME,
|
|
61
62
|
MODEL_FILE_EXTENSION,
|
|
63
|
+
VIEW_FILE_EXTENSION,
|
|
62
64
|
LookerConnectionDefinition,
|
|
63
65
|
LookMLSourceConfig,
|
|
64
66
|
LookMLSourceReport,
|
|
@@ -253,6 +255,7 @@ class LookerManifest:
|
|
|
253
255
|
# This must be set if the manifest has local_dependency entries.
|
|
254
256
|
# See https://cloud.google.com/looker/docs/reference/param-manifest-project-name
|
|
255
257
|
project_name: Optional[str]
|
|
258
|
+
constants: Optional[List[Dict[str, str]]]
|
|
256
259
|
|
|
257
260
|
local_dependencies: List[str]
|
|
258
261
|
remote_dependencies: List[LookerRemoteDependency]
|
|
@@ -309,11 +312,14 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
309
312
|
"manage_models permission enabled on this API key."
|
|
310
313
|
) from err
|
|
311
314
|
|
|
315
|
+
self.manifest_constants: Dict[str, "LookerConstant"] = {}
|
|
316
|
+
|
|
312
317
|
def _load_model(self, path: str) -> LookerModel:
|
|
313
318
|
logger.debug(f"Loading model from file {path}")
|
|
314
319
|
|
|
315
320
|
parsed = load_and_preprocess_file(
|
|
316
321
|
path=path,
|
|
322
|
+
reporter=self.reporter,
|
|
317
323
|
source_config=self.source_config,
|
|
318
324
|
)
|
|
319
325
|
|
|
@@ -499,27 +505,33 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
499
505
|
|
|
500
506
|
def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
|
|
501
507
|
manifest_file = folder / "manifest.lkml"
|
|
502
|
-
if manifest_file.exists():
|
|
503
|
-
manifest_dict = load_and_preprocess_file(
|
|
504
|
-
path=manifest_file, source_config=self.source_config
|
|
505
|
-
)
|
|
506
508
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
],
|
|
512
|
-
remote_dependencies=[
|
|
513
|
-
LookerRemoteDependency(
|
|
514
|
-
name=x["name"], url=x["url"], ref=x.get("ref")
|
|
515
|
-
)
|
|
516
|
-
for x in manifest_dict.get("remote_dependencys", [])
|
|
517
|
-
],
|
|
509
|
+
if not manifest_file.exists():
|
|
510
|
+
self.reporter.info(
|
|
511
|
+
message="manifest.lkml file missing from project",
|
|
512
|
+
context=str(manifest_file),
|
|
518
513
|
)
|
|
519
|
-
return manifest
|
|
520
|
-
else:
|
|
521
514
|
return None
|
|
522
515
|
|
|
516
|
+
manifest_dict = load_and_preprocess_file(
|
|
517
|
+
path=manifest_file,
|
|
518
|
+
source_config=self.source_config,
|
|
519
|
+
reporter=self.reporter,
|
|
520
|
+
)
|
|
521
|
+
|
|
522
|
+
manifest = LookerManifest(
|
|
523
|
+
project_name=manifest_dict.get("project_name"),
|
|
524
|
+
constants=manifest_dict.get("constants", []),
|
|
525
|
+
local_dependencies=[
|
|
526
|
+
x["project"] for x in manifest_dict.get("local_dependencys", [])
|
|
527
|
+
],
|
|
528
|
+
remote_dependencies=[
|
|
529
|
+
LookerRemoteDependency(name=x["name"], url=x["url"], ref=x.get("ref"))
|
|
530
|
+
for x in manifest_dict.get("remote_dependencys", [])
|
|
531
|
+
],
|
|
532
|
+
)
|
|
533
|
+
return manifest
|
|
534
|
+
|
|
523
535
|
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|
|
524
536
|
return [
|
|
525
537
|
*super().get_workunit_processors(),
|
|
@@ -574,7 +586,10 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
574
586
|
self.base_projects_folder[project] = p_ref
|
|
575
587
|
|
|
576
588
|
self._recursively_check_manifests(
|
|
577
|
-
tmp_dir,
|
|
589
|
+
tmp_dir,
|
|
590
|
+
BASE_PROJECT_NAME,
|
|
591
|
+
visited_projects,
|
|
592
|
+
self.manifest_constants,
|
|
578
593
|
)
|
|
579
594
|
|
|
580
595
|
yield from self.get_internal_workunits()
|
|
@@ -587,7 +602,11 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
587
602
|
)
|
|
588
603
|
|
|
589
604
|
def _recursively_check_manifests(
|
|
590
|
-
self,
|
|
605
|
+
self,
|
|
606
|
+
tmp_dir: str,
|
|
607
|
+
project_name: str,
|
|
608
|
+
project_visited: Set[str],
|
|
609
|
+
manifest_constants: Dict[str, "LookerConstant"],
|
|
591
610
|
) -> None:
|
|
592
611
|
if project_name in project_visited:
|
|
593
612
|
return
|
|
@@ -604,6 +623,14 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
604
623
|
if not manifest:
|
|
605
624
|
return
|
|
606
625
|
|
|
626
|
+
if manifest.constants:
|
|
627
|
+
for constant in manifest.constants:
|
|
628
|
+
if constant.get("name") and constant.get("value"):
|
|
629
|
+
manifest_constants[constant["name"]] = LookerConstant(
|
|
630
|
+
name=constant["name"],
|
|
631
|
+
value=constant["value"],
|
|
632
|
+
)
|
|
633
|
+
|
|
607
634
|
# Special case handling if the root project has a name in the manifest file.
|
|
608
635
|
if project_name == BASE_PROJECT_NAME and manifest.project_name:
|
|
609
636
|
if (
|
|
@@ -663,21 +690,27 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
663
690
|
project_visited.add(project_name)
|
|
664
691
|
else:
|
|
665
692
|
self._recursively_check_manifests(
|
|
666
|
-
tmp_dir,
|
|
693
|
+
tmp_dir,
|
|
694
|
+
remote_project.name,
|
|
695
|
+
project_visited,
|
|
696
|
+
manifest_constants,
|
|
667
697
|
)
|
|
668
698
|
|
|
669
699
|
for project in manifest.local_dependencies:
|
|
670
|
-
self._recursively_check_manifests(
|
|
700
|
+
self._recursively_check_manifests(
|
|
701
|
+
tmp_dir, project, project_visited, manifest_constants
|
|
702
|
+
)
|
|
671
703
|
|
|
672
704
|
def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
|
|
673
705
|
assert self.source_config.base_folder
|
|
674
|
-
|
|
675
706
|
viewfile_loader = LookerViewFileLoader(
|
|
676
707
|
self.source_config.project_name,
|
|
677
708
|
self.base_projects_folder,
|
|
678
709
|
self.reporter,
|
|
679
710
|
self.source_config,
|
|
711
|
+
self.manifest_constants,
|
|
680
712
|
)
|
|
713
|
+
logger.debug(f"LookML Constants : {', '.join(self.manifest_constants.keys())}")
|
|
681
714
|
|
|
682
715
|
# Some views can be mentioned by multiple 'include' statements and can be included via different connections.
|
|
683
716
|
|
|
@@ -884,6 +917,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
884
917
|
view_urn = maybe_looker_view.id.get_urn(
|
|
885
918
|
self.source_config
|
|
886
919
|
)
|
|
920
|
+
|
|
887
921
|
view_connection_mapping = view_connection_map.get(
|
|
888
922
|
view_urn
|
|
889
923
|
)
|
|
@@ -939,6 +973,9 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
939
973
|
str(maybe_looker_view.id)
|
|
940
974
|
)
|
|
941
975
|
|
|
976
|
+
if not self.source_config.emit_reachable_views_only:
|
|
977
|
+
self.report_skipped_unreachable_views(viewfile_loader, processed_view_map)
|
|
978
|
+
|
|
942
979
|
if (
|
|
943
980
|
self.source_config.tag_measures_and_dimensions
|
|
944
981
|
and self.reporter.events_produced != 0
|
|
@@ -966,5 +1003,56 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
966
1003
|
),
|
|
967
1004
|
).as_workunit()
|
|
968
1005
|
|
|
1006
|
+
def report_skipped_unreachable_views(
|
|
1007
|
+
self,
|
|
1008
|
+
viewfile_loader: LookerViewFileLoader,
|
|
1009
|
+
processed_view_map: Dict[str, Set[str]] = {},
|
|
1010
|
+
) -> None:
|
|
1011
|
+
view_files: Dict[str, List[pathlib.Path]] = {}
|
|
1012
|
+
for project, folder_path in self.base_projects_folder.items():
|
|
1013
|
+
folder = pathlib.Path(folder_path)
|
|
1014
|
+
view_files[project] = list(folder.glob(f"**/*{VIEW_FILE_EXTENSION}"))
|
|
1015
|
+
|
|
1016
|
+
skipped_view_paths: Dict[str, List[str]] = {}
|
|
1017
|
+
for project, views in view_files.items():
|
|
1018
|
+
skipped_paths: Set[str] = set()
|
|
1019
|
+
|
|
1020
|
+
for view_path in views:
|
|
1021
|
+
# Check if the view is already in processed_view_map
|
|
1022
|
+
if not any(
|
|
1023
|
+
str(view_path) in view_set
|
|
1024
|
+
for view_set in processed_view_map.values()
|
|
1025
|
+
):
|
|
1026
|
+
looker_viewfile = viewfile_loader.load_viewfile(
|
|
1027
|
+
path=str(view_path),
|
|
1028
|
+
project_name=project,
|
|
1029
|
+
connection=None,
|
|
1030
|
+
reporter=self.reporter,
|
|
1031
|
+
)
|
|
1032
|
+
|
|
1033
|
+
if looker_viewfile is not None:
|
|
1034
|
+
for raw_view in looker_viewfile.views:
|
|
1035
|
+
raw_view_name = raw_view.get("name", "")
|
|
1036
|
+
|
|
1037
|
+
if (
|
|
1038
|
+
raw_view_name
|
|
1039
|
+
and self.source_config.view_pattern.allowed(
|
|
1040
|
+
raw_view_name
|
|
1041
|
+
)
|
|
1042
|
+
):
|
|
1043
|
+
skipped_paths.add(str(view_path))
|
|
1044
|
+
|
|
1045
|
+
skipped_view_paths[project] = list(skipped_paths)
|
|
1046
|
+
|
|
1047
|
+
for project, view_paths in skipped_view_paths.items():
|
|
1048
|
+
for path in view_paths:
|
|
1049
|
+
self.reporter.report_warning(
|
|
1050
|
+
title="Skipped View File",
|
|
1051
|
+
message=(
|
|
1052
|
+
"The Looker view file was skipped because it may not be referenced by any models."
|
|
1053
|
+
),
|
|
1054
|
+
context=(f"Project: {project}, View File Path: {path}"),
|
|
1055
|
+
)
|
|
1056
|
+
|
|
969
1057
|
def get_report(self):
|
|
970
1058
|
return self.reporter
|
datahub/ingestion/source/mode.py
CHANGED
|
@@ -24,6 +24,7 @@ from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponenti
|
|
|
24
24
|
import datahub.emitter.mce_builder as builder
|
|
25
25
|
from datahub.configuration.common import AllowDenyPattern, ConfigModel
|
|
26
26
|
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
|
|
27
|
+
from datahub.configuration.validate_field_removal import pydantic_removed_field
|
|
27
28
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
28
29
|
from datahub.emitter.mcp_builder import (
|
|
29
30
|
ContainerKey,
|
|
@@ -155,10 +156,7 @@ class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase):
|
|
|
155
156
|
workspace: str = Field(
|
|
156
157
|
description="The Mode workspace name. Find it in Settings > Workspace > Details."
|
|
157
158
|
)
|
|
158
|
-
|
|
159
|
-
default="public",
|
|
160
|
-
description="Default schema to use when schema is not provided in an SQL query",
|
|
161
|
-
)
|
|
159
|
+
_default_schema = pydantic_removed_field("default_schema")
|
|
162
160
|
|
|
163
161
|
space_pattern: AllowDenyPattern = Field(
|
|
164
162
|
default=AllowDenyPattern(
|
|
@@ -68,6 +68,7 @@ from datahub.metadata.schema_classes import (
|
|
|
68
68
|
UnionTypeClass,
|
|
69
69
|
)
|
|
70
70
|
from datahub.metadata.urns import DatasetUrn
|
|
71
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
71
72
|
|
|
72
73
|
logger = logging.getLogger(__name__)
|
|
73
74
|
|
|
@@ -143,7 +144,7 @@ class MongoDBConfig(
|
|
|
143
144
|
|
|
144
145
|
@dataclass
|
|
145
146
|
class MongoDBSourceReport(StaleEntityRemovalSourceReport):
|
|
146
|
-
filtered:
|
|
147
|
+
filtered: LossyList[str] = field(default_factory=LossyList)
|
|
147
148
|
|
|
148
149
|
def report_dropped(self, name: str) -> None:
|
|
149
150
|
self.filtered.append(name)
|
datahub/ingestion/source/nifi.py
CHANGED
|
@@ -46,6 +46,7 @@ from datahub.metadata.schema_classes import (
|
|
|
46
46
|
DatasetPropertiesClass,
|
|
47
47
|
)
|
|
48
48
|
from datahub.specific.datajob import DataJobPatchBuilder
|
|
49
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
49
50
|
|
|
50
51
|
logger = logging.getLogger(__name__)
|
|
51
52
|
NIFI = "nifi"
|
|
@@ -452,7 +453,7 @@ def get_attribute_value(attr_lst: List[dict], attr_name: str) -> Optional[str]:
|
|
|
452
453
|
|
|
453
454
|
@dataclass
|
|
454
455
|
class NifiSourceReport(SourceReport):
|
|
455
|
-
filtered:
|
|
456
|
+
filtered: LossyList[str] = field(default_factory=LossyList)
|
|
456
457
|
|
|
457
458
|
def report_dropped(self, ent_name: str) -> None:
|
|
458
459
|
self.filtered.append(ent_name)
|
|
@@ -195,8 +195,8 @@ class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
|
|
|
195
195
|
|
|
196
196
|
dashboards_scanned: int = 0
|
|
197
197
|
charts_scanned: int = 0
|
|
198
|
-
filtered_dashboards:
|
|
199
|
-
filtered_charts:
|
|
198
|
+
filtered_dashboards: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
199
|
+
filtered_charts: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
200
200
|
|
|
201
201
|
m_query_parse_timer: PerfTimer = dataclass_field(default_factory=PerfTimer)
|
|
202
202
|
m_query_parse_attempts: int = 0
|
|
@@ -53,6 +53,7 @@ from datahub.metadata.schema_classes import (
|
|
|
53
53
|
StatusClass,
|
|
54
54
|
)
|
|
55
55
|
from datahub.utilities.dedup_list import deduplicate_list
|
|
56
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
56
57
|
|
|
57
58
|
LOGGER = logging.getLogger(__name__)
|
|
58
59
|
|
|
@@ -476,7 +477,7 @@ class Mapper:
|
|
|
476
477
|
@dataclass
|
|
477
478
|
class PowerBiReportServerDashboardSourceReport(SourceReport):
|
|
478
479
|
scanned_report: int = 0
|
|
479
|
-
filtered_reports:
|
|
480
|
+
filtered_reports: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
480
481
|
|
|
481
482
|
def report_scanned(self, count: int = 1) -> None:
|
|
482
483
|
self.scanned_report += count
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
import math
|
|
3
3
|
import sys
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
-
from typing import Dict, Iterable, List, Optional
|
|
5
|
+
from typing import Dict, Iterable, List, Optional
|
|
6
6
|
|
|
7
7
|
import dateutil.parser as dp
|
|
8
8
|
from packaging import version
|
|
@@ -39,7 +39,7 @@ from datahub.metadata.schema_classes import (
|
|
|
39
39
|
DashboardInfoClass,
|
|
40
40
|
)
|
|
41
41
|
from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
|
|
42
|
-
from datahub.utilities.lossy_collections import LossyDict, LossyList
|
|
42
|
+
from datahub.utilities.lossy_collections import LossyDict, LossyList, LossySet
|
|
43
43
|
from datahub.utilities.perf_timer import PerfTimer
|
|
44
44
|
from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor
|
|
45
45
|
|
|
@@ -280,9 +280,9 @@ class RedashConfig(ConfigModel):
|
|
|
280
280
|
class RedashSourceReport(SourceReport):
|
|
281
281
|
items_scanned: int = 0
|
|
282
282
|
filtered: LossyList[str] = field(default_factory=LossyList)
|
|
283
|
-
queries_problem_parsing:
|
|
284
|
-
queries_no_dataset:
|
|
285
|
-
charts_no_input:
|
|
283
|
+
queries_problem_parsing: LossySet[str] = field(default_factory=LossySet)
|
|
284
|
+
queries_no_dataset: LossySet[str] = field(default_factory=LossySet)
|
|
285
|
+
charts_no_input: LossySet[str] = field(default_factory=LossySet)
|
|
286
286
|
total_queries: Optional[int] = field(
|
|
287
287
|
default=None,
|
|
288
288
|
)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import time
|
|
4
|
+
from dataclasses import dataclass, field as dataclass_field
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from enum import Enum
|
|
6
7
|
from typing import Any, Dict, Iterable, List, Optional
|
|
@@ -60,6 +61,7 @@ from datahub.metadata.schema_classes import (
|
|
|
60
61
|
TagAssociationClass,
|
|
61
62
|
)
|
|
62
63
|
from datahub.utilities import config_clean
|
|
64
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
63
65
|
|
|
64
66
|
logger = logging.getLogger(__name__)
|
|
65
67
|
|
|
@@ -146,8 +148,9 @@ class SalesforceConfig(DatasetSourceConfigMixin):
|
|
|
146
148
|
return config_clean.remove_trailing_slashes(v)
|
|
147
149
|
|
|
148
150
|
|
|
151
|
+
@dataclass
|
|
149
152
|
class SalesforceSourceReport(SourceReport):
|
|
150
|
-
filtered:
|
|
153
|
+
filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
151
154
|
|
|
152
155
|
def report_dropped(self, ent_name: str) -> None:
|
|
153
156
|
self.filtered.append(ent_name)
|