acryl-datahub 1.2.0.10rc2__py3-none-any.whl → 1.2.0.10rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/METADATA +2525 -2609
- {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/RECORD +93 -93
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +6 -3
- datahub/api/entities/dataset/dataset.py +9 -18
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/docker_check.py +2 -2
- datahub/configuration/common.py +29 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/pydantic_migration_helpers.py +0 -9
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +5 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/source/azure/azure_common.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
- datahub/ingestion/source/datahub/config.py +8 -9
- datahub/ingestion/source/delta_lake/config.py +1 -1
- datahub/ingestion/source/dremio/dremio_config.py +3 -4
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/ge_profiling_config.py +26 -22
- datahub/ingestion/source/grafana/grafana_config.py +2 -2
- datahub/ingestion/source/grafana/models.py +12 -14
- datahub/ingestion/source/hex/hex.py +6 -1
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/looker/looker_common.py +55 -75
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_source.py +445 -548
- datahub/ingestion/source/looker/lookml_config.py +1 -1
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +1 -1
- datahub/ingestion/source/mode.py +13 -5
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +14 -21
- datahub/ingestion/source/preset.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/redshift/config.py +6 -3
- datahub/ingestion/source/salesforce.py +13 -9
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
- datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +12 -7
- datahub/ingestion/source/sql/cockroachdb.py +5 -3
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +7 -9
- datahub/ingestion/source/sql/mssql/source.py +2 -2
- datahub/ingestion/source/sql/mysql.py +2 -2
- datahub/ingestion/source/sql/oracle.py +3 -3
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/teradata.py +4 -4
- datahub/ingestion/source/sql/trino.py +2 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +1 -1
- datahub/ingestion/source/sql_queries.py +6 -6
- datahub/ingestion/source/state/checkpoint.py +5 -1
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
- datahub/ingestion/source/superset.py +29 -4
- datahub/ingestion/source/tableau/tableau.py +65 -11
- datahub/ingestion/source/tableau/tableau_common.py +5 -0
- datahub/ingestion/source/tableau/tableau_constant.py +1 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +7 -3
- datahub/ingestion/source/usage/usage_common.py +3 -3
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/metadata/_internal_schema_classes.py +45 -1
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/schema.avsc +24 -1
- datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
- datahub/sdk/dashboard.py +0 -2
- datahub/sdk/search_filters.py +1 -7
- {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
|
|
|
28
28
|
User,
|
|
29
29
|
WriteQuery,
|
|
30
30
|
)
|
|
31
|
-
from pydantic
|
|
31
|
+
from pydantic import validator
|
|
32
32
|
|
|
33
33
|
import datahub.emitter.mce_builder as builder
|
|
34
34
|
from datahub.api.entities.platformresource.platform_resource import (
|
|
@@ -36,7 +36,7 @@ from datahub.api.entities.platformresource.platform_resource import (
|
|
|
36
36
|
PlatformResourceKey,
|
|
37
37
|
)
|
|
38
38
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
39
|
-
from datahub.emitter.mcp_builder import ContainerKey
|
|
39
|
+
from datahub.emitter.mcp_builder import ContainerKey
|
|
40
40
|
from datahub.ingestion.api.report import Report
|
|
41
41
|
from datahub.ingestion.api.source import SourceReport
|
|
42
42
|
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
|
|
@@ -72,7 +72,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
|
72
72
|
UpstreamClass,
|
|
73
73
|
UpstreamLineage,
|
|
74
74
|
)
|
|
75
|
-
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
|
|
76
75
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
77
76
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
78
77
|
ArrayTypeClass,
|
|
@@ -90,21 +89,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
90
89
|
)
|
|
91
90
|
from datahub.metadata.schema_classes import (
|
|
92
91
|
BrowsePathEntryClass,
|
|
93
|
-
BrowsePathsClass,
|
|
94
92
|
BrowsePathsV2Class,
|
|
95
|
-
|
|
96
|
-
DatasetPropertiesClass,
|
|
93
|
+
EmbedClass,
|
|
97
94
|
EnumTypeClass,
|
|
98
95
|
FineGrainedLineageClass,
|
|
99
96
|
GlobalTagsClass,
|
|
100
97
|
SchemaMetadataClass,
|
|
101
|
-
StatusClass,
|
|
102
|
-
SubTypesClass,
|
|
103
98
|
TagAssociationClass,
|
|
104
99
|
TagPropertiesClass,
|
|
105
100
|
TagSnapshotClass,
|
|
106
101
|
)
|
|
107
102
|
from datahub.metadata.urns import TagUrn
|
|
103
|
+
from datahub.sdk.dataset import Dataset
|
|
108
104
|
from datahub.sql_parsing.sqlglot_lineage import ColumnRef
|
|
109
105
|
from datahub.utilities.lossy_collections import LossyList, LossySet
|
|
110
106
|
from datahub.utilities.url_util import remove_port_from_url
|
|
@@ -1307,50 +1303,28 @@ class LookerExplore:
|
|
|
1307
1303
|
reporter: SourceReport,
|
|
1308
1304
|
base_url: str,
|
|
1309
1305
|
extract_embed_urls: bool,
|
|
1310
|
-
) ->
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
dataset_snapshot = DatasetSnapshot(
|
|
1315
|
-
urn=self.get_explore_urn(config),
|
|
1316
|
-
aspects=[], # we append to this list later on
|
|
1317
|
-
)
|
|
1318
|
-
|
|
1319
|
-
model_key = gen_model_key(config, self.model_name)
|
|
1320
|
-
browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
|
|
1321
|
-
container = ContainerClass(container=model_key.as_urn())
|
|
1322
|
-
dataset_snapshot.aspects.append(browse_paths)
|
|
1323
|
-
dataset_snapshot.aspects.append(StatusClass(removed=False))
|
|
1324
|
-
|
|
1325
|
-
custom_properties = {
|
|
1326
|
-
"project": self.project_name,
|
|
1327
|
-
"model": self.model_name,
|
|
1328
|
-
"looker.explore.label": self.label,
|
|
1329
|
-
"looker.explore.name": self.name,
|
|
1330
|
-
"looker.explore.file": self.source_file,
|
|
1331
|
-
}
|
|
1332
|
-
dataset_props = DatasetPropertiesClass(
|
|
1333
|
-
name=str(self.label) if self.label else LookerUtil._display_name(self.name),
|
|
1334
|
-
description=self.description,
|
|
1335
|
-
customProperties={
|
|
1336
|
-
k: str(v) for k, v in custom_properties.items() if v is not None
|
|
1337
|
-
},
|
|
1338
|
-
)
|
|
1339
|
-
dataset_props.externalUrl = self._get_url(base_url)
|
|
1306
|
+
) -> Dataset:
|
|
1307
|
+
"""
|
|
1308
|
+
Generate a Dataset metadata event for this Looker Explore.
|
|
1340
1309
|
|
|
1341
|
-
|
|
1310
|
+
Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
|
|
1311
|
+
Passthrough explores and joins are handled via lineage and do not need additional nodes.
|
|
1312
|
+
"""
|
|
1313
|
+
upstream_lineage = None
|
|
1342
1314
|
view_name_to_urn_map: Dict[str, str] = {}
|
|
1315
|
+
|
|
1343
1316
|
if self.upstream_views is not None:
|
|
1344
1317
|
assert self.project_name is not None
|
|
1345
|
-
upstreams = []
|
|
1318
|
+
upstreams: list[UpstreamClass] = []
|
|
1346
1319
|
observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
1320
|
+
|
|
1347
1321
|
for view_ref in sorted(self.upstream_views):
|
|
1348
1322
|
# set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
|
|
1349
1323
|
# if we raise error on file_path equal to None then existing test-cases will fail as mock data
|
|
1350
1324
|
# doesn't have required attributes.
|
|
1351
1325
|
file_path: str = (
|
|
1352
1326
|
cast(str, self.upstream_views_file_path[view_ref.include])
|
|
1353
|
-
if self.upstream_views_file_path
|
|
1327
|
+
if self.upstream_views_file_path.get(view_ref.include) is not None
|
|
1354
1328
|
else ViewFieldValue.NOT_AVAILABLE.value
|
|
1355
1329
|
)
|
|
1356
1330
|
|
|
@@ -1377,7 +1351,7 @@ class LookerExplore:
|
|
|
1377
1351
|
)
|
|
1378
1352
|
view_name_to_urn_map[view_ref.include] = view_urn
|
|
1379
1353
|
|
|
1380
|
-
fine_grained_lineages = []
|
|
1354
|
+
fine_grained_lineages: list[FineGrainedLineageClass] = []
|
|
1381
1355
|
if config.extract_column_level_lineage:
|
|
1382
1356
|
for field in self.fields or []:
|
|
1383
1357
|
# Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
|
|
@@ -1418,9 +1392,11 @@ class LookerExplore:
|
|
|
1418
1392
|
)
|
|
1419
1393
|
|
|
1420
1394
|
upstream_lineage = UpstreamLineage(
|
|
1421
|
-
upstreams=upstreams,
|
|
1395
|
+
upstreams=upstreams,
|
|
1396
|
+
fineGrainedLineages=fine_grained_lineages or None,
|
|
1422
1397
|
)
|
|
1423
|
-
|
|
1398
|
+
|
|
1399
|
+
schema_metadata = None
|
|
1424
1400
|
if self.fields is not None:
|
|
1425
1401
|
schema_metadata = LookerUtil._get_schema(
|
|
1426
1402
|
platform_name=config.platform_name,
|
|
@@ -1428,42 +1404,46 @@ class LookerExplore:
|
|
|
1428
1404
|
view_fields=self.fields,
|
|
1429
1405
|
reporter=reporter,
|
|
1430
1406
|
)
|
|
1431
|
-
if schema_metadata is not None:
|
|
1432
|
-
dataset_snapshot.aspects.append(schema_metadata)
|
|
1433
|
-
|
|
1434
|
-
mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
|
|
1435
|
-
mcp = MetadataChangeProposalWrapper(
|
|
1436
|
-
entityUrn=dataset_snapshot.urn,
|
|
1437
|
-
aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
|
|
1438
|
-
)
|
|
1439
1407
|
|
|
1440
|
-
|
|
1441
|
-
mce,
|
|
1442
|
-
mcp,
|
|
1443
|
-
]
|
|
1444
|
-
|
|
1445
|
-
# Add tags
|
|
1446
|
-
explore_tag_urns: List[TagAssociationClass] = [
|
|
1447
|
-
TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
|
|
1448
|
-
]
|
|
1449
|
-
if explore_tag_urns:
|
|
1450
|
-
dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
|
|
1408
|
+
extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
|
|
1451
1409
|
|
|
1452
|
-
|
|
1410
|
+
explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
|
|
1453
1411
|
if extract_embed_urls:
|
|
1454
|
-
|
|
1455
|
-
dataset_snapshot.urn, self._get_embed_url(base_url)
|
|
1456
|
-
)
|
|
1457
|
-
proposals.append(embed_mcp)
|
|
1412
|
+
extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))
|
|
1458
1413
|
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1414
|
+
custom_properties: Dict[str, Optional[str]] = {
|
|
1415
|
+
"project": self.project_name,
|
|
1416
|
+
"model": self.model_name,
|
|
1417
|
+
"looker.explore.label": self.label,
|
|
1418
|
+
"looker.explore.name": self.name,
|
|
1419
|
+
"looker.explore.file": self.source_file,
|
|
1420
|
+
}
|
|
1465
1421
|
|
|
1466
|
-
return
|
|
1422
|
+
return Dataset(
|
|
1423
|
+
platform=config.platform_name,
|
|
1424
|
+
name=config.explore_naming_pattern.replace_variables(
|
|
1425
|
+
self.get_mapping(config)
|
|
1426
|
+
),
|
|
1427
|
+
display_name=str(self.label)
|
|
1428
|
+
if self.label
|
|
1429
|
+
else LookerUtil._display_name(self.name),
|
|
1430
|
+
description=self.description,
|
|
1431
|
+
subtype=DatasetSubTypes.LOOKER_EXPLORE,
|
|
1432
|
+
env=config.env,
|
|
1433
|
+
platform_instance=config.platform_instance,
|
|
1434
|
+
custom_properties={
|
|
1435
|
+
k: str(v) for k, v in custom_properties.items() if v is not None
|
|
1436
|
+
},
|
|
1437
|
+
external_url=self._get_url(base_url),
|
|
1438
|
+
upstreams=upstream_lineage,
|
|
1439
|
+
schema=schema_metadata,
|
|
1440
|
+
parent_container=[
|
|
1441
|
+
"Explore",
|
|
1442
|
+
gen_model_key(config, self.model_name).as_urn(),
|
|
1443
|
+
],
|
|
1444
|
+
tags=explore_tag_urns if explore_tag_urns else None,
|
|
1445
|
+
extra_aspects=extra_aspects,
|
|
1446
|
+
)
|
|
1467
1447
|
|
|
1468
1448
|
|
|
1469
1449
|
def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
|
|
@@ -5,10 +5,14 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
|
|
|
5
5
|
|
|
6
6
|
import pydantic
|
|
7
7
|
from looker_sdk.sdk.api40.models import DBConnection
|
|
8
|
-
from pydantic import Field, validator
|
|
8
|
+
from pydantic import Field, model_validator, validator
|
|
9
9
|
|
|
10
10
|
from datahub.configuration import ConfigModel
|
|
11
|
-
from datahub.configuration.common import
|
|
11
|
+
from datahub.configuration.common import (
|
|
12
|
+
AllowDenyPattern,
|
|
13
|
+
ConfigurationError,
|
|
14
|
+
HiddenFromDocs,
|
|
15
|
+
)
|
|
12
16
|
from datahub.configuration.source_common import (
|
|
13
17
|
EnvConfigMixin,
|
|
14
18
|
PlatformInstanceConfigMixin,
|
|
@@ -43,6 +47,14 @@ class NamingPattern(ConfigModel):
|
|
|
43
47
|
assert isinstance(v, str), "pattern must be a string"
|
|
44
48
|
return {"pattern": v}
|
|
45
49
|
|
|
50
|
+
@model_validator(mode="before")
|
|
51
|
+
@classmethod
|
|
52
|
+
def pydantic_v2_accept_raw_pattern(cls, v):
|
|
53
|
+
# Pydantic v2 compatibility: handle string input by converting to dict
|
|
54
|
+
if isinstance(v, str):
|
|
55
|
+
return {"pattern": v}
|
|
56
|
+
return v
|
|
57
|
+
|
|
46
58
|
@classmethod
|
|
47
59
|
def pydantic_validate_pattern(cls, v):
|
|
48
60
|
assert isinstance(v, NamingPattern)
|
|
@@ -132,11 +144,10 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin):
|
|
|
132
144
|
description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more "
|
|
133
145
|
"discoverable. When disabled, adds this information to the description of the column.",
|
|
134
146
|
)
|
|
135
|
-
platform_name: str = Field(
|
|
147
|
+
platform_name: HiddenFromDocs[str] = Field(
|
|
136
148
|
# TODO: This shouldn't be part of the config.
|
|
137
149
|
"looker",
|
|
138
150
|
description="Default platform name.",
|
|
139
|
-
hidden_from_docs=True,
|
|
140
151
|
)
|
|
141
152
|
extract_column_level_lineage: bool = Field(
|
|
142
153
|
True,
|