acryl-datahub 0.15.0rc25__py3-none-any.whl → 0.15.0.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/METADATA +2425 -2425
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/RECORD +31 -27
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/entry_points.txt +1 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +20 -8
- datahub/configuration/source_common.py +13 -0
- datahub/ingestion/source/iceberg/iceberg.py +27 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +4 -0
- datahub/ingestion/source/kafka_connect/__init__.py +0 -0
- datahub/ingestion/source/kafka_connect/common.py +202 -0
- datahub/ingestion/source/kafka_connect/kafka_connect.py +367 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +341 -0
- datahub/ingestion/source/kafka_connect/source_connectors.py +570 -0
- datahub/ingestion/source/looker/looker_common.py +54 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +13 -1
- datahub/ingestion/source/looker/looker_source.py +12 -1
- datahub/ingestion/source/mlflow.py +30 -5
- datahub/ingestion/source/powerbi/config.py +1 -14
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +1 -1
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -0
- datahub/ingestion/source/sql/mssql/job_models.py +30 -1
- datahub/ingestion/source/sql/mssql/source.py +14 -0
- datahub/ingestion/source/tableau/tableau.py +4 -5
- datahub/ingestion/source/tableau/tableau_constant.py +3 -1
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +6 -2
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/sql_parsing/sql_parsing_aggregator.py +1 -1
- datahub/sql_parsing/tool_meta_extractor.py +116 -5
- datahub/ingestion/source/kafka/kafka_connect.py +0 -1468
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/looker/looker_source.py

@@ -145,7 +145,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         self.source_config: LookerDashboardSourceConfig = config
         self.reporter: LookerDashboardSourceReport = LookerDashboardSourceReport()
         self.looker_api: LookerAPI = LookerAPI(self.source_config)
-        self.user_registry: LookerUserRegistry = LookerUserRegistry(self.looker_api)
+        self.user_registry: LookerUserRegistry = LookerUserRegistry(
+            self.looker_api, self.reporter
+        )
         self.explore_registry: LookerExploreRegistry = LookerExploreRegistry(
             self.looker_api, self.reporter, self.source_config
         )
@@ -1673,5 +1675,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             yield usage_mcp.as_workunit()
         self.reporter.report_stage_end("usage_extraction")
 
+        # Dump looker user resource mappings.
+        logger.info("Ingesting looker user resource mapping workunits")
+        self.reporter.report_stage_start("user_resource_extraction")
+        yield from auto_workunit(
+            self.user_registry.to_platform_resource(
+                self.source_config.platform_instance
+            )
+        )
+
     def get_report(self) -> SourceReport:
         return self.reporter
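The new `user_resource_extraction` stage pushes the user registry through `auto_workunit`. A minimal sketch of that helper in isolation, assuming this wheel's import paths; the urn and `StatusClass` aspect are placeholders, not what `to_platform_resource` actually emits:

```python
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.source_helpers import auto_workunit
from datahub.metadata.schema_classes import StatusClass

# auto_workunit wraps a stream of MCPs into MetadataWorkUnits, so the registry
# only has to yield proposals.
mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:corpuser:alice",
    aspect=StatusClass(removed=False),
)
for wu in auto_workunit([mcp]):
    print(wu.id)
```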
datahub/ingestion/source/mlflow.py

@@ -38,16 +38,30 @@ T = TypeVar("T")
 class MLflowConfig(EnvConfigMixin):
     tracking_uri: Optional[str] = Field(
         default=None,
-        description="Tracking server URI. If not set, an MLflow default tracking_uri is used (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)",
+        description=(
+            "Tracking server URI. If not set, an MLflow default tracking_uri is used"
+            " (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)"
+        ),
     )
     registry_uri: Optional[str] = Field(
         default=None,
-        description="Registry server URI. If not set, an MLflow default registry_uri is used (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)",
+        description=(
+            "Registry server URI. If not set, an MLflow default registry_uri is used"
+            " (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)"
+        ),
     )
     model_name_separator: str = Field(
         default="_",
         description="A string which separates model name from its version (e.g. model_1 or model-1)",
     )
+    base_external_url: Optional[str] = Field(
+        default=None,
+        description=(
+            "Base URL to use when constructing external URLs to MLflow."
+            " If not set, tracking_uri is used if it's an HTTP URL."
+            " If neither is set, external URLs are not generated."
+        ),
+    )
 
 
 @dataclass
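A hedged usage sketch of the new `base_external_url` option. URLs are illustrative; `parse_obj` is the pydantic v1 constructor these config models inherit, and importing this module requires the mlflow plugin to be installed:

```python
from datahub.ingestion.source.mlflow import MLflowConfig

# base_external_url matters when the MLflow UI is reachable at a different
# address than the tracking API, e.g. behind a reverse proxy.
config = MLflowConfig.parse_obj(
    {
        "tracking_uri": "http://mlflow.internal:5000",
        "base_external_url": "https://mlflow.example.com",
    }
)
assert config.base_external_url == "https://mlflow.example.com"
```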
@@ -279,12 +293,23 @@ class MLflowSource(Source):
         )
         return urn
 
-    def _make_external_url(self, model_version: ModelVersion) -> Union[None, str]:
+    def _get_base_external_url_from_tracking_uri(self) -> Optional[str]:
+        if isinstance(
+            self.client.tracking_uri, str
+        ) and self.client.tracking_uri.startswith("http"):
+            return self.client.tracking_uri
+        else:
+            return None
+
+    def _make_external_url(self, model_version: ModelVersion) -> Optional[str]:
         """
         Generate URL for a Model Version to MLflow UI.
         """
-        base_uri = self.client.tracking_uri
-        if base_uri.startswith("http"):
+        base_uri = (
+            self.config.base_external_url
+            or self._get_base_external_url_from_tracking_uri()
+        )
+        if base_uri:
             return f"{base_uri.rstrip('/')}/#/models/{model_version.name}/versions/{model_version.version}"
         else:
             return None
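The resolution order is: explicit `base_external_url`, then an HTTP(S) `tracking_uri`, else no external URL. A standalone restatement of that chain; `resolve_base_uri` is a hypothetical free function, not part of the package:

```python
from typing import Optional

def resolve_base_uri(
    base_external_url: Optional[str], tracking_uri: Optional[str]
) -> Optional[str]:
    # Mirrors _make_external_url's fallback chain above.
    if base_external_url:
        return base_external_url
    if isinstance(tracking_uri, str) and tracking_uri.startswith("http"):
        return tracking_uri
    return None

assert resolve_base_uri(None, "http://mlflow.internal:5000") == "http://mlflow.internal:5000"
assert resolve_base_uri("https://mlflow.example.com", "databricks") == "https://mlflow.example.com"
assert resolve_base_uri(None, "databricks") is None
```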
datahub/ingestion/source/powerbi/config.py

@@ -9,7 +9,7 @@ from pydantic.class_validators import root_validator
 
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern, ConfigModel
-from datahub.configuration.source_common import DatasetSourceConfigMixin
+from datahub.configuration.source_common import DatasetSourceConfigMixin, PlatformDetail
 from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
 from datahub.ingestion.source.common.subtypes import BIAssetSubTypes
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -232,19 +232,6 @@ def default_for_dataset_type_mapping() -> Dict[str, str]:
     return dict_
 
 
-class PlatformDetail(ConfigModel):
-    platform_instance: Optional[str] = pydantic.Field(
-        default=None,
-        description="DataHub platform instance name. To generate correct urn for upstream dataset, this should match "
-        "with platform instance name used in ingestion "
-        "recipe of other datahub sources.",
-    )
-    env: str = pydantic.Field(
-        default=builder.DEFAULT_ENV,
-        description="The environment that all assets produced by DataHub platform ingestion source belong to",
-    )
-
-
 class DataBricksPlatformDetail(PlatformDetail):
     """
     metastore is an additional field used in Databricks connector to generate the dataset urn
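`PlatformDetail` now lives in the shared `source_common` module, so non-PowerBI code can use it without importing PowerBI config. Its fields are unchanged per the removed definition above; the values below are illustrative:

```python
from datahub.configuration.source_common import PlatformDetail

detail = PlatformDetail(platform_instance="prod_powerbi", env="PROD")
assert detail.platform_instance == "prod_powerbi"
```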
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py

@@ -2,8 +2,8 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Union
 
+from datahub.configuration.source_common import PlatformDetail
 from datahub.ingestion.source.powerbi.config import (
-    PlatformDetail,
     PowerBiDashboardSourceConfig,
     PowerBIPlatformDetail,
 )
datahub/ingestion/source/powerbi/m_query/pattern_handler.py

@@ -5,13 +5,13 @@ from typing import Dict, List, Optional, Tuple, Type, cast
 
 from lark import Tree
 
+from datahub.configuration.source_common import PlatformDetail
 from datahub.emitter import mce_builder as builder
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.source.powerbi.config import (
     Constant,
     DataBricksPlatformDetail,
     DataPlatformPair,
-    PlatformDetail,
     PowerBiDashboardSourceConfig,
     PowerBiDashboardSourceReport,
     PowerBIPlatformDetail,
datahub/ingestion/source/snowflake/snowflake_v2.py

@@ -540,6 +540,7 @@ class SnowflakeV2Source(
             identifiers=self.identifiers,
             schema_resolver=schema_resolver,
             discovered_tables=discovered_datasets,
+            graph=self.ctx.graph,
         )
 
         # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs
datahub/ingestion/source/sql/mssql/job_models.py

@@ -1,11 +1,17 @@
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Union
 
-from datahub.emitter.mce_builder import make_data_flow_urn, make_data_job_urn
+from datahub.emitter.mce_builder import (
+    make_data_flow_urn,
+    make_data_job_urn,
+    make_data_platform_urn,
+    make_dataplatform_instance_urn,
+)
 from datahub.metadata.schema_classes import (
     DataFlowInfoClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
+    DataPlatformInstanceClass,
 )
@@ -204,6 +210,18 @@ class MSSQLDataJob:
             status=self.status,
         )
 
+    @property
+    def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
+        if self.entity.flow.platform_instance:
+            return DataPlatformInstanceClass(
+                platform=make_data_platform_urn(self.entity.flow.orchestrator),
+                instance=make_dataplatform_instance_urn(
+                    platform=self.entity.flow.orchestrator,
+                    instance=self.entity.flow.platform_instance,
+                ),
+            )
+        return None
+
 
 @dataclass
 class MSSQLDataFlow:
@@ -238,3 +256,14 @@ class MSSQLDataFlow:
             customProperties=self.flow_properties,
             externalUrl=self.external_url,
         )
+
+    @property
+    def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
+        if self.entity.platform_instance:
+            return DataPlatformInstanceClass(
+                platform=make_data_platform_urn(self.entity.orchestrator),
+                instance=make_dataplatform_instance_urn(
+                    self.entity.orchestrator, self.entity.platform_instance
+                ),
+            )
+        return None
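For reference, the urn helpers used by both properties produce strings like these (the instance name is illustrative):

```python
from datahub.emitter.mce_builder import (
    make_data_platform_urn,
    make_dataplatform_instance_urn,
)

print(make_data_platform_urn("mssql"))
# urn:li:dataPlatform:mssql
print(make_dataplatform_instance_urn("mssql", "my_instance"))
# urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my_instance)
```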
datahub/ingestion/source/sql/mssql/source.py

@@ -639,6 +639,13 @@ class SQLServerSource(SQLAlchemySource):
             aspect=data_job.as_datajob_info_aspect,
         ).as_workunit()
 
+        data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect
+        if data_platform_instance_aspect:
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_job.urn,
+                aspect=data_platform_instance_aspect,
+            ).as_workunit()
+
         if include_lineage:
             yield MetadataChangeProposalWrapper(
                 entityUrn=data_job.urn,
@@ -654,6 +661,13 @@ class SQLServerSource(SQLAlchemySource):
             entityUrn=data_flow.urn,
             aspect=data_flow.as_dataflow_info_aspect,
         ).as_workunit()
+
+        data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect
+        if data_platform_instance_aspect:
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_flow.urn,
+                aspect=data_platform_instance_aspect,
+            ).as_workunit()
         # TODO: Add SubType when it appear
 
     def get_inspectors(self) -> Iterable[Inspector]:
datahub/ingestion/source/tableau/tableau.py

@@ -645,7 +645,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
     # the site-role might be different on another site
     logged_in_user: UserInfo = UserInfo.from_server(server=server)
 
-    if not logged_in_user.is_site_administrator_explorer():
+    if not logged_in_user.has_site_administrator_explorer_privileges():
         report.warning(
             title=title,
             message=message,
@@ -896,10 +896,9 @@ class TableauSiteSource:
         return f"/{self.config.env.lower()}{self.no_env_browse_prefix}"
 
     def _re_authenticate(self):
-        tableau_auth: Union[TableauAuth, PersonalAccessTokenAuth] = (
-            self.config.get_tableau_auth(self.site_id)
-        )
-        self.server.auth.sign_in(tableau_auth)
+        # Sign-in again may not be enough because Tableau sometimes caches invalid sessions
+        # so we need to recreate the Tableau Server object
+        self.server = self.config.make_tableau_client(self.site_id)
 
     @property
     def site_content_url(self) -> Optional[str]:
datahub/ingestion/source/tableau/tableau_constant.py

@@ -82,4 +82,6 @@ PROJECT = "Project"
 SITE = "Site"
 IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
 SITE_PERMISSION = "sitePermission"
-SITE_ROLE = "SiteAdministratorExplorer"
+ROLE_SITE_ADMIN_EXPLORER = "SiteAdministratorExplorer"
+ROLE_SITE_ADMIN_CREATOR = "SiteAdministratorCreator"
+ROLE_SERVER_ADMIN = "ServerAdministrator"
datahub/ingestion/source/tableau/tableau_server_wrapper.py

@@ -11,8 +11,12 @@ class UserInfo:
     site_role: str
     site_id: str
 
-    def is_site_administrator_explorer(self):
-        return self.site_role == c.SITE_ROLE
+    def has_site_administrator_explorer_privileges(self):
+        return self.site_role in [
+            c.ROLE_SITE_ADMIN_EXPLORER,
+            c.ROLE_SITE_ADMIN_CREATOR,
+            c.ROLE_SERVER_ADMIN,
+        ]
 
     @staticmethod
     def from_server(server: Server) -> "UserInfo":
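The privilege check now accepts three site roles instead of one. A quick sanity check against the new constants (role strings per tableau_constant.py above):

```python
from datahub.ingestion.source.tableau import tableau_constant as c

admin_roles = [
    c.ROLE_SITE_ADMIN_EXPLORER,
    c.ROLE_SITE_ADMIN_CREATOR,
    c.ROLE_SERVER_ADMIN,
]
assert "ServerAdministrator" in admin_roles
assert "Viewer" not in admin_roles  # ordinary viewers still trigger the warning
```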
datahub/ingestion/source/tableau/tableau_validation.py

@@ -28,7 +28,7 @@ def check_user_role(
 
     try:
         # TODO: Add check for `Enable Derived Permissions`
-        if not logged_in_user.is_site_administrator_explorer():
+        if not logged_in_user.has_site_administrator_explorer_privileges():
            capability_dict[c.SITE_PERMISSION] = CapabilityReport(
                capable=False,
                failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.",
datahub/sql_parsing/sql_parsing_aggregator.py

@@ -490,7 +490,7 @@ class SqlParsingAggregator(Closeable):
             self._exit_stack.push(self._query_usage_counts)
 
         # Tool Extractor
-        self._tool_meta_extractor = ToolMetaExtractor()
+        self._tool_meta_extractor = ToolMetaExtractor.create(graph)
        self.report.tool_meta_report = self._tool_meta_extractor.report
 
     def close(self) -> None:
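The aggregator now builds the extractor through the `create` factory shown below, threading its optional `DataHubGraph` handle through. Without a graph the factory still succeeds and Looker attribution is simply disabled, as this sketch assumes:

```python
from datahub.sql_parsing.tool_meta_extractor import ToolMetaExtractor

extractor = ToolMetaExtractor.create(graph=None)
assert extractor.looker_user_mapping is None
assert extractor.report.failures == []
```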
datahub/sql_parsing/tool_meta_extractor.py

@@ -1,3 +1,4 @@
+import contextlib
 import json
 import logging
 from dataclasses import dataclass, field
@@ -5,8 +6,15 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
 
 from typing_extensions import Protocol
 
+from datahub.api.entities.platformresource.platform_resource import (
+    ElasticPlatformResourceQuery,
+    PlatformResource,
+    PlatformResourceSearchFields,
+)
 from datahub.ingestion.api.report import Report
+from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn
+from datahub.utilities.search_utils import LogicalOperator
 from datahub.utilities.stats_collections import int_top_k_dict
 
 UrnStr = str
@@ -31,6 +39,7 @@ def _get_last_line(query: str) -> str:
 @dataclass
 class ToolMetaExtractorReport(Report):
     num_queries_meta_extracted: Dict[str, int] = field(default_factory=int_top_k_dict)
+    failures: List[str] = field(default_factory=list)
 
 
 class ToolMetaExtractor:
@@ -42,14 +51,81 @@ class ToolMetaExtractor:
     by warehouse query logs.
     """
 
-    def __init__(self) -> None:
-        self.report = ToolMetaExtractorReport()
+    def __init__(
+        self,
+        report: ToolMetaExtractorReport,
+        looker_user_mapping: Optional[Dict[str, str]] = None,
+    ) -> None:
+        self.report = report
         self.known_tool_extractors: List[Tuple[str, Callable[[QueryLog], bool]]] = [
             (
                 "mode",
                 self._extract_mode_query,
-            )
+            ),
+            (
+                "looker",
+                self._extract_looker_query,
+            ),
         ]
+        # maps user id (as string) to email address
+        self.looker_user_mapping = looker_user_mapping
+
+    @classmethod
+    def create(
+        cls,
+        graph: Optional[DataHubGraph] = None,
+    ) -> "ToolMetaExtractor":
+        report = ToolMetaExtractorReport()
+        looker_user_mapping = None
+        if graph:
+            try:
+                looker_user_mapping = cls.extract_looker_user_mapping_from_graph(
+                    graph, report
+                )
+            except Exception as e:
+                report.failures.append(
+                    f"Unexpected error during Looker user metadata extraction: {str(e)}"
+                )
+
+        return cls(report, looker_user_mapping)
+
+    @classmethod
+    def extract_looker_user_mapping_from_graph(
+        cls, graph: DataHubGraph, report: ToolMetaExtractorReport
+    ) -> Optional[Dict[str, str]]:
+        looker_user_mapping = None
+        query = (
+            ElasticPlatformResourceQuery.create_from()
+            .group(LogicalOperator.AND)
+            .add_field_match(PlatformResourceSearchFields.PLATFORM, "looker")
+            .add_field_match(
+                PlatformResourceSearchFields.RESOURCE_TYPE,
+                "USER_ID_MAPPING",
+            )
+            .end()
+        )
+        platform_resources = list(
+            PlatformResource.search_by_filters(query=query, graph_client=graph)
+        )
+
+        if len(platform_resources) > 1:
+            report.failures.append(
+                "Looker user metadata extraction failed. Found more than one looker user id mappings."
+            )
+        else:
+            platform_resource = platform_resources[0]
+
+            if (
+                platform_resource
+                and platform_resource.resource_info
+                and platform_resource.resource_info.value
+            ):
+                with contextlib.suppress(ValueError, AssertionError):
+                    value = platform_resource.resource_info.value.as_raw_json()
+                    if value:
+                        looker_user_mapping = value
+
+        return looker_user_mapping
 
     def _extract_mode_query(self, entry: QueryLog) -> bool:
         """
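Per the `# maps user id (as string) to email address` comment, the stored `USER_ID_MAPPING` resource resolves to a plain dict. Constructing the extractor directly with a hand-built mapping (values illustrative) bypasses the graph lookup entirely:

```python
from datahub.sql_parsing.tool_meta_extractor import (
    ToolMetaExtractor,
    ToolMetaExtractorReport,
)

mapping = {"7": "alice@example.com", "42": "bob@example.com"}
extractor = ToolMetaExtractor(ToolMetaExtractorReport(), looker_user_mapping=mapping)
```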
@@ -78,14 +154,49 @@ class ToolMetaExtractor:
 
         return True
 
+    def _extract_looker_query(self, entry: QueryLog) -> bool:
+        """
+        Returns:
+            bool: whether QueryLog entry is that of looker and looker user info
+            is extracted into entry.
+        """
+        if not self.looker_user_mapping:
+            return False
+
+        last_line = _get_last_line(entry.query_text)
+
+        if not (last_line.startswith("--") and "Looker Query Context" in last_line):
+            return False
+
+        start_quote_idx = last_line.index("'")
+        end_quote_idx = last_line.rindex("'")
+        if start_quote_idx == -1 or end_quote_idx == -1:
+            return False
+
+        looker_json_raw = last_line[start_quote_idx + 1 : end_quote_idx]
+        looker_json = json.loads(looker_json_raw)
+
+        user_id = str(looker_json["user_id"])
+        email = self.looker_user_mapping.get(user_id)
+        if not email:
+            return False
+
+        original_user = entry.user
+
+        entry.user = email_to_user_urn(email)
+        entry.extra_info = entry.extra_info or {}
+        entry.extra_info["user_via"] = original_user
+
+        return True
+
     def extract_bi_metadata(self, entry: QueryLog) -> bool:
         for tool, meta_extractor in self.known_tool_extractors:
             try:
                 if meta_extractor(entry):
                     self.report.num_queries_meta_extracted[tool] += 1
                     return True
-            except Exception:
-                logger.debug("Tool metadata extraction failed with error : {e}")
+            except Exception as e:
+                logger.debug(f"Tool metadata extraction failed with error : {e}")
         return False
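`_extract_looker_query` keys off a trailing SQL comment. A self-contained recreation of the parse; the JSON payload is illustrative, and only `user_id` is required by the code above:

```python
import json

last_line = '-- Looker Query Context \'{"user_id": 7, "instance_slug": "abc"}\''

assert last_line.startswith("--") and "Looker Query Context" in last_line
raw = last_line[last_line.index("'") + 1 : last_line.rindex("'")]
assert str(json.loads(raw)["user_id"]) == "7"
```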