acryl-datahub 0.15.0rc25__py3-none-any.whl → 0.15.0.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
This version of acryl-datahub has been marked as a potentially problematic release.
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/METADATA +2236 -2240
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/RECORD +116 -106
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/entry_points.txt +1 -1
- datahub/__init__.py +1 -1
- datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
- datahub/api/entities/structuredproperties/structuredproperties.py +20 -8
- datahub/configuration/common.py +2 -5
- datahub/configuration/source_common.py +13 -0
- datahub/emitter/mce_builder.py +20 -4
- datahub/emitter/mcp_builder.py +2 -7
- datahub/emitter/mcp_patch_builder.py +37 -13
- datahub/emitter/rest_emitter.py +25 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +98 -0
- datahub/ingestion/api/closeable.py +3 -3
- datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
- datahub/ingestion/api/report.py +4 -1
- datahub/ingestion/api/sink.py +4 -3
- datahub/ingestion/api/source.py +4 -0
- datahub/ingestion/api/source_helpers.py +2 -6
- datahub/ingestion/glossary/classifier.py +2 -3
- datahub/ingestion/graph/client.py +6 -3
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
- datahub/ingestion/source/aws/aws_common.py +231 -27
- datahub/ingestion/source/aws/glue.py +12 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
- datahub/ingestion/source/datahub/config.py +22 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +3 -17
- datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -3
- datahub/ingestion/source/gc/datahub_gc.py +21 -5
- datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
- datahub/ingestion/source/gc/execution_request_cleanup.py +61 -16
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +178 -83
- datahub/ingestion/source/iceberg/iceberg.py +27 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +4 -0
- datahub/ingestion/source/kafka_connect/__init__.py +0 -0
- datahub/ingestion/source/kafka_connect/common.py +202 -0
- datahub/ingestion/source/kafka_connect/kafka_connect.py +367 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +341 -0
- datahub/ingestion/source/kafka_connect/source_connectors.py +570 -0
- datahub/ingestion/source/looker/looker_common.py +63 -2
- datahub/ingestion/source/looker/looker_dataclasses.py +7 -9
- datahub/ingestion/source/looker/looker_lib_wrapper.py +13 -1
- datahub/ingestion/source/looker/looker_source.py +31 -4
- datahub/ingestion/source/looker/looker_usage.py +23 -17
- datahub/ingestion/source/mlflow.py +30 -5
- datahub/ingestion/source/mode.py +40 -27
- datahub/ingestion/source/powerbi/config.py +1 -14
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +1 -1
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +1 -1
- datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +16 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +16 -15
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -34
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +14 -64
- datahub/ingestion/source/snowflake/snowflake_queries.py +44 -14
- datahub/ingestion/source/snowflake/snowflake_query.py +5 -10
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +53 -7
- datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +2 -5
- datahub/ingestion/source/snowflake/snowflake_utils.py +22 -18
- datahub/ingestion/source/snowflake/snowflake_v2.py +38 -34
- datahub/ingestion/source/sql/hive.py +621 -8
- datahub/ingestion/source/sql/hive_metastore.py +7 -0
- datahub/ingestion/source/sql/mssql/job_models.py +30 -1
- datahub/ingestion/source/sql/mssql/source.py +15 -1
- datahub/ingestion/source/sql/sql_common.py +41 -102
- datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
- datahub/ingestion/source/sql/sql_report.py +2 -0
- datahub/ingestion/source/state/checkpoint.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +122 -45
- datahub/ingestion/source/tableau/tableau_common.py +18 -0
- datahub/ingestion/source/tableau/tableau_constant.py +3 -1
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +6 -2
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/proxy.py +8 -27
- datahub/ingestion/source/usage/usage_common.py +15 -1
- datahub/ingestion/source_report/ingestion_stage.py +3 -0
- datahub/metadata/_schema_classes.py +256 -3
- datahub/metadata/_urns/urn_defs.py +168 -168
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
- datahub/metadata/schema.avsc +252 -33
- datahub/metadata/schemas/DataJobKey.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
- datahub/metadata/schemas/MLModelProperties.avsc +62 -2
- datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
- datahub/specific/aspect_helpers/__init__.py +0 -0
- datahub/specific/aspect_helpers/custom_properties.py +79 -0
- datahub/specific/aspect_helpers/ownership.py +67 -0
- datahub/specific/aspect_helpers/structured_properties.py +72 -0
- datahub/specific/aspect_helpers/tags.py +42 -0
- datahub/specific/aspect_helpers/terms.py +43 -0
- datahub/specific/chart.py +28 -184
- datahub/specific/dashboard.py +31 -196
- datahub/specific/datajob.py +34 -189
- datahub/specific/dataproduct.py +24 -86
- datahub/specific/dataset.py +48 -133
- datahub/specific/form.py +12 -32
- datahub/specific/structured_property.py +9 -9
- datahub/sql_parsing/sql_parsing_aggregator.py +10 -9
- datahub/sql_parsing/sqlglot_lineage.py +15 -5
- datahub/sql_parsing/tool_meta_extractor.py +119 -5
- datahub/utilities/time.py +8 -3
- datahub/utilities/urns/_urn_base.py +5 -7
- datahub/ingestion/source/kafka/kafka_connect.py +0 -1468
- datahub/specific/custom_properties.py +0 -37
- datahub/specific/ownership.py +0 -48
- datahub/specific/structured_properties.py +0 -53
- {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/top_level.txt +0 -0
datahub/specific/structured_property.py
CHANGED
@@ -29,7 +29,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path="
+            path=("qualifiedName",),
             value=qualified_name,
         )
         return self
@@ -41,7 +41,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path="
+            path=("displayName",),
             value=display_name,
         )
         return self
@@ -53,7 +53,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path="
+            path=("valueType",),
             value=value_type,
         )
         return self
@@ -66,7 +66,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path="
+            path=("typeQualifier",),
             value=type_qualifier,
         )
         return self
@@ -78,7 +78,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path=
+            path=("allowedValues", str(allowed_value.get("value"))),
             value=allowed_value,
         )
         return self
@@ -87,7 +87,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path="
+            path=("cardinality",),
             value=cardinality,
         )
         return self
@@ -98,7 +98,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path=
+            path=("entityTypes", str(entity_type)),
             value=entity_type,
         )
         return self
@@ -110,7 +110,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path="
+            path=("description",),
             value=description,
         )
         return self
@@ -119,7 +119,7 @@ class StructuredPropertyPatchBuilder(MetadataPatchProposal):
         self._add_patch(
             StructuredPropertyDefinition.ASPECT_NAME,
             "add",
-            path="
+            path=("immutable",),
             value=immutable,
         )
         return self
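The hunks above move every patch from a raw JSON-pointer string (truncated in this view) to a structured tuple path. A minimal usage sketch follows; the setter names are inferred from the patched fields and the urn is a placeholder, so treat this as illustrative rather than the package's documented API.

# Illustrative sketch of the tuple-based patch paths above.
# Setter names are inferred from the patched fields; the urn is a placeholder.
from datahub.specific.structured_property import StructuredPropertyPatchBuilder

builder = StructuredPropertyPatchBuilder(
    urn="urn:li:structuredProperty:io.example.retentionTime"
)
builder.set_display_name("Retention Time")  # "add" patch at path ("displayName",)
builder.set_cardinality("SINGLE")           # "add" patch at path ("cardinality",)

# The accumulated patches are emitted as MetadataChangeProposals.
for mcp in builder.build():
    print(mcp)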
datahub/sql_parsing/sql_parsing_aggregator.py
CHANGED
@@ -165,6 +165,7 @@ class KnownQueryLineageInfo:
     timestamp: Optional[datetime] = None
     session_id: Optional[str] = None
     query_type: QueryType = QueryType.UNKNOWN
+    query_id: Optional[str] = None
 
 
 @dataclasses.dataclass
@@ -198,7 +199,7 @@ class TableSwap:
 
 @dataclasses.dataclass
 class PreparsedQuery:
-    # If not provided, we will generate one using the
+    # If not provided, we will generate one using the fingerprint generator.
     query_id: Optional[QueryId]
 
     query_text: str
@@ -490,7 +491,7 @@ class SqlParsingAggregator(Closeable):
         self._exit_stack.push(self._query_usage_counts)
 
         # Tool Extractor
-        self._tool_meta_extractor = ToolMetaExtractor()
+        self._tool_meta_extractor = ToolMetaExtractor.create(graph)
         self.report.tool_meta_report = self._tool_meta_extractor.report
 
     def close(self) -> None:
@@ -618,12 +619,13 @@
         self.report.num_known_query_lineage += 1
 
         # Generate a fingerprint for the query.
-
-
-
-
-
-
+        query_fingerprint = known_query_lineage.query_id
+        if not query_fingerprint:
+            with self.report.sql_fingerprinting_timer:
+                query_fingerprint = get_query_fingerprint(
+                    known_query_lineage.query_text,
+                    platform=self.platform.platform_name,
+                )
         formatted_query = self._maybe_format_query(known_query_lineage.query_text)
 
         # Register the query.
@@ -848,7 +850,6 @@
         query_fingerprint = get_query_fingerprint(
             parsed.query_text,
             platform=self.platform.platform_name,
-            fast=True,
         )
 
         # Format the query.
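The new optional query_id on KnownQueryLineageInfo lets callers supply a precomputed fingerprint, which the known-lineage path above now uses before falling back to get_query_fingerprint. A hedged sketch; the urns and SQL are placeholders, and the downstream/upstreams field names are assumptions based on the rest of the class, not shown in this diff.

# Illustrative: supplying a precomputed query_id so the aggregator can skip
# fingerprinting. Urns, SQL, and the downstream/upstreams names are placeholders.
from datahub.sql_parsing.sql_parsing_aggregator import KnownQueryLineageInfo

known_lineage = KnownQueryLineageInfo(
    query_text="insert into db.sch.downstream select * from db.sch.upstream",
    downstream="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.sch.downstream,PROD)",
    upstreams=["urn:li:dataset:(urn:li:dataPlatform:snowflake,db.sch.upstream,PROD)"],
    query_id="snowflake-query-id-123",  # new field; None falls back to fingerprinting
)
# aggregator.add_known_query_lineage(known_lineage)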
datahub/sql_parsing/sqlglot_lineage.py
CHANGED
@@ -66,6 +66,7 @@ SQL_LINEAGE_TIMEOUT_ENABLED = get_boolean_env_variable(
     "SQL_LINEAGE_TIMEOUT_ENABLED", True
 )
 SQL_LINEAGE_TIMEOUT_SECONDS = 10
+SQL_PARSER_TRACE = get_boolean_env_variable("DATAHUB_SQL_PARSER_TRACE", False)
 
 
 # These rules are a subset of the rules in sqlglot.optimizer.optimizer.RULES.
@@ -365,10 +366,11 @@ def _prepare_query_columns(
 
         return node
 
-
-
-
-
+    if SQL_PARSER_TRACE:
+        logger.debug(
+            "Prior to case normalization sql %s",
+            statement.sql(pretty=True, dialect=dialect),
+        )
     statement = statement.transform(_sqlglot_force_column_normalizer, copy=False)
     # logger.debug(
     #     "Sql after casing normalization %s",
@@ -562,7 +564,7 @@ def _select_statement_cll(  # noqa: C901
             )
         )
 
-        # TODO: Also extract referenced columns (aka
+        # TODO: Also extract referenced columns (aka auxiliary / non-SELECT lineage)
     except (sqlglot.errors.OptimizeError, ValueError, IndexError) as e:
         raise SqlUnderstandingError(
             f"sqlglot failed to compute some lineage: {e}"
@@ -1022,6 +1024,14 @@ def _sqlglot_lineage_inner(
     logger.debug(
         f"Resolved {total_schemas_resolved} of {total_tables_discovered} table schemas"
     )
+    if SQL_PARSER_TRACE:
+        for qualified_table, schema_info in table_name_schema_mapping.items():
+            logger.debug(
+                "Table name %s resolved to %s with schema %s",
+                qualified_table,
+                table_name_urn_mapping[qualified_table],
+                schema_info,
+            )
 
     column_lineage: Optional[List[_ColumnLineageInfo]] = None
     try:
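SQL_PARSER_TRACE is read once at import time via get_boolean_env_variable, so the environment variable has to be set before the module is imported. A sketch of enabling the trace; the query and database names are placeholders, and create_lineage_sql_parsed_result is assumed to be the module's public entry point.

# Illustrative: the flag is read at import time, so set the environment
# variable first; the query and database names are placeholders.
import logging
import os

os.environ["DATAHUB_SQL_PARSER_TRACE"] = "true"
logging.basicConfig(level=logging.DEBUG)

from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result

result = create_lineage_sql_parsed_result(
    query="select id from upstream_table",
    default_db="analytics",
    platform="snowflake",
    platform_instance=None,
    env="PROD",
)
print(result.in_tables)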
datahub/sql_parsing/tool_meta_extractor.py
CHANGED
@@ -1,3 +1,4 @@
+import contextlib
 import json
 import logging
 from dataclasses import dataclass, field
@@ -5,8 +6,15 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
 
 from typing_extensions import Protocol
 
+from datahub.api.entities.platformresource.platform_resource import (
+    ElasticPlatformResourceQuery,
+    PlatformResource,
+    PlatformResourceSearchFields,
+)
 from datahub.ingestion.api.report import Report
+from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn
+from datahub.utilities.search_utils import LogicalOperator
 from datahub.utilities.stats_collections import int_top_k_dict
 
 UrnStr = str
@@ -31,6 +39,8 @@ def _get_last_line(query: str) -> str:
 @dataclass
 class ToolMetaExtractorReport(Report):
     num_queries_meta_extracted: Dict[str, int] = field(default_factory=int_top_k_dict)
+    failures: List[str] = field(default_factory=list)
+    looker_user_mapping_missing: Optional[bool] = None
 
 
 class ToolMetaExtractor:
@@ -42,14 +52,83 @@ class ToolMetaExtractor:
     by warehouse query logs.
     """
 
-    def __init__(
-        self
+    def __init__(
+        self,
+        report: ToolMetaExtractorReport,
+        looker_user_mapping: Optional[Dict[str, str]] = None,
+    ) -> None:
+        self.report = report
         self.known_tool_extractors: List[Tuple[str, Callable[[QueryLog], bool]]] = [
             (
                 "mode",
                 self._extract_mode_query,
-            )
+            ),
+            (
+                "looker",
+                self._extract_looker_query,
+            ),
         ]
+        # maps user id (as string) to email address
+        self.looker_user_mapping = looker_user_mapping
+
+    @classmethod
+    def create(
+        cls,
+        graph: Optional[DataHubGraph] = None,
+    ) -> "ToolMetaExtractor":
+        report = ToolMetaExtractorReport()
+        looker_user_mapping = None
+        if graph:
+            try:
+                looker_user_mapping = cls.extract_looker_user_mapping_from_graph(
+                    graph, report
+                )
+            except Exception as e:
+                report.failures.append(
+                    f"Unexpected error during Looker user metadata extraction: {str(e)}"
+                )
+
+        return cls(report, looker_user_mapping)
+
+    @classmethod
+    def extract_looker_user_mapping_from_graph(
+        cls, graph: DataHubGraph, report: ToolMetaExtractorReport
+    ) -> Optional[Dict[str, str]]:
+        looker_user_mapping = None
+        query = (
+            ElasticPlatformResourceQuery.create_from()
+            .group(LogicalOperator.AND)
+            .add_field_match(PlatformResourceSearchFields.PLATFORM, "looker")
+            .add_field_match(
+                PlatformResourceSearchFields.RESOURCE_TYPE,
+                "USER_ID_MAPPING",
+            )
+            .end()
+        )
+        platform_resources = list(
+            PlatformResource.search_by_filters(query=query, graph_client=graph)
+        )
+
+        if len(platform_resources) == 0:
+            report.looker_user_mapping_missing = True
+        elif len(platform_resources) > 1:
+            report.failures.append(
+                "Looker user metadata extraction failed. Found more than one looker user id mappings."
+            )
+        else:
+            platform_resource = platform_resources[0]
+
+            if (
+                platform_resource
+                and platform_resource.resource_info
+                and platform_resource.resource_info.value
+            ):
+                with contextlib.suppress(ValueError, AssertionError):
+                    value = platform_resource.resource_info.value.as_raw_json()
+                    if value:
+                        looker_user_mapping = value
+
+        return looker_user_mapping
 
     def _extract_mode_query(self, entry: QueryLog) -> bool:
         """
@@ -78,14 +157,49 @@ class ToolMetaExtractor:
 
         return True
 
+    def _extract_looker_query(self, entry: QueryLog) -> bool:
+        """
+        Returns:
+            bool: whether QueryLog entry is that of looker and looker user info
+            is extracted into entry.
+        """
+        if not self.looker_user_mapping:
+            return False
+
+        last_line = _get_last_line(entry.query_text)
+
+        if not (last_line.startswith("--") and "Looker Query Context" in last_line):
+            return False
+
+        start_quote_idx = last_line.index("'")
+        end_quote_idx = last_line.rindex("'")
+        if start_quote_idx == -1 or end_quote_idx == -1:
+            return False
+
+        looker_json_raw = last_line[start_quote_idx + 1 : end_quote_idx]
+        looker_json = json.loads(looker_json_raw)
+
+        user_id = str(looker_json["user_id"])
+        email = self.looker_user_mapping.get(user_id)
+        if not email:
+            return False
+
+        original_user = entry.user
+
+        entry.user = email_to_user_urn(email)
+        entry.extra_info = entry.extra_info or {}
+        entry.extra_info["user_via"] = original_user
+
+        return True
+
     def extract_bi_metadata(self, entry: QueryLog) -> bool:
         for tool, meta_extractor in self.known_tool_extractors:
             try:
                 if meta_extractor(entry):
                     self.report.num_queries_meta_extracted[tool] += 1
                     return True
-            except Exception:
-                logger.debug("Tool metadata extraction failed with error : {e}")
+            except Exception as e:
+                logger.debug(f"Tool metadata extraction failed with error : {e}")
         return False
 
 
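ToolMetaExtractor.create is the factory the aggregator now calls (see the sql_parsing_aggregator.py hunk above): given a graph client it tries to load the Looker user-id mapping from platform resources, records any failure in the report, and still returns a working extractor without one. A sketch; the server URL and token are placeholders.

# Illustrative: wiring the new factory to a DataHubGraph so Looker user
# attribution can be resolved. Server URL and token are placeholders.
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.sql_parsing.tool_meta_extractor import ToolMetaExtractor

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080", token=None))
extractor = ToolMetaExtractor.create(graph)
print(extractor.report)

# Without a graph, Looker attribution is disabled but Mode extraction still works.
offline_extractor = ToolMetaExtractor.create(None)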
datahub/utilities/time.py
CHANGED
@@ -1,6 +1,8 @@
 import time
 from dataclasses import dataclass
-from datetime import datetime
+from datetime import datetime
+
+from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
 
 
 def get_current_time_in_seconds() -> int:
@@ -9,12 +11,15 @@ def get_current_time_in_seconds() -> int:
 
 def ts_millis_to_datetime(ts_millis: int) -> datetime:
     """Converts input timestamp in milliseconds to a datetime object with UTC timezone"""
-    return
+    return parse_ts_millis(ts_millis)
 
 
 def datetime_to_ts_millis(dt: datetime) -> int:
     """Converts a datetime object to timestamp in milliseconds"""
-
+    # TODO: Deprecate these helpers in favor of make_ts_millis and parse_ts_millis.
+    # The other ones support None with a typing overload.
+    # Also possibly move those helpers to this file.
+    return make_ts_millis(dt)
 
 
 @dataclass
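Both helpers now delegate to parse_ts_millis and make_ts_millis from mce_builder; for timezone-aware datetimes the round trip is exact:

# Round trip through the refactored helpers.
from datetime import datetime, timezone

from datahub.utilities.time import datetime_to_ts_millis, ts_millis_to_datetime

dt = datetime(2024, 1, 1, tzinfo=timezone.utc)
ts = datetime_to_ts_millis(dt)
assert ts == 1704067200000
assert ts_millis_to_datetime(ts) == dt  # comes back as a UTC-aware datetime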
datahub/utilities/urns/_urn_base.py
CHANGED
@@ -1,9 +1,10 @@
 import functools
 import urllib.parse
 from abc import abstractmethod
-from typing import ClassVar, Dict, List, Optional, Type
+from typing import ClassVar, Dict, List, Optional, Type
 
 from deprecated import deprecated
+from typing_extensions import Self
 
 from datahub.utilities.urns.error import InvalidUrnError
 
@@ -42,9 +43,6 @@ def _split_entity_id(entity_id: str) -> List[str]:
     return parts
 
 
-_UrnSelf = TypeVar("_UrnSelf", bound="Urn")
-
-
 @functools.total_ordering
 class Urn:
     """
@@ -88,7 +86,7 @@ class Urn:
         return self._entity_ids
 
     @classmethod
-    def from_string(cls
+    def from_string(cls, urn_str: str) -> Self:
         """
         Creates an Urn from its string representation.
 
@@ -174,7 +172,7 @@ class Urn:
 
     @classmethod
     @deprecated(reason="prefer .from_string")
-    def create_from_string(cls
+    def create_from_string(cls, urn_str: str) -> Self:
         return cls.from_string(urn_str)
 
     @deprecated(reason="prefer .entity_ids")
@@ -270,5 +268,5 @@ class _SpecificUrn(Urn):
 
     @classmethod
     @abstractmethod
-    def _parse_ids(cls
+    def _parse_ids(cls, entity_ids: List[str]) -> Self:
         raise NotImplementedError()