acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.0.0.1rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/METADATA +2486 -2487
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/RECORD +64 -49
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/entry_points.txt +2 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -28
- datahub/emitter/request_helper.py +19 -14
- datahub/ingestion/api/source.py +6 -2
- datahub/ingestion/api/source_helpers.py +6 -2
- datahub/ingestion/extractor/schema_util.py +1 -0
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
- datahub/ingestion/source/common/subtypes.py +15 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
- datahub/ingestion/source/dbt/dbt_common.py +6 -4
- datahub/ingestion/source/dbt/dbt_core.py +4 -6
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_source.py +96 -117
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +394 -0
- datahub/ingestion/source/hex/constants.py +3 -0
- datahub/ingestion/source/hex/hex.py +167 -0
- datahub/ingestion/source/hex/mapper.py +372 -0
- datahub/ingestion/source/hex/model.py +68 -0
- datahub/ingestion/source/iceberg/iceberg.py +62 -66
- datahub/ingestion/source/mlflow.py +198 -7
- datahub/ingestion/source/mode.py +11 -1
- datahub/ingestion/source/openapi.py +69 -34
- datahub/ingestion/source/powerbi/powerbi.py +29 -23
- datahub/ingestion/source/s3/source.py +11 -0
- datahub/ingestion/source/slack/slack.py +399 -82
- datahub/ingestion/source/superset.py +138 -22
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1055 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
- datahub/metadata/_schema_classes.py +472 -1
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/schema.avsc +311 -2
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +30 -0
- datahub/metadata/schemas/QueryProperties.avsc +20 -0
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/sdk/dataset.py +122 -0
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +27 -3
- datahub/sdk/main_client.py +22 -0
- datahub/sdk/search_filters.py +4 -4
- datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
- datahub/sql_parsing/tool_meta_extractor.py +27 -2
- datahub/testing/mcp_diff.py +1 -18
- datahub/ingestion/source/vertexai.py +0 -697
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1rc2.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/top_level.txt +0 -0
|
@@ -30,6 +30,7 @@ from datahub.metadata.urns import (
|
|
|
30
30
|
DatasetUrn,
|
|
31
31
|
QueryUrn,
|
|
32
32
|
SchemaFieldUrn,
|
|
33
|
+
Urn,
|
|
33
34
|
)
|
|
34
35
|
from datahub.sql_parsing.schema_resolver import (
|
|
35
36
|
SchemaResolver,
|
|
@@ -139,6 +140,8 @@ class QueryMetadata:
|
|
|
139
140
|
|
|
140
141
|
used_temp_tables: bool = True
|
|
141
142
|
|
|
143
|
+
origin: Optional[Urn] = None
|
|
144
|
+
|
|
142
145
|
def make_created_audit_stamp(self) -> models.AuditStampClass:
|
|
143
146
|
return models.AuditStampClass(
|
|
144
147
|
time=make_ts_millis(self.latest_timestamp) or 0,
|
|
@@ -221,6 +224,7 @@ class PreparsedQuery:
|
|
|
221
224
|
)
|
|
222
225
|
# Use this to store additional key-value information about query for debugging
|
|
223
226
|
extra_info: Optional[dict] = None
|
|
227
|
+
origin: Optional[Urn] = None
|
|
224
228
|
|
|
225
229
|
|
|
226
230
|
@dataclasses.dataclass
|
|
@@ -903,6 +907,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
903
907
|
column_usage=parsed.column_usage or {},
|
|
904
908
|
confidence_score=parsed.confidence_score,
|
|
905
909
|
used_temp_tables=session_has_temp_tables,
|
|
910
|
+
origin=parsed.origin,
|
|
906
911
|
)
|
|
907
912
|
)
|
|
908
913
|
|
|
@@ -1464,6 +1469,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
1464
1469
|
source=models.QuerySourceClass.SYSTEM,
|
|
1465
1470
|
created=query.make_created_audit_stamp(),
|
|
1466
1471
|
lastModified=query.make_last_modified_audit_stamp(),
|
|
1472
|
+
origin=query.origin.urn() if query.origin else None,
|
|
1467
1473
|
),
|
|
1468
1474
|
models.QuerySubjectsClass(
|
|
1469
1475
|
subjects=[
|
|
@@ -13,7 +13,7 @@ from datahub.api.entities.platformresource.platform_resource import (
|
|
|
13
13
|
)
|
|
14
14
|
from datahub.ingestion.api.report import Report
|
|
15
15
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
16
|
-
from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn
|
|
16
|
+
from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn, DataPlatformUrn, Urn
|
|
17
17
|
from datahub.utilities.search_utils import LogicalOperator
|
|
18
18
|
from datahub.utilities.stats_collections import int_top_k_dict
|
|
19
19
|
|
|
@@ -21,6 +21,10 @@ UrnStr = str
|
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
24
|
+
MODE_PLATFORM_URN = DataPlatformUrn.from_string("urn:li:dataPlatform:mode")
|
|
25
|
+
LOOKER_PLATFORM_URN = DataPlatformUrn.from_string("urn:li:dataPlatform:looker")
|
|
26
|
+
HEX_PLATFORM_URN = DataPlatformUrn.from_string("urn:li:dataPlatform:hex")
|
|
27
|
+
|
|
24
28
|
|
|
25
29
|
class QueryLog(Protocol):
|
|
26
30
|
"""Represents Query Log Entry
|
|
@@ -30,6 +34,7 @@ class QueryLog(Protocol):
|
|
|
30
34
|
query_text: str
|
|
31
35
|
user: Optional[Union[CorpUserUrn, CorpGroupUrn]]
|
|
32
36
|
extra_info: Optional[dict]
|
|
37
|
+
origin: Optional[Urn]
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
def _get_last_line(query: str) -> str:
|
|
@@ -67,6 +72,10 @@ class ToolMetaExtractor:
|
|
|
67
72
|
"looker",
|
|
68
73
|
self._extract_looker_query,
|
|
69
74
|
),
|
|
75
|
+
(
|
|
76
|
+
"hex",
|
|
77
|
+
self._extract_hex_query,
|
|
78
|
+
),
|
|
70
79
|
]
|
|
71
80
|
# maps user id (as string) to email address
|
|
72
81
|
self.looker_user_mapping = looker_user_mapping
|
|
@@ -153,7 +162,7 @@ class ToolMetaExtractor:
|
|
|
153
162
|
entry.extra_info = entry.extra_info or {}
|
|
154
163
|
entry.extra_info["user_via"] = original_user
|
|
155
164
|
|
|
156
|
-
|
|
165
|
+
entry.origin = MODE_PLATFORM_URN
|
|
157
166
|
|
|
158
167
|
return True
|
|
159
168
|
|
|
@@ -190,6 +199,22 @@ class ToolMetaExtractor:
|
|
|
190
199
|
entry.extra_info = entry.extra_info or {}
|
|
191
200
|
entry.extra_info["user_via"] = original_user
|
|
192
201
|
|
|
202
|
+
entry.origin = LOOKER_PLATFORM_URN
|
|
203
|
+
|
|
204
|
+
return True
|
|
205
|
+
|
|
206
|
+
def _extract_hex_query(self, entry: QueryLog) -> bool:
|
|
207
|
+
"""
|
|
208
|
+
Returns:
|
|
209
|
+
bool: whether QueryLog entry is that of hex.
|
|
210
|
+
"""
|
|
211
|
+
last_line = _get_last_line(entry.query_text)
|
|
212
|
+
|
|
213
|
+
if not last_line.startswith("-- Hex query metadata:"):
|
|
214
|
+
return False
|
|
215
|
+
|
|
216
|
+
entry.origin = HEX_PLATFORM_URN
|
|
217
|
+
|
|
193
218
|
return True
|
|
194
219
|
|
|
195
220
|
def extract_bi_metadata(self, entry: QueryLog) -> bool:
|
datahub/testing/mcp_diff.py
CHANGED
|
@@ -8,7 +8,6 @@ import deepdiff.serialization
|
|
|
8
8
|
import yaml
|
|
9
9
|
from deepdiff import DeepDiff
|
|
10
10
|
from deepdiff.model import DiffLevel
|
|
11
|
-
from deepdiff.operator import BaseOperator
|
|
12
11
|
from typing_extensions import Literal
|
|
13
12
|
|
|
14
13
|
ReportType = Literal[
|
|
@@ -59,27 +58,12 @@ class AspectForDiff:
|
|
|
59
58
|
|
|
60
59
|
@dataclasses.dataclass
|
|
61
60
|
class DeltaInfo:
|
|
62
|
-
"""Information about an MCP used to construct a diff delta.
|
|
63
|
-
|
|
64
|
-
In a separate class so it can be ignored by DeepDiff via MCPDeltaInfoOperator.
|
|
65
|
-
"""
|
|
61
|
+
"""Information about an MCP used to construct a diff delta."""
|
|
66
62
|
|
|
67
63
|
idx: int # Location in list of MCEs in golden file
|
|
68
64
|
original: Dict[str, Any] # Original json-serialized MCP
|
|
69
65
|
|
|
70
66
|
|
|
71
|
-
class DeltaInfoOperator(BaseOperator):
|
|
72
|
-
"""Warning: Doesn't seem to be working right now.
|
|
73
|
-
Ignored via an ignore path as an extra layer of defense.
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
def __init__(self):
|
|
77
|
-
super().__init__(types=[DeltaInfo])
|
|
78
|
-
|
|
79
|
-
def give_up_diffing(self, *args: Any, **kwargs: Any) -> bool:
|
|
80
|
-
return True
|
|
81
|
-
|
|
82
|
-
|
|
83
67
|
AspectsByUrn = Dict[str, Dict[str, List[AspectForDiff]]]
|
|
84
68
|
|
|
85
69
|
|
|
@@ -176,7 +160,6 @@ class MCPDiff:
|
|
|
176
160
|
t2=t2,
|
|
177
161
|
exclude_regex_paths=ignore_paths,
|
|
178
162
|
ignore_order=True,
|
|
179
|
-
custom_operators=[DeltaInfoOperator()],
|
|
180
163
|
)
|
|
181
164
|
if diff:
|
|
182
165
|
aspect_changes[urn][aspect_name] = MCPAspectDiff.create(diff)
|