acryl-datahub 1.0.0.2rc5__py3-none-any.whl → 1.0.0.3rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/METADATA +2516 -2516
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/RECORD +24 -23
- datahub/_version.py +1 -1
- datahub/emitter/mcp.py +5 -1
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
- datahub/ingestion/source/hex/api.py +1 -20
- datahub/ingestion/source/mlflow.py +19 -6
- datahub/ingestion/source/powerbi/config.py +12 -0
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
- datahub/ingestion/source/sigma/config.py +75 -6
- datahub/ingestion/source/sigma/sigma.py +16 -1
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
- datahub/ingestion/source/snowflake/snowflake_query.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/sql_parsing/sqlglot_utils.py +16 -8
- datahub/testing/mcp_diff.py +15 -2
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/top_level.txt +0 -0
|
@@ -257,7 +257,10 @@ def generate_hash(text: str) -> str:
|
|
|
257
257
|
|
|
258
258
|
|
|
259
259
|
def get_query_fingerprint_debug(
|
|
260
|
-
expression: sqlglot.exp.ExpOrStr,
|
|
260
|
+
expression: sqlglot.exp.ExpOrStr,
|
|
261
|
+
platform: DialectOrStr,
|
|
262
|
+
fast: bool = False,
|
|
263
|
+
secondary_id: Optional[str] = None,
|
|
261
264
|
) -> Tuple[str, Optional[str]]:
|
|
262
265
|
try:
|
|
263
266
|
if not fast:
|
|
@@ -272,16 +275,18 @@ def get_query_fingerprint_debug(
|
|
|
272
275
|
logger.debug("Failed to generalize query for fingerprinting: %s", e)
|
|
273
276
|
expression_sql = None
|
|
274
277
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
)
|
|
278
|
+
text = expression_sql or _expression_to_string(expression, platform=platform)
|
|
279
|
+
if secondary_id:
|
|
280
|
+
text = text + " -- " + secondary_id
|
|
281
|
+
fingerprint = generate_hash(text=text)
|
|
280
282
|
return fingerprint, expression_sql
|
|
281
283
|
|
|
282
284
|
|
|
283
285
|
def get_query_fingerprint(
|
|
284
|
-
expression: sqlglot.exp.ExpOrStr,
|
|
286
|
+
expression: sqlglot.exp.ExpOrStr,
|
|
287
|
+
platform: DialectOrStr,
|
|
288
|
+
fast: bool = False,
|
|
289
|
+
secondary_id: Optional[str] = None,
|
|
285
290
|
) -> str:
|
|
286
291
|
"""Get a fingerprint for a SQL query.
|
|
287
292
|
|
|
@@ -298,12 +303,15 @@ def get_query_fingerprint(
|
|
|
298
303
|
Args:
|
|
299
304
|
expression: The SQL query to fingerprint.
|
|
300
305
|
platform: The SQL dialect to use.
|
|
306
|
+
secondary_id: An optional additional id string to include in the final fingerprint.
|
|
301
307
|
|
|
302
308
|
Returns:
|
|
303
309
|
The fingerprint for the SQL query.
|
|
304
310
|
"""
|
|
305
311
|
|
|
306
|
-
return get_query_fingerprint_debug(
|
|
312
|
+
return get_query_fingerprint_debug(
|
|
313
|
+
expression=expression, platform=platform, fast=fast, secondary_id=secondary_id
|
|
314
|
+
)[0]
|
|
307
315
|
|
|
308
316
|
|
|
309
317
|
@functools.lru_cache(maxsize=FORMAT_QUERY_CACHE_SIZE)
|
datahub/testing/mcp_diff.py
CHANGED
|
@@ -2,7 +2,7 @@ import dataclasses
|
|
|
2
2
|
import json
|
|
3
3
|
import re
|
|
4
4
|
from collections import defaultdict
|
|
5
|
-
from typing import Any, Dict, List, Sequence, Set, Tuple, Union
|
|
5
|
+
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
|
|
6
6
|
|
|
7
7
|
import deepdiff.serialization
|
|
8
8
|
import yaml
|
|
@@ -34,6 +34,7 @@ class AspectForDiff:
|
|
|
34
34
|
aspect_name: str
|
|
35
35
|
aspect: Dict[str, Any] = dataclasses.field(hash=False)
|
|
36
36
|
delta_info: "DeltaInfo" = dataclasses.field(hash=False, repr=False)
|
|
37
|
+
headers: Optional[Dict[str, str]] = dataclasses.field(default=None, hash=False)
|
|
37
38
|
|
|
38
39
|
@classmethod
|
|
39
40
|
def create_from_mcp(cls, idx: int, obj: Dict[str, Any]) -> "AspectForDiff":
|
|
@@ -44,6 +45,7 @@ class AspectForDiff:
|
|
|
44
45
|
aspect_name=obj["aspectName"],
|
|
45
46
|
aspect=aspect.get("json", aspect),
|
|
46
47
|
delta_info=DeltaInfo(idx=idx, original=obj),
|
|
48
|
+
headers=obj.get("headers"),
|
|
47
49
|
)
|
|
48
50
|
|
|
49
51
|
def __repr__(self):
|
|
@@ -240,9 +242,12 @@ class MCPDiff:
|
|
|
240
242
|
s.append(serialize_aspect(ga.aspect))
|
|
241
243
|
for (i, old, new), diffs in aspect_diffs.aspects_changed.items():
|
|
242
244
|
s.append(self.report_aspect(old, i, "changed") + ":")
|
|
245
|
+
|
|
246
|
+
print_aspects = False
|
|
243
247
|
for diff_level in diffs:
|
|
244
248
|
s.append(self.report_diff_level(diff_level, i))
|
|
245
|
-
|
|
249
|
+
print_aspects |= self.is_diff_level_on_aspect(diff_level)
|
|
250
|
+
if verbose and print_aspects:
|
|
246
251
|
s.append(f"Old aspect:\n{serialize_aspect(old.aspect)}")
|
|
247
252
|
s.append(f"New aspect:\n{serialize_aspect(new.aspect)}")
|
|
248
253
|
|
|
@@ -271,6 +276,14 @@ class MCPDiff:
|
|
|
271
276
|
f"root[{idx}].", ""
|
|
272
277
|
)
|
|
273
278
|
|
|
279
|
+
@staticmethod
|
|
280
|
+
def is_diff_level_on_aspect(diff: DiffLevel) -> bool:
|
|
281
|
+
skip_print_fields = ["changeType", "headers"]
|
|
282
|
+
try:
|
|
283
|
+
return diff.path(output_format="list")[1] not in skip_print_fields
|
|
284
|
+
except IndexError:
|
|
285
|
+
return True
|
|
286
|
+
|
|
274
287
|
|
|
275
288
|
def serialize_aspect(aspect: Union[AspectForDiff, Dict[str, Any]]) -> str:
|
|
276
289
|
if isinstance(aspect, AspectForDiff): # Unpack aspect
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|