semantic-link-labs 0.9.10__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/METADATA +28 -21
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/RECORD +38 -31
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +26 -1
- sempy_labs/_delta_analyzer.py +9 -8
- sempy_labs/_dictionary_diffs.py +221 -0
- sempy_labs/_environments.py +19 -1
- sempy_labs/_generate_semantic_model.py +1 -1
- sempy_labs/_helper_functions.py +358 -134
- sempy_labs/_kusto.py +25 -23
- sempy_labs/_list_functions.py +13 -35
- sempy_labs/_model_bpa_rules.py +13 -3
- sempy_labs/_notebooks.py +44 -11
- sempy_labs/_semantic_models.py +93 -1
- sempy_labs/_sql.py +4 -3
- sempy_labs/_tags.py +194 -0
- sempy_labs/_user_delegation_key.py +42 -0
- sempy_labs/_variable_libraries.py +89 -0
- sempy_labs/_vpax.py +388 -0
- sempy_labs/admin/__init__.py +8 -0
- sempy_labs/admin/_tags.py +126 -0
- sempy_labs/directlake/_generate_shared_expression.py +5 -1
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/lakehouse/__init__.py +14 -0
- sempy_labs/lakehouse/_blobs.py +100 -85
- sempy_labs/lakehouse/_get_lakehouse_tables.py +1 -13
- sempy_labs/lakehouse/_helper.py +211 -0
- sempy_labs/lakehouse/_lakehouse.py +1 -1
- sempy_labs/lakehouse/_livy_sessions.py +137 -0
- sempy_labs/report/__init__.py +2 -0
- sempy_labs/report/_download_report.py +1 -1
- sempy_labs/report/_generate_report.py +5 -1
- sempy_labs/report/_report_helper.py +27 -128
- sempy_labs/report/_reportwrapper.py +1903 -1165
- sempy_labs/tom/_model.py +83 -21
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
- sempy_labs/report/_bpareporttemplate/.platform +0 -11
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/top_level.txt +0 -0
sempy_labs/_environments.py
CHANGED
@@ -3,6 +3,7 @@ import sempy_labs._icons as icons
 from typing import Optional
 from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
+    resolve_workspace_id,
     _base_api,
     _create_dataframe,
     resolve_item_id,
@@ -67,10 +68,16 @@ def list_environments(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
         "Environment Name": "string",
         "Environment Id": "string",
         "Description": "string",
+        "Publish State": "string",
+        "Publish Target Version": "string",
+        "Publish Start Time": "string",
+        "Publish End Time": "string",
+        "Spark Libraries State": "string",
+        "Spark Settings State": "string",
     }
     df = _create_dataframe(columns=columns)

-
+    workspace_id = resolve_workspace_id(workspace)

     responses = _base_api(
         request=f"/v1/workspaces/{workspace_id}/environments",
@@ -80,10 +87,21 @@ def list_environments(workspace: Optional[str | UUID] = None) -> pd.DataFrame:

     for r in responses:
         for v in r.get("value", []):
+            pub = v.get("properties", {}).get("publishDetails", {})
             new_data = {
                 "Environment Name": v.get("displayName"),
                 "Environment Id": v.get("id"),
                 "Description": v.get("description"),
+                "Publish State": pub.get("state"),
+                "Publish Target Version": pub.get("targetVersion"),
+                "Publish Start Time": pub.get("startTime"),
+                "Publish End Time": pub.get("endTime"),
+                "Spark Libraries State": pub.get("componentPublishInfo", {})
+                .get("sparkLibraries", {})
+                .get("state"),
+                "Spark Settings State": pub.get("componentPublishInfo", {})
+                .get("sparkSettings", {})
+                .get("state"),
             }
             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

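For orientation, a minimal sketch of how the expanded list_environments output could be consumed. It assumes the function is exported at the package root as in earlier releases; the workspace name is a placeholder, and the exact publish-state strings come from the Fabric REST payload.

```python
import sempy_labs as labs  # assumes list_environments is re-exported at the package root

# Placeholder workspace name; a workspace UUID works as well.
df = labs.list_environments(workspace="My Workspace")

# Columns added in 0.10.0 alongside the existing name/id/description columns.
new_columns = [
    "Publish State",
    "Publish Target Version",
    "Publish Start Time",
    "Publish End Time",
    "Spark Libraries State",
    "Spark Settings State",
]
print(df[["Environment Name"] + new_columns])
```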
sempy_labs/_helper_functions.py
CHANGED
@@ -17,6 +17,8 @@ import numpy as np
 from IPython.display import display, HTML
 import requests
 import sempy_labs._authentication as auth
+from jsonpath_ng.ext import parse
+from jsonpath_ng.jsonpath import Fields, Index


 def _build_url(url: str, params: dict) -> str:
@@ -74,6 +76,15 @@ def create_abfss_path(
     return path


+def create_abfss_path_from_path(
+    lakehouse_id: UUID, workspace_id: UUID, file_path: str
+) -> str:
+
+    fp = _get_default_file_path()
+
+    return f"abfss://{workspace_id}@{fp}/{lakehouse_id}/{file_path}"
+
+
 def _get_default_file_path() -> str:

     default_file_storage = _get_fabric_context_setting(name="fs.defaultFS")
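A self-contained illustration of the path shape produced by the new create_abfss_path_from_path helper. The IDs are placeholders, and the OneLake host is an assumption; in the library it comes from the fs.defaultFS Fabric context setting via _get_default_file_path().

```python
# Placeholder IDs and an assumed OneLake host, standing in for the values the
# library resolves at runtime from the Fabric context.
workspace_id = "11111111-1111-1111-1111-111111111111"
lakehouse_id = "22222222-2222-2222-2222-222222222222"
onelake_host = "onelake.dfs.fabric.microsoft.com"  # assumption: default OneLake host
file_path = "Files/raw/sales.csv"

abfss_path = f"abfss://{workspace_id}@{onelake_host}/{lakehouse_id}/{file_path}"
print(abfss_path)
# abfss://11111111-...@onelake.dfs.fabric.microsoft.com/22222222-.../Files/raw/sales.csv
```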
@@ -266,7 +277,7 @@ def create_item(
         lro_return_status_code=True,
     )
     print(
-        f"{icons.green_dot} The '{name}' {item_type} has been successfully created within the
+        f"{icons.green_dot} The '{name}' {item_type} has been successfully created within the '{workspace_name}' workspace."
     )


@@ -278,10 +289,9 @@ def get_item_definition(
     return_dataframe: bool = True,
     decode: bool = True,
 ):
-
     from sempy_labs._utils import item_types

-
+    workspace_id = resolve_workspace_id(workspace)
     item_id = resolve_item_id(item, type, workspace_id)
     item_type_url = item_types.get(type)[1]
     path = item_types.get(type)[2]
@@ -304,92 +314,11 @@ def get_item_definition(
         p.get("payload") for p in result["definition"]["parts"] if p.get("path") == path
     )
     if decode:
-        json.loads(_decode_b64(value))
+        return json.loads(_decode_b64(value))
     else:
         return value


-def resolve_item_id(
-    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
-) -> UUID:
-
-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    item_id = None
-
-    if _is_valid_uuid(item):
-        # Check (optional)
-        item_id = item
-        try:
-            _base_api(
-                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
-                client="fabric_sp",
-            )
-        except FabricHTTPException:
-            raise ValueError(
-                f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_name}' workspace."
-            )
-    else:
-        if type is None:
-            raise ValueError(
-                f"{icons.red_dot} The 'type' parameter is required if specifying an item name."
-            )
-        responses = _base_api(
-            request=f"/v1/workspaces/{workspace_id}/items?type={type}",
-            client="fabric_sp",
-            uses_pagination=True,
-        )
-        for r in responses:
-            for v in r.get("value", []):
-                display_name = v.get("displayName")
-                if display_name == item:
-                    item_id = v.get("id")
-                    break
-
-        if item_id is None:
-            raise ValueError(
-                f"{icons.red_dot} There's no item '{item}' of type '{type}' in the '{workspace_name}' workspace."
-            )
-
-    return item_id
-
-
-def resolve_item_name_and_id(
-    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
-) -> Tuple[str, UUID]:
-
-    workspace_id = resolve_workspace_id(workspace)
-    item_id = resolve_item_id(item=item, type=type, workspace=workspace_id)
-    item_name = (
-        _base_api(
-            request=f"/v1/workspaces/{workspace_id}/items/{item_id}", client="fabric_sp"
-        )
-        .json()
-        .get("displayName")
-    )
-
-    return item_name, item_id
-
-
-def resolve_item_name(item_id: UUID, workspace: Optional[str | UUID] = None) -> str:
-
-    workspace_id = resolve_workspace_id(workspace)
-    try:
-        item_name = (
-            _base_api(
-                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
-                client="fabric_sp",
-            )
-            .json()
-            .get("displayName")
-        )
-    except FabricHTTPException:
-        raise ValueError(
-            f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_id}' workspace."
-        )
-
-    return item_name
-
-
 def resolve_lakehouse_name_and_id(
     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> Tuple[str, UUID]:
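The decode branch previously computed json.loads(...) and discarded the result; 0.10.0 returns it. A hedged sketch of calling the fixed function, importing it from the module shown above; the item name, the "Notebook" type, and the return_dataframe setting are illustrative assumptions, not documented behavior.

```python
from sempy_labs._helper_functions import get_item_definition

# Placeholders throughout; return_dataframe=False is assumed here so the call
# reaches the decode branch shown in the diff.
definition = get_item_definition(
    item="My Notebook",
    type="Notebook",
    workspace="My Workspace",
    return_dataframe=False,
    decode=True,
)
print(type(definition))  # a parsed dict in 0.10.0 rather than None
```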
@@ -732,7 +661,7 @@ def save_as_delta_table(
         "bool": ("pa", pa.bool_(), BooleanType()),
         "boolean": ("pa", pa.bool_(), BooleanType()),
         "date": ("pa", pa.date32(), DateType()),
-        "timestamp": ("pa", pa.timestamp("
+        "timestamp": ("pa", pa.timestamp("us"), TimestampType()),
     }
     return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()}

@@ -936,6 +865,87 @@ def resolve_workspace_name_and_id(
     return workspace_name, workspace_id


+def resolve_item_id(
+    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
+) -> UUID:
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    item_id = None
+
+    if _is_valid_uuid(item):
+        # Check (optional)
+        item_id = item
+        try:
+            _base_api(
+                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
+                client="fabric_sp",
+            )
+        except FabricHTTPException:
+            raise ValueError(
+                f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_name}' workspace."
+            )
+    else:
+        if type is None:
+            raise ValueError(
+                f"{icons.red_dot} The 'type' parameter is required if specifying an item name."
+            )
+        responses = _base_api(
+            request=f"/v1/workspaces/{workspace_id}/items?type={type}",
+            client="fabric_sp",
+            uses_pagination=True,
+        )
+        for r in responses:
+            for v in r.get("value", []):
+                display_name = v.get("displayName")
+                if display_name == item:
+                    item_id = v.get("id")
+                    break
+
+        if item_id is None:
+            raise ValueError(
+                f"{icons.red_dot} There's no item '{item}' of type '{type}' in the '{workspace_name}' workspace."
+            )
+
+    return item_id
+
+
+def resolve_item_name_and_id(
+    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
+) -> Tuple[str, UUID]:
+
+    workspace_id = resolve_workspace_id(workspace)
+    item_id = resolve_item_id(item=item, type=type, workspace=workspace_id)
+    item_name = (
+        _base_api(
+            request=f"/v1/workspaces/{workspace_id}/items/{item_id}", client="fabric_sp"
+        )
+        .json()
+        .get("displayName")
+    )
+
+    return item_name, item_id
+
+
+def resolve_item_name(item_id: UUID, workspace: Optional[str | UUID] = None) -> str:
+
+    workspace_id = resolve_workspace_id(workspace)
+    try:
+        item_name = (
+            _base_api(
+                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
+                client="fabric_sp",
+            )
+            .json()
+            .get("displayName")
+        )
+    except FabricHTTPException:
+        raise ValueError(
+            f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_id}' workspace."
+        )
+
+    return item_name
+
+
 def _extract_json(dataframe: pd.DataFrame) -> dict:

     payload = dataframe["payload"].iloc[0]
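These resolver helpers are unchanged in behavior but now live below resolve_workspace_name_and_id. A brief usage sketch; the item name, the "Lakehouse" type string, and the workspace name are placeholders.

```python
from sempy_labs._helper_functions import resolve_item_id, resolve_item_name_and_id

# Resolve by display name: the 'type' parameter is required in this case.
item_name, item_id = resolve_item_name_and_id(
    item="Sales Lakehouse",    # placeholder display name
    type="Lakehouse",          # placeholder Fabric item type
    workspace="My Workspace",  # placeholder workspace (name or UUID)
)

# Passing a UUID skips the name lookup but still validates that the item exists.
same_id = resolve_item_id(item=item_id, workspace="My Workspace")
```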
@@ -1540,38 +1550,18 @@ def _get_column_aggregate(
     workspace: Optional[str | UUID] = None,
     function: str = "max",
     default_value: int = 0,
+    schema_name: Optional[str] = None,
 ) -> int | Dict[str, int]:

     workspace_id = resolve_workspace_id(workspace)
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
-    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema_name)
     df = _read_delta_table(path)

-
-        result = _get_aggregate(
-            df=df,
-            column_name=column_name,
-            function=function,
-            default_value=default_value,
-        )
-    elif isinstance(column_name, list):
-        result = {}
-        for col in column_name:
-            result[col] = _get_aggregate(
-                df=df,
-                column_name=col,
-                function=function,
-                default_value=default_value,
-            )
-    else:
-        raise TypeError("column_name must be a string or a list of strings.")
-
-    return result
-
-
-def _get_aggregate(df, column_name, function, default_value: int = 0) -> int:
+    function = function.lower()

-
+    if isinstance(column_name, str):
+        column_name = [column_name]

     if _pure_python_notebook():
         import polars as pl
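The separate _get_aggregate helper is folded back into _get_column_aggregate, which now lower-cases the function name, accepts a schema_name, and normalizes a single column to a one-element list. A hedged sketch of calling it; it is a private helper, and all table, column, lakehouse, schema, and workspace names below are placeholders.

```python
from sempy_labs._helper_functions import _get_column_aggregate

# Single column -> scalar result; list of columns -> dict keyed by column name.
max_sale_id = _get_column_aggregate(
    table_name="fact_sales",        # placeholder table
    column_name="SaleId",           # placeholder column
    lakehouse="MyLakehouse",        # placeholder lakehouse
    workspace="My Workspace",       # placeholder workspace
    function="max",
)

distinct_counts = _get_column_aggregate(
    table_name="fact_sales",
    column_name=["CustomerId", "ProductId"],
    lakehouse="MyLakehouse",
    workspace="My Workspace",
    function="distinct",
    schema_name="dbo",              # new in 0.10.0, forwarded to create_abfss_path
)
```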
@@ -1581,36 +1571,82 @@ def _get_aggregate(df, column_name, function, default_value: int = 0) -> int:

         df = pl.from_pandas(df)

-
-
-
-
+        def get_expr(col):
+            col_dtype = df.schema[col]
+
+            if "approx" in function:
+                return pl.col(col).unique().count().alias(col)
+            elif "distinct" in function:
+                if col_dtype == pl.Decimal:
+                    return pl.col(col).cast(pl.Float64).n_unique().alias(col)
+                else:
+                    return pl.col(col).n_unique().alias(col)
+            elif function == "sum":
+                return pl.col(col).sum().alias(col)
+            elif function == "min":
+                return pl.col(col).min().alias(col)
+            elif function == "max":
+                return pl.col(col).max().alias(col)
+            elif function == "count":
+                return pl.col(col).count().alias(col)
+            elif function in {"avg", "mean"}:
+                return pl.col(col).mean().alias(col)
             else:
-                result = df[column_name].n_unique()
-        elif "APPROX" in function:
-            result = df[column_name].unique().shape[0]
-        else:
-            try:
-                result = getattr(df[column_name], function.lower())()
-            except AttributeError:
                 raise ValueError(f"Unsupported function: {function}")

-
+        exprs = [get_expr(col) for col in column_name]
+        aggs = df.select(exprs).to_dict(as_series=False)
+
+        if len(column_name) == 1:
+            result = aggs[column_name[0]][0] or default_value
+        else:
+            result = {col: aggs[col][0] for col in column_name}
     else:
-        from pyspark.sql.functions import
-
+        from pyspark.sql.functions import (
+            count,
+            sum,
+            min,
+            max,
+            avg,
+            approx_count_distinct,
+            countDistinct,
+        )
+
+        result = None
+        if "approx" in function:
+            spark_func = approx_count_distinct
+        elif "distinct" in function:
+            spark_func = countDistinct
+        elif function == "count":
+            spark_func = count
+        elif function == "sum":
+            spark_func = sum
+        elif function == "min":
+            spark_func = min
+        elif function == "max":
+            spark_func = max
+        elif function == "avg":
+            spark_func = avg
+        else:
+            raise ValueError(f"Unsupported function: {function}")

-
-
+        agg_exprs = []
+        for col in column_name:
+            agg_exprs.append(spark_func(col).alias(col))

-
-
-
-            result = df.select(approx_count_distinct(column_name))
+        aggs = df.agg(*agg_exprs).collect()[0]
+        if len(column_name) == 1:
+            result = aggs[0] or default_value
         else:
-            result =
+            result = {col: aggs[col] for col in column_name}
+
+    return result

-
+
+def _create_spark_dataframe(df: pd.DataFrame):
+
+    spark = _create_spark_session()
+    return spark.createDataFrame(df)


 def _make_list_unique(my_list):
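The pure-Python branch now builds one polars expression per requested column and evaluates them in a single select. A self-contained sketch of that pattern with made-up data (not library code, and only two of the supported functions shown):

```python
import polars as pl

# Made-up frame standing in for the Delta table the library reads.
df = pl.DataFrame(
    {
        "CustomerId": [1, 2, 2, 3],
        "Amount": [10.0, 20.0, 20.0, 5.0],
    }
)

function = "distinct"
columns = ["CustomerId", "Amount"]


def get_expr(col: str) -> pl.Expr:
    # One expression per column, aliased back to the column name,
    # mirroring the shape of the new helper.
    if function == "distinct":
        return pl.col(col).n_unique().alias(col)
    elif function == "max":
        return pl.col(col).max().alias(col)
    raise ValueError(f"Unsupported function: {function}")


aggs = df.select([get_expr(c) for c in columns]).to_dict(as_series=False)
result = {c: aggs[c][0] for c in columns}
print(result)  # {'CustomerId': 3, 'Amount': 3}
```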
@@ -1705,6 +1741,9 @@ def _process_and_display_chart(df, title, widget):
     df["Start"] = df["Start"] - Offset
     df["End"] = df["End"] - Offset

+    unique_objects = df["Object Name"].nunique()
+    height = min(max(400, unique_objects * 30), 1000)
+
     # Vega-Lite spec for Gantt chart
     spec = (
         """{
@@ -1714,7 +1753,9 @@ def _process_and_display_chart(df, title, widget):
         + df.to_json(orient="records")
         + """ },
         "width": 700,
-        "height":
+        "height": """
+        + str(height)
+        + """,
         "mark": "bar",
         "encoding": {
             "y": {
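The Gantt chart height is no longer fixed: it scales with the number of distinct object names at 30 px per row, clamped to the 400 to 1000 px range. A tiny illustration with made-up counts:

```python
# Height scaling used for the Gantt chart spec; counts below are made up.
for unique_objects in (5, 20, 50):
    height = min(max(400, unique_objects * 30), 1000)
    print(unique_objects, "->", height)
# 5 -> 400, 20 -> 600, 50 -> 1000
```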
@@ -2211,3 +2252,186 @@ def _xml_to_dict(element):
         element.text.strip() if element.text and element.text.strip() else None
     )
     return data
+
+
+def file_exists(file_path: str) -> bool:
+    """
+    Check if a file exists in the given path.
+
+    Parameters
+    ----------
+    file_path : str
+        The path to the file.
+
+    Returns
+    -------
+    bool
+        True if the file exists, False otherwise.
+    """
+
+    import notebookutils
+
+    return len(notebookutils.fs.ls(file_path)) > 0
+
+
+def generate_number_guid():
+
+    guid = uuid.uuid4()
+    return str(guid.int & ((1 << 64) - 1))
+
+
+def get_url_content(url: str):
+
+    if "github.com" in url and "/blob/" in url:
+        url = url.replace("github.com", "raw.githubusercontent.com")
+        url = url.replace("/blob/", "/")
+
+    response = requests.get(url)
+    if response.ok:
+        try:
+            data = response.json()  # Only works if the response is valid JSON
+        except ValueError:
+            data = response.text  # Fallback: get raw text content
+        return data
+    else:
+        print(f"Failed to fetch raw content: {response.status_code}")
+
+
+def generate_hex(length: int = 10) -> str:
+    """
+    Generate a random hex string of the specified length. Used for generating IDs for report objects (page, visual, bookmark etc.).
+    """
+    import secrets
+
+    return secrets.token_hex(length)
+
+
+def decode_payload(payload):
+
+    if is_base64(payload):
+        try:
+            decoded_payload = json.loads(base64.b64decode(payload).decode("utf-8"))
+        except Exception:
+            decoded_payload = base64.b64decode(payload)
+    elif isinstance(payload, dict):
+        decoded_payload = payload
+    else:
+        raise ValueError("Payload must be a dictionary or a base64 encoded value.")
+
+    return decoded_payload
+
+
+def is_base64(s):
+    try:
+        # Add padding if needed
+        s_padded = s + "=" * (-len(s) % 4)
+        decoded = base64.b64decode(s_padded, validate=True)
+        # Optional: check if re-encoding gives the original (excluding padding)
+        return base64.b64encode(decoded).decode().rstrip("=") == s.rstrip("=")
+    except Exception:
+        return False
+
+
+def get_jsonpath_value(
+    data, path, default=None, remove_quotes=False, fix_true: bool = False
+):
+    matches = parse(path).find(data)
+    result = matches[0].value if matches else default
+    if result and remove_quotes and isinstance(result, str):
+        if result.startswith("'") and result.endswith("'"):
+            result = result[1:-1]
+    if fix_true and isinstance(result, str):
+        if result.lower() == "true":
+            result = True
+        elif result.lower() == "false":
+            result = False
+    return result
+
+
+def set_json_value(payload: dict, json_path: str, json_value: str | dict | List):
+
+    jsonpath_expr = parse(json_path)
+    matches = jsonpath_expr.find(payload)
+
+    if matches:
+        # Update all matches
+        for match in matches:
+            parent = match.context.value
+            path = match.path
+            if isinstance(path, Fields):
+                parent[path.fields[0]] = json_value
+            elif isinstance(path, Index):
+                parent[path.index] = json_value
+    else:
+        # Handle creation
+        parts = json_path.lstrip("$").strip(".").split(".")
+        current = payload
+
+        for i, part in enumerate(parts):
+            is_last = i == len(parts) - 1
+
+            # Detect list syntax like "lockAspect[*]"
+            list_match = re.match(r"(\w+)\[\*\]", part)
+            if list_match:
+                list_key = list_match.group(1)
+                if list_key not in current or not isinstance(current[list_key], list):
+                    # Initialize with one dict element
+                    current[list_key] = [{}]
+
+                for item in current[list_key]:
+                    if is_last:
+                        # Last part, assign value
+                        item = json_value
+                    else:
+                        # Proceed to next level
+                        if not isinstance(item, dict):
+                            raise ValueError(
+                                f"Expected dict in list for key '{list_key}', got {type(item)}"
+                            )
+                        next_part = ".".join(parts[i + 1 :])
+                        set_json_value(item, "$." + next_part, json_value)
+                return payload
+            else:
+                if part not in current or not isinstance(current[part], dict):
+                    current[part] = {} if not is_last else json_value
+                elif is_last:
+                    current[part] = json_value
+                current = current[part]
+
+    return payload
+
+
+def remove_json_value(path: str, payload: dict, json_path: str, verbose: bool = True):
+
+    if not isinstance(payload, dict):
+        raise ValueError(
+            f"{icons.red_dot} Cannot apply json_path to non-dictionary payload in '{path}'."
+        )
+
+    jsonpath_expr = parse(json_path)
+    matches = jsonpath_expr.find(payload)
+
+    if not matches and verbose:
+        print(
+            f"{icons.red_dot} No match found for '{json_path}' in '{path}'. Skipping."
+        )
+        return payload
+
+    for match in matches:
+        parent = match.context.value
+        path_expr = match.path
+
+        if isinstance(path_expr, Fields):
+            key = path_expr.fields[0]
+            if key in parent:
+                del parent[key]
+                if verbose:
+                    print(f"{icons.green_dot} Removed key '{key}' from '{path}'.")
+        elif isinstance(path_expr, Index):
+            index = path_expr.index
+            if isinstance(parent, list) and 0 <= index < len(parent):
+                parent.pop(index)
+                if verbose:
+                    print(f"{icons.green_dot} Removed index [{index}] from '{path}'.")
+
+    return payload
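The new JSON-path helpers are thin wrappers over jsonpath_ng (imported at the top of the module). A self-contained sketch of the read and write patterns they implement, using a made-up payload; this is illustrative only, not the library's own code path.

```python
from jsonpath_ng.ext import parse

# Made-up report-like payload, for illustration only.
payload = {"sections": [{"name": "page1", "visualContainers": [{"height": 200}]}]}

# Read, in the spirit of get_jsonpath_value: first match or a default.
matches = parse("$.sections[0].name").find(payload)
value = matches[0].value if matches else None
print(value)  # page1

# Write, in the spirit of set_json_value: update every existing match in place.
for match in parse("$.sections[*].visualContainers[*].height").find(payload):
    match.context.value[match.path.fields[0]] = 300

print(payload["sections"][0]["visualContainers"][0]["height"])  # 300
```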