semantic-link-labs 0.9.10__py3-none-any.whl → 0.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/METADATA +27 -21
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/RECORD +34 -29
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +22 -1
- sempy_labs/_delta_analyzer.py +9 -8
- sempy_labs/_environments.py +19 -1
- sempy_labs/_generate_semantic_model.py +1 -1
- sempy_labs/_helper_functions.py +193 -134
- sempy_labs/_kusto.py +25 -23
- sempy_labs/_list_functions.py +13 -35
- sempy_labs/_model_bpa_rules.py +13 -3
- sempy_labs/_notebooks.py +44 -11
- sempy_labs/_semantic_models.py +93 -1
- sempy_labs/_sql.py +3 -2
- sempy_labs/_tags.py +194 -0
- sempy_labs/_variable_libraries.py +89 -0
- sempy_labs/_vpax.py +386 -0
- sempy_labs/admin/__init__.py +8 -0
- sempy_labs/admin/_tags.py +126 -0
- sempy_labs/directlake/_generate_shared_expression.py +5 -1
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/lakehouse/__init__.py +16 -0
- sempy_labs/lakehouse/_blobs.py +115 -63
- sempy_labs/lakehouse/_get_lakehouse_tables.py +1 -13
- sempy_labs/lakehouse/_helper.py +211 -0
- sempy_labs/lakehouse/_lakehouse.py +1 -1
- sempy_labs/lakehouse/_livy_sessions.py +137 -0
- sempy_labs/report/_download_report.py +1 -1
- sempy_labs/report/_generate_report.py +5 -1
- sempy_labs/report/_reportwrapper.py +31 -18
- sempy_labs/tom/_model.py +83 -21
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
- sempy_labs/report/_bpareporttemplate/.platform +0 -11
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/top_level.txt +0 -0
sempy_labs/_helper_functions.py
CHANGED
@@ -74,6 +74,15 @@ def create_abfss_path(
     return path


+def create_abfss_path_from_path(
+    lakehouse_id: UUID, workspace_id: UUID, file_path: str
+) -> str:
+
+    fp = _get_default_file_path()
+
+    return f"abfss://{workspace_id}@{fp}/{lakehouse_id}/{file_path}"
+
+
 def _get_default_file_path() -> str:

     default_file_storage = _get_fabric_context_setting(name="fs.defaultFS")
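For illustration, the new create_abfss_path_from_path helper builds an ABFSS URI from a workspace ID, a lakehouse ID, and a lakehouse-relative file path. A minimal sketch of the resulting shape, assuming the default OneLake host (the library resolves the real host via _get_default_file_path() from the "fs.defaultFS" Fabric context setting) and made-up IDs:

# Sketch only: the host and IDs below are hypothetical placeholders.
workspace_id = "11111111-1111-1111-1111-111111111111"
lakehouse_id = "22222222-2222-2222-2222-222222222222"
file_path = "Files/raw/sales.csv"

host = "onelake.dfs.fabric.microsoft.com"  # assumed default OneLake endpoint
abfss_path = f"abfss://{workspace_id}@{host}/{lakehouse_id}/{file_path}"
print(abfss_path)
# abfss://<workspace_id>@<host>/<lakehouse_id>/Files/raw/sales.csv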
@@ -266,7 +275,7 @@ def create_item(
         lro_return_status_code=True,
     )
     print(
-        f"{icons.green_dot} The '{name}' {item_type} has been successfully created within the
+        f"{icons.green_dot} The '{name}' {item_type} has been successfully created within the '{workspace_name}' workspace."
     )


@@ -278,10 +287,9 @@ def get_item_definition(
     return_dataframe: bool = True,
     decode: bool = True,
 ):
-
     from sempy_labs._utils import item_types

-
+    workspace_id = resolve_workspace_id(workspace)
     item_id = resolve_item_id(item, type, workspace_id)
     item_type_url = item_types.get(type)[1]
     path = item_types.get(type)[2]
@@ -304,92 +312,11 @@ def get_item_definition(
         p.get("payload") for p in result["definition"]["parts"] if p.get("path") == path
     )
     if decode:
-        json.loads(_decode_b64(value))
+        return json.loads(_decode_b64(value))
     else:
         return value


-def resolve_item_id(
-    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
-) -> UUID:
-
-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    item_id = None
-
-    if _is_valid_uuid(item):
-        # Check (optional)
-        item_id = item
-        try:
-            _base_api(
-                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
-                client="fabric_sp",
-            )
-        except FabricHTTPException:
-            raise ValueError(
-                f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_name}' workspace."
-            )
-    else:
-        if type is None:
-            raise ValueError(
-                f"{icons.red_dot} The 'type' parameter is required if specifying an item name."
-            )
-        responses = _base_api(
-            request=f"/v1/workspaces/{workspace_id}/items?type={type}",
-            client="fabric_sp",
-            uses_pagination=True,
-        )
-        for r in responses:
-            for v in r.get("value", []):
-                display_name = v.get("displayName")
-                if display_name == item:
-                    item_id = v.get("id")
-                    break
-
-    if item_id is None:
-        raise ValueError(
-            f"{icons.red_dot} There's no item '{item}' of type '{type}' in the '{workspace_name}' workspace."
-        )
-
-    return item_id
-
-
-def resolve_item_name_and_id(
-    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
-) -> Tuple[str, UUID]:
-
-    workspace_id = resolve_workspace_id(workspace)
-    item_id = resolve_item_id(item=item, type=type, workspace=workspace_id)
-    item_name = (
-        _base_api(
-            request=f"/v1/workspaces/{workspace_id}/items/{item_id}", client="fabric_sp"
-        )
-        .json()
-        .get("displayName")
-    )
-
-    return item_name, item_id
-
-
-def resolve_item_name(item_id: UUID, workspace: Optional[str | UUID] = None) -> str:
-
-    workspace_id = resolve_workspace_id(workspace)
-    try:
-        item_name = (
-            _base_api(
-                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
-                client="fabric_sp",
-            )
-            .json()
-            .get("displayName")
-        )
-    except FabricHTTPException:
-        raise ValueError(
-            f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_id}' workspace."
-        )
-
-    return item_name
-
-
 def resolve_lakehouse_name_and_id(
     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> Tuple[str, UUID]:
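Because the decode branch now returns the parsed payload instead of discarding it, a call with decode=True yields the item definition as a dictionary. A hedged usage sketch, assuming get_item_definition is imported from its defining module and using placeholder item, type, and workspace values:

from sempy_labs._helper_functions import get_item_definition

# Hypothetical names; the function resolves the item by name and type within the workspace.
definition = get_item_definition(
    item="My Notebook", type="Notebook", workspace="My Workspace", decode=True
)
print(type(definition))  # dict parsed from the base64-encoded definition part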
@@ -732,7 +659,7 @@ def save_as_delta_table(
         "bool": ("pa", pa.bool_(), BooleanType()),
         "boolean": ("pa", pa.bool_(), BooleanType()),
         "date": ("pa", pa.date32(), DateType()),
-        "timestamp": ("pa", pa.timestamp("
+        "timestamp": ("pa", pa.timestamp("us"), TimestampType()),
     }
     return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()}

@@ -936,6 +863,87 @@ def resolve_workspace_name_and_id(
     return workspace_name, workspace_id


+def resolve_item_id(
+    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
+) -> UUID:
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    item_id = None
+
+    if _is_valid_uuid(item):
+        # Check (optional)
+        item_id = item
+        try:
+            _base_api(
+                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
+                client="fabric_sp",
+            )
+        except FabricHTTPException:
+            raise ValueError(
+                f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_name}' workspace."
+            )
+    else:
+        if type is None:
+            raise ValueError(
+                f"{icons.red_dot} The 'type' parameter is required if specifying an item name."
+            )
+        responses = _base_api(
+            request=f"/v1/workspaces/{workspace_id}/items?type={type}",
+            client="fabric_sp",
+            uses_pagination=True,
+        )
+        for r in responses:
+            for v in r.get("value", []):
+                display_name = v.get("displayName")
+                if display_name == item:
+                    item_id = v.get("id")
+                    break
+
+    if item_id is None:
+        raise ValueError(
+            f"{icons.red_dot} There's no item '{item}' of type '{type}' in the '{workspace_name}' workspace."
+        )
+
+    return item_id
+
+
+def resolve_item_name_and_id(
+    item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
+) -> Tuple[str, UUID]:
+
+    workspace_id = resolve_workspace_id(workspace)
+    item_id = resolve_item_id(item=item, type=type, workspace=workspace_id)
+    item_name = (
+        _base_api(
+            request=f"/v1/workspaces/{workspace_id}/items/{item_id}", client="fabric_sp"
+        )
+        .json()
+        .get("displayName")
+    )
+
+    return item_name, item_id
+
+
+def resolve_item_name(item_id: UUID, workspace: Optional[str | UUID] = None) -> str:
+
+    workspace_id = resolve_workspace_id(workspace)
+    try:
+        item_name = (
+            _base_api(
+                request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
+                client="fabric_sp",
+            )
+            .json()
+            .get("displayName")
+        )
+    except FabricHTTPException:
+        raise ValueError(
+            f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_id}' workspace."
+        )
+
+    return item_name
+
+
 def _extract_json(dataframe: pd.DataFrame) -> dict:

     payload = dataframe["payload"].iloc[0]
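For reference, the relocated resolvers accept either a display name (with a required type) or an item UUID. A short sketch with placeholder names, assuming the signatures shown in this hunk:

from sempy_labs._helper_functions import resolve_item_id, resolve_item_name_and_id

# "Sales Model" and "My Workspace" are hypothetical; "SemanticModel" is a Fabric item type string.
item_id = resolve_item_id(item="Sales Model", type="SemanticModel", workspace="My Workspace")
item_name, item_id = resolve_item_name_and_id(item=item_id, workspace="My Workspace")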
@@ -1540,38 +1548,18 @@ def _get_column_aggregate(
     workspace: Optional[str | UUID] = None,
     function: str = "max",
     default_value: int = 0,
+    schema_name: Optional[str] = None,
 ) -> int | Dict[str, int]:

     workspace_id = resolve_workspace_id(workspace)
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
-    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema_name)
     df = _read_delta_table(path)

-
-        result = _get_aggregate(
-            df=df,
-            column_name=column_name,
-            function=function,
-            default_value=default_value,
-        )
-    elif isinstance(column_name, list):
-        result = {}
-        for col in column_name:
-            result[col] = _get_aggregate(
-                df=df,
-                column_name=col,
-                function=function,
-                default_value=default_value,
-            )
-    else:
-        raise TypeError("column_name must be a string or a list of strings.")
-
-    return result
+    function = function.lower()

-
-
-
-    function = function.upper()
+    if isinstance(column_name, str):
+        column_name = [column_name]

     if _pure_python_notebook():
         import polars as pl
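A sketch of how the updated _get_column_aggregate might be called now that it lowercases the function name, accepts a list of columns, and takes an optional schema_name; the lakehouse, table, schema, and column names below are placeholders:

from sempy_labs._helper_functions import _get_column_aggregate

# A single column returns a scalar; a list of columns returns a dict keyed by column name.
max_id = _get_column_aggregate(
    table_name="fact_sales",
    column_name="SaleId",
    lakehouse="MyLakehouse",
    workspace="My Workspace",
    function="max",
)
distinct_counts = _get_column_aggregate(
    table_name="fact_sales",
    column_name=["CustomerId", "ProductId"],
    function="distinctcount",
    schema_name="dbo",
)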
@@ -1581,36 +1569,82 @@ def _get_aggregate(df, column_name, function, default_value: int = 0) -> int:

         df = pl.from_pandas(df)

-
-
-
-
+        def get_expr(col):
+            col_dtype = df.schema[col]
+
+            if "approx" in function:
+                return pl.col(col).unique().count().alias(col)
+            elif "distinct" in function:
+                if col_dtype == pl.Decimal:
+                    return pl.col(col).cast(pl.Float64).n_unique().alias(col)
+                else:
+                    return pl.col(col).n_unique().alias(col)
+            elif function == "sum":
+                return pl.col(col).sum().alias(col)
+            elif function == "min":
+                return pl.col(col).min().alias(col)
+            elif function == "max":
+                return pl.col(col).max().alias(col)
+            elif function == "count":
+                return pl.col(col).count().alias(col)
+            elif function in {"avg", "mean"}:
+                return pl.col(col).mean().alias(col)
             else:
-            result = df[column_name].n_unique()
-        elif "APPROX" in function:
-            result = df[column_name].unique().shape[0]
-        else:
-            try:
-                result = getattr(df[column_name], function.lower())()
-            except AttributeError:
                 raise ValueError(f"Unsupported function: {function}")

-
+        exprs = [get_expr(col) for col in column_name]
+        aggs = df.select(exprs).to_dict(as_series=False)
+
+        if len(column_name) == 1:
+            result = aggs[column_name[0]][0] or default_value
+        else:
+            result = {col: aggs[col][0] for col in column_name}
     else:
-        from pyspark.sql.functions import
-
+        from pyspark.sql.functions import (
+            count,
+            sum,
+            min,
+            max,
+            avg,
+            approx_count_distinct,
+            countDistinct,
+        )

-
-
+        result = None
+        if "approx" in function:
+            spark_func = approx_count_distinct
+        elif "distinct" in function:
+            spark_func = countDistinct
+        elif function == "count":
+            spark_func = count
+        elif function == "sum":
+            spark_func = sum
+        elif function == "min":
+            spark_func = min
+        elif function == "max":
+            spark_func = max
+        elif function == "avg":
+            spark_func = avg
+        else:
+            raise ValueError(f"Unsupported function: {function}")

-
-
-
-
+        agg_exprs = []
+        for col in column_name:
+            agg_exprs.append(spark_func(col).alias(col))
+
+        aggs = df.agg(*agg_exprs).collect()[0]
+        if len(column_name) == 1:
+            result = aggs[0] or default_value
         else:
-            result =
+            result = {col: aggs[col] for col in column_name}

-
+    return result
+
+
+def _create_spark_dataframe(df: pd.DataFrame):
+
+    spark = _create_spark_session()
+    return spark.createDataFrame(df)


 def _make_list_unique(my_list):
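The pure-Python branch above builds one polars expression per column and evaluates them all in a single select, which yields a one-row frame keyed by column name. A self-contained sketch of that pattern with made-up data:

import polars as pl

df = pl.DataFrame({"CustomerId": [1, 2, 2, 3], "Amount": [10.0, 20.0, 20.0, 5.0]})

# One expression per column, all evaluated in a single pass.
exprs = [pl.col(c).n_unique().alias(c) for c in ["CustomerId", "Amount"]]
aggs = df.select(exprs).to_dict(as_series=False)

print(aggs)  # {'CustomerId': [3], 'Amount': [3]}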
@@ -1705,6 +1739,9 @@ def _process_and_display_chart(df, title, widget):
     df["Start"] = df["Start"] - Offset
     df["End"] = df["End"] - Offset

+    unique_objects = df["Object Name"].nunique()
+    height = min(max(400, unique_objects * 30), 1000)
+
     # Vega-Lite spec for Gantt chart
     spec = (
         """{
@@ -1714,7 +1751,9 @@ def _process_and_display_chart(df, title, widget):
         + df.to_json(orient="records")
         + """ },
     "width": 700,
-    "height":
+    "height": """
+        + str(height)
+        + """,
     "mark": "bar",
     "encoding": {
         "y": {
@@ -2211,3 +2250,23 @@ def _xml_to_dict(element):
             element.text.strip() if element.text and element.text.strip() else None
         )
     return data
+
+
+def file_exists(file_path: str) -> bool:
+    """
+    Check if a file exists in the given path.
+
+    Parameters
+    ----------
+    file_path : str
+        The path to the file.
+
+    Returns
+    -------
+    bool
+        True if the file exists, False otherwise.
+    """
+
+    import notebookutils
+
+    return len(notebookutils.fs.ls(file_path)) > 0
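The new file_exists helper delegates to notebookutils.fs.ls, so it is only usable inside a Fabric notebook session where notebookutils is available. A hedged usage sketch with placeholder IDs and path:

from sempy_labs._helper_functions import create_abfss_path_from_path, file_exists

# Hypothetical IDs; both helpers come from the hunks above.
path = create_abfss_path_from_path(
    lakehouse_id="22222222-2222-2222-2222-222222222222",
    workspace_id="11111111-1111-1111-1111-111111111111",
    file_path="Files/raw/sales.csv",
)
if file_exists(path):
    print("File found")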
sempy_labs/_kusto.py
CHANGED
@@ -77,33 +77,35 @@ def query_kusto(

     df = pd.DataFrame(rows, columns=[col["ColumnName"] for col in columns_info])

-
-
-
-
-
-
-
-
-
-
-
-
-            )
-
-
-
-
-
-
-
-            )
-
+    return df
+    # for col_info in columns_info:
+    #     col_name = col_info["ColumnName"]
+    #     data_type = col_info["DataType"]
+
+    #     try:
+    #         if data_type == "DateTime":
+    #             df[col_name] = pd.to_datetime(df[col_name])
+    #         elif data_type in ["Int64", "Int32", "Long"]:
+    #             df[col_name] = (
+    #                 pd.to_numeric(df[col_name], errors="coerce")
+    #                 .fillna(0)
+    #                 .astype("int64")
+    #             )
+    #         elif data_type == "Real" or data_type == "Double":
+    #             df[col_name] = pd.to_numeric(df[col_name], errors="coerce")
+    #         else:
+    #             # Convert any other type to string, change as needed
+    #             df[col_name] = df[col_name].astype(str)
+    #     except Exception as e:
+    #         print(
+    #             f"{icons.yellow_dot} Could not convert column {col_name} to {data_type}, defaulting to string: {str(e)}"
+    #         )
+    #         df[col_name] = df[col_name].astype(str)

     return df


+@log
 def query_workspace_monitoring(
     query: str, workspace: Optional[str | UUID] = None, language: str = "kql"
 ) -> pd.DataFrame:
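With the per-column type coercion in query_kusto now commented out, the returned DataFrame keeps whatever dtypes pandas infers from the raw rows; callers that need typed columns can coerce afterwards. A minimal post-processing sketch with hypothetical column names:

import pandas as pd

def coerce_kusto_types(df: pd.DataFrame) -> pd.DataFrame:
    # Example post-processing a caller could apply to a query_kusto result.
    if "Timestamp" in df.columns:
        df["Timestamp"] = pd.to_datetime(df["Timestamp"], errors="coerce")
    if "DurationMs" in df.columns:
        df["DurationMs"] = (
            pd.to_numeric(df["DurationMs"], errors="coerce").fillna(0).astype("int64")
        )
    return df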
sempy_labs/_list_functions.py
CHANGED
@@ -41,54 +41,32 @@ def get_object_level_security(

     from sempy_labs.tom import connect_semantic_model

-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
-
     columns = {
         "Role Name": "string",
         "Object Type": "string",
         "Table Name": "string",
         "Object Name": "string",
+        "Metadata Permission": "string",
     }
     df = _create_dataframe(columns=columns)

     with connect_semantic_model(
-        dataset=
+        dataset=dataset, readonly=True, workspace=workspace
     ) as tom:

         for r in tom.model.Roles:
             for tp in r.TablePermissions:
-
-
-
-
-
-
-
-
-
-
-                    "Table Name": tp.Name,
-                    "Object Name": tp.Name,
-                }
-                df = pd.concat(
-                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
-                )
-            else:
-                objectType = "Column"
-                for cp in tp.ColumnPermissions:
-                    new_data = {
-                        "Role Name": r.Name,
-                        "Object Type": objectType,
-                        "Table Name": tp.Name,
-                        "Object Name": cp.Name,
-                    }
-                    df = pd.concat(
-                        [df, pd.DataFrame(new_data, index=[0])],
-                        ignore_index=True,
-                    )
-
+                for cp in tp.ColumnPermissions:
+                    new_data = {
+                        "Role Name": r.Name,
+                        "Object Type": "Column",
+                        "Table Name": tp.Name,
+                        "Object Name": cp.Name,
+                        "Metadata Permission": cp.Permission,
+                    }
+                    df = pd.concat(
+                        [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
+                    )
     return df


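After this change, get_object_level_security emits one row per column permission and includes the new 'Metadata Permission' column. A hedged usage sketch with placeholder dataset and workspace names:

import sempy_labs as labs

df = labs.get_object_level_security(dataset="Sales Model", workspace="My Workspace")
print(list(df.columns))
# ['Role Name', 'Object Type', 'Table Name', 'Object Name', 'Metadata Permission']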
sempy_labs/_model_bpa_rules.py
CHANGED
@@ -674,8 +674,18 @@ def model_bpa_rules(
             "Provide format string for 'Date' columns",
             lambda obj, tom: (re.search(r"date", obj.Name, flags=re.IGNORECASE))
             and (obj.DataType == TOM.DataType.DateTime)
-            and (
-
+            and (
+                obj.FormatString.lower()
+                not in [
+                    "mm/dd/yyyy",
+                    "mm-dd-yyyy",
+                    "dd/mm/yyyy",
+                    "dd-mm-yyyy",
+                    "yyyy-mm-dd",
+                    "yyyy/mm/dd",
+                ]
+            ),
+            'Columns of type "DateTime" that have "Date" in their names should be formatted.',
         ),
         (
             "Formatting",
@@ -789,7 +799,7 @@ def model_bpa_rules(
             "Formatting",
             "Column",
             "Warning",
-
+            "Provide format string for 'Month' columns",
             lambda obj, tom: re.search(r"month", obj.Name, flags=re.IGNORECASE)
             and obj.DataType == TOM.DataType.DateTime
             and obj.FormatString != "MMMM yyyy",