semantic-link-labs 0.9.8__py3-none-any.whl → 0.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.9.8.dist-info → semantic_link_labs-0.9.10.dist-info}/METADATA +5 -3
- {semantic_link_labs-0.9.8.dist-info → semantic_link_labs-0.9.10.dist-info}/RECORD +29 -27
- {semantic_link_labs-0.9.8.dist-info → semantic_link_labs-0.9.10.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +6 -0
- sempy_labs/_clear_cache.py +12 -0
- sempy_labs/_dax.py +8 -2
- sempy_labs/_delta_analyzer.py +8 -18
- sempy_labs/_generate_semantic_model.py +6 -7
- sempy_labs/_helper_functions.py +216 -69
- sempy_labs/_kql_databases.py +18 -0
- sempy_labs/_kusto.py +135 -0
- sempy_labs/_list_functions.py +5 -1
- sempy_labs/_vertipaq.py +6 -6
- sempy_labs/_warehouses.py +3 -3
- sempy_labs/admin/__init__.py +6 -0
- sempy_labs/admin/_artifacts.py +3 -3
- sempy_labs/admin/_capacities.py +161 -1
- sempy_labs/admin/_dataflows.py +45 -0
- sempy_labs/admin/_items.py +16 -11
- sempy_labs/admin/_tenant.py +5 -5
- sempy_labs/directlake/_generate_shared_expression.py +25 -26
- sempy_labs/lakehouse/_get_lakehouse_columns.py +41 -18
- sempy_labs/lakehouse/_get_lakehouse_tables.py +66 -39
- sempy_labs/lakehouse/_lakehouse.py +44 -35
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +7 -12
- sempy_labs/migration/_refresh_calc_tables.py +7 -6
- sempy_labs/tom/_model.py +21 -14
- {semantic_link_labs-0.9.8.dist-info → semantic_link_labs-0.9.10.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.8.dist-info → semantic_link_labs-0.9.10.dist-info}/top_level.txt +0 -0
sempy_labs/_helper_functions.py
CHANGED
@@ -8,7 +8,7 @@ from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundExcept
 import pandas as pd
 from functools import wraps
 import datetime
-from typing import Optional, Tuple, List
+from typing import Optional, Tuple, List, Dict
 from uuid import UUID
 import sempy_labs._icons as icons
 from azure.core.credentials import TokenCredential, AccessToken
@@ -65,9 +65,11 @@ def create_abfss_path(
     path = f"abfss://{lakehouse_workspace_id}@{fp}/{lakehouse_id}"
 
     if delta_table_name is not None:
+        path += "/Tables"
         if schema is not None:
-            path += f"/{schema}"
-
+            path += f"/{schema}/{delta_table_name}"
+        else:
+            path += f"/{delta_table_name}"
 
     return path
 
@@ -661,11 +663,13 @@ def save_as_delta_table(
     workspace: Optional[str | UUID] = None,
 ):
     """
-    Saves a pandas dataframe as a delta table in a Fabric lakehouse.
+    Saves a pandas or spark dataframe as a delta table in a Fabric lakehouse.
+
+    This function may be executed in either a PySpark or pure Python notebook. If executing in a pure Python notebook, the dataframe must be a pandas dataframe.
 
     Parameters
     ----------
-    dataframe : pandas.DataFrame
+    dataframe : pandas.DataFrame | spark.Dataframe
         The dataframe to be saved as a delta table.
     delta_table_name : str
         The name of the delta table.
@@ -684,19 +688,6 @@ def save_as_delta_table(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    from pyspark.sql.types import (
-        StringType,
-        IntegerType,
-        FloatType,
-        DateType,
-        StructType,
-        StructField,
-        BooleanType,
-        LongType,
-        DoubleType,
-        TimestampType,
-    )
-
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
         lakehouse=lakehouse, workspace=workspace_id
@@ -715,52 +706,101 @@ def save_as_delta_table(
             f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names."
         )
 
-
+    import pyarrow as pa
+    from pyspark.sql.types import (
+        StringType,
+        IntegerType,
+        FloatType,
+        DateType,
+        StructType,
+        StructField,
+        BooleanType,
+        LongType,
+        DoubleType,
+        TimestampType,
+    )
 
-
-
-
-
-
-
-
-
-
-
-
-
+    def get_type_mapping(pure_python):
+        common_mapping = {
+            "string": ("pa", pa.string(), StringType()),
+            "str": ("pa", pa.string(), StringType()),
+            "integer": ("pa", pa.int32(), IntegerType()),
+            "int": ("pa", pa.int32(), IntegerType()),
+            "float": ("pa", pa.float32(), FloatType()),
+            "double": ("pa", pa.float64(), DoubleType()),
+            "long": ("pa", pa.int64(), LongType()),
+            "bool": ("pa", pa.bool_(), BooleanType()),
+            "boolean": ("pa", pa.bool_(), BooleanType()),
+            "date": ("pa", pa.date32(), DateType()),
+            "timestamp": ("pa", pa.timestamp("ms"), TimestampType()),
+        }
+        return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()}
 
-
-
-
-
+    def build_schema(schema_dict, type_mapping, use_arrow=True):
+        if use_arrow:
+            fields = [
+                pa.field(name, type_mapping.get(dtype.lower()))
+                for name, dtype in schema_dict.items()
+            ]
+            return pa.schema(fields)
         else:
-
+            return StructType(
                 [
-                    StructField(
-                    for
+                    StructField(name, type_mapping.get(dtype.lower()), True)
+                    for name, dtype in schema_dict.items()
                 ]
             )
-
+
+    # Main logic
+    schema_map = None
+    if schema is not None:
+        use_arrow = _pure_python_notebook()
+        type_mapping = get_type_mapping(use_arrow)
+        schema_map = build_schema(schema, type_mapping, use_arrow)
+
+    if isinstance(dataframe, pd.DataFrame):
+        dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
+        if _pure_python_notebook():
+            spark_df = dataframe
+        else:
+            spark = _create_spark_session()
+            if schema is None:
+                spark_df = spark.createDataFrame(dataframe)
+            else:
+                spark_df = spark.createDataFrame(dataframe, schema_map)
     else:
         for col_name in dataframe.columns:
             new_name = col_name.replace(" ", "_")
             dataframe = dataframe.withColumnRenamed(col_name, new_name)
         spark_df = dataframe
 
-
+    file_path = create_abfss_path(
         lakehouse_id=lakehouse_id,
         lakehouse_workspace_id=workspace_id,
        delta_table_name=delta_table_name,
     )
 
-    if
-
-
-
+    if _pure_python_notebook():
+        from deltalake import write_deltalake
+
+        write_args = {
+            "table_or_uri": file_path,
+            "data": spark_df,
+            "mode": write_mode,
+            "schema": schema_map,
+        }
+
+        if merge_schema:
+            write_args["schema_mode"] = "merge"
+
+        write_deltalake(**write_args)
     else:
-        spark_df.write.mode(write_mode).format("delta")
+        writer = spark_df.write.mode(write_mode).format("delta")
+        if merge_schema:
+            writer = writer.option("mergeSchema", "true")
+
+        writer.save(file_path)
+
     print(
         f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
     )
@@ -1495,32 +1535,82 @@ def generate_guid():
 
 def _get_column_aggregate(
     table_name: str,
-    column_name: str = "RunId",
+    column_name: str | List[str] = "RunId",
     lakehouse: Optional[str | UUID] = None,
     workspace: Optional[str | UUID] = None,
     function: str = "max",
     default_value: int = 0,
-) -> int:
+) -> int | Dict[str, int]:
 
-
-
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
+    df = _read_delta_table(path)
+
+    if isinstance(column_name, str):
+        result = _get_aggregate(
+            df=df,
+            column_name=column_name,
+            function=function,
+            default_value=default_value,
+        )
+    elif isinstance(column_name, list):
+        result = {}
+        for col in column_name:
+            result[col] = _get_aggregate(
+                df=df,
+                column_name=col,
+                function=function,
+                default_value=default_value,
+            )
+    else:
+        raise TypeError("column_name must be a string or a list of strings.")
+
+    return result
+
+
+def _get_aggregate(df, column_name, function, default_value: int = 0) -> int:
 
     function = function.upper()
-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
-    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
 
-
-
+    if _pure_python_notebook():
+        import polars as pl
 
-
-
-
-
+        if not isinstance(df, pd.DataFrame):
+            df.to_pandas()
+
+        df = pl.from_pandas(df)
+
+        # Perform aggregation
+        if "DISTINCT" in function:
+            if isinstance(df[column_name].dtype, pl.Decimal):
+                result = df[column_name].cast(pl.Float64).n_unique()
+            else:
+                result = df[column_name].n_unique()
+        elif "APPROX" in function:
+            result = df[column_name].unique().shape[0]
+        else:
+            try:
+                result = getattr(df[column_name], function.lower())()
+            except AttributeError:
+                raise ValueError(f"Unsupported function: {function}")
+
+        return result if result is not None else default_value
     else:
-
+        from pyspark.sql.functions import approx_count_distinct
+        from pyspark.sql import functions as F
+
+        if isinstance(df, pd.DataFrame):
+            df = _create_spark_dataframe(df)
+
+        if "DISTINCT" in function:
+            result = df.select(F.count_distinct(F.col(column_name)))
+        elif "APPROX" in function:
+            result = df.select(approx_count_distinct(column_name))
+        else:
+            result = df.selectExpr(f"{function}({column_name})")
 
-
+        return result.collect()[0][0] or default_value
 
 
 def _make_list_unique(my_list):
@@ -1685,6 +1775,7 @@ def _convert_data_type(input_data_type: str) -> str:
         "double": "Double",
         "float": "Double",
         "binary": "Boolean",
+        "long": "Int64",
     }
 
     if "decimal" in input_data_type:
@@ -1739,19 +1830,23 @@ def _base_api(
     lro_return_json: bool = False,
     lro_return_status_code: bool = False,
 ):
-
+    import notebookutils
     from sempy_labs._authentication import _get_headers
 
     if (lro_return_json or lro_return_status_code) and status_codes is None:
         status_codes = [200, 202]
 
+    def get_token(audience="pbi"):
+        return notebookutils.credentials.getToken(audience)
+
     if isinstance(status_codes, int):
         status_codes = [status_codes]
 
     if client == "fabric":
-        c = fabric.FabricRestClient()
+        c = fabric.FabricRestClient(token_provider=get_token)
     elif client == "fabric_sp":
-
+        token = auth.token_provider.get() or get_token
+        c = fabric.FabricRestClient(token_provider=token)
     elif client in ["azure", "graph"]:
         pass
     else:
@@ -1836,6 +1931,18 @@ def _update_dataframe_datatypes(dataframe: pd.DataFrame, column_map: dict):
             dataframe[column] = dataframe[column].fillna(0).astype(int)
         elif data_type in ["str", "string"]:
            dataframe[column] = dataframe[column].astype(str)
+        # Avoid having empty lists or lists with a value of None.
+        elif data_type in ["list"]:
+            dataframe[column] = dataframe[column].apply(
+                lambda x: (
+                    None
+                    if (type(x) == list and len(x) == 1 and x[0] == None)
+                    or (type(x) == list and len(x) == 0)
+                    else x
+                )
+            )
+        elif data_type in ["dict"]:
+            dataframe[column] = dataframe[column]
         else:
             raise NotImplementedError
 
@@ -1872,18 +1979,58 @@ def _create_spark_session():
     return SparkSession.builder.getOrCreate()
 
 
-def
+def _get_delta_table(path: str) -> str:
+
+    from delta import DeltaTable
 
     spark = _create_spark_session()
 
-    return
+    return DeltaTable.forPath(spark, path)
 
 
-def
+def _read_delta_table(path: str, to_pandas: bool = True, to_df: bool = False):
 
-
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        df = DeltaTable(table_uri=path)
+        if to_pandas:
+            df = df.to_pandas()
+    else:
+        spark = _create_spark_session()
+        df = spark.read.format("delta").load(path)
+        if to_df:
+            df = df.toDF()
+
+    return df
+
+
+def _read_delta_table_history(path) -> pd.DataFrame:
 
-
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        df = pd.DataFrame(DeltaTable(table_uri=path).history())
+    else:
+        from delta import DeltaTable
+
+        spark = _create_spark_session()
+        delta_table = DeltaTable.forPath(spark, path)
+        df = delta_table.history().toPandas()
+
+    return df
+
+
+def _delta_table_row_count(path: str) -> int:
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        dt = DeltaTable(path)
+        arrow_table = dt.to_pyarrow_table()
+        return arrow_table.num_rows
+    else:
+        return _read_delta_table(path).count()
 
 
 def _run_spark_sql_query(query):
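
Note: a minimal usage sketch of the updated save_as_delta_table, assuming it is still re-exported from the package root as in prior releases. The write_mode, merge_schema and schema parameter names are taken from the function body above; their defaults are not visible in this diff, and the lakehouse/workspace values below are illustrative only.

    import pandas as pd
    import sempy_labs as labs

    df = pd.DataFrame({"RunId": [1, 2], "Status": ["ok", "failed"]})

    # In a pure Python notebook this writes via deltalake.write_deltalake;
    # in a PySpark notebook it goes through spark_df.write...save(file_path).
    labs.save_as_delta_table(
        dataframe=df,
        delta_table_name="run_log",        # spaces are not allowed in the table name
        lakehouse="MyLakehouse",           # hypothetical lakehouse
        workspace="MyWorkspace",           # hypothetical workspace
        write_mode="overwrite",
        merge_schema=False,
        schema={"RunId": "long", "Status": "string"},  # keys of get_type_mapping above
    )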
sempy_labs/_kql_databases.py
CHANGED
@@ -6,6 +6,8 @@ from sempy_labs._helper_functions import (
     _create_dataframe,
     delete_item,
     create_item,
+    resolve_item_id,
+    resolve_workspace_id,
 )
 from uuid import UUID
 import sempy_labs._icons as icons
@@ -121,3 +123,19 @@ def delete_kql_database(
     )
 
     delete_item(item=kql_database, type="KQLDatabase", workspace=workspace)
+
+
+def _resolve_cluster_uri(
+    kql_database: str | UUID, workspace: Optional[str | UUID] = None
+) -> str:
+
+    workspace_id = resolve_workspace_id(workspace=workspace)
+    item_id = resolve_item_id(
+        item=kql_database, type="KQLDatabase", workspace=workspace
+    )
+    response = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/kqlDatabases/{item_id}",
+        client="fabric_sp",
+    )
+
+    return response.json().get("properties", {}).get("queryServiceUri")
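
Note: a short sketch of the new private helper, with illustrative names. It returns the queryServiceUri property of the KQL database, which the new _kusto.py module uses as the Kusto cluster endpoint.

    from sempy_labs._kql_databases import _resolve_cluster_uri

    # Resolves the database's query service URI via the Fabric REST API.
    cluster_uri = _resolve_cluster_uri(kql_database="MyKQLDatabase", workspace="MyWorkspace")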
sempy_labs/_kusto.py
ADDED
@@ -0,0 +1,135 @@
+import requests
+import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+from typing import Optional
+from uuid import UUID
+from sempy_labs._kql_databases import _resolve_cluster_uri
+from sempy_labs._helper_functions import resolve_item_id
+
+
+@log
+def query_kusto(
+    query: str,
+    kql_database: str | UUID,
+    workspace: Optional[str | UUID] = None,
+    language: str = "kql",
+) -> pd.DataFrame:
+    """
+    Runs a KQL query against a KQL database.
+
+    Parameters
+    ----------
+    query : str
+        The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+    kql_database : str | uuid.UUID
+        The KQL database name or ID.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    language : str, default="kql"
+        The language of the query. Currently "kql' and "sql" are supported.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the result of the KQL query.
+    """
+
+    import notebookutils
+
+    language = language.lower()
+    if language not in ["kql", "sql"]:
+        raise ValueError(
+            f"{icons._red_dot} Invalid language '{language}'. Only 'kql' and 'sql' are supported."
+        )
+
+    cluster_uri = _resolve_cluster_uri(kql_database=kql_database, workspace=workspace)
+    token = notebookutils.credentials.getToken(cluster_uri)
+
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    kql_database_id = resolve_item_id(
+        item=kql_database, type="KQLDatabase", workspace=workspace
+    )
+    payload = {"db": kql_database_id, "csl": query}
+    if language == "sql":
+        payload["properties"] = {"Options": {"query_language": "sql"}}
+
+    response = requests.post(
+        f"{cluster_uri}/v1/rest/query",
+        headers=headers,
+        json=payload,
+    )
+
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    results = response.json()
+    columns_info = results["Tables"][0]["Columns"]
+    rows = results["Tables"][0]["Rows"]
+
+    df = pd.DataFrame(rows, columns=[col["ColumnName"] for col in columns_info])
+
+    for col_info in columns_info:
+        col_name = col_info["ColumnName"]
+        data_type = col_info["DataType"]
+
+        try:
+            if data_type == "DateTime":
+                df[col_name] = pd.to_datetime(df[col_name])
+            elif data_type in ["Int64", "Int32", "Long"]:
+                df[col_name] = (
+                    pd.to_numeric(df[col_name], errors="coerce")
+                    .fillna(0)
+                    .astype("int64")
+                )
+            elif data_type == "Real" or data_type == "Double":
+                df[col_name] = pd.to_numeric(df[col_name], errors="coerce")
+            else:
+                # Convert any other type to string, change as needed
+                df[col_name] = df[col_name].astype(str)
+        except Exception as e:
+            print(
+                f"{icons.yellow_dot} Could not convert column {col_name} to {data_type}, defaulting to string: {str(e)}"
+            )
+            df[col_name] = df[col_name].astype(str)
+
+    return df
+
+
+def query_workspace_monitoring(
+    query: str, workspace: Optional[str | UUID] = None, language: str = "kql"
+) -> pd.DataFrame:
+    """
+    Runs a query against the Fabric workspace monitoring database. Workspace monitoring must be enabled on the workspace to use this function.
+
+    Parameters
+    ----------
+    query : str
+        The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    language : str, default="kql"
+        The language of the query. Currently "kql' and "sql" are supported.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the result of the query.
+    """
+
+    return query_kusto(
+        query=query,
+        kql_database="Monitoring KQL database",
+        workspace=workspace,
+        language=language,
+    )
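
Note: a usage sketch for the new module. The signatures are as shown above; whether the functions are also re-exported from the sempy_labs root is not visible in this diff, so the import below goes through the module itself. Table and item names are illustrative.

    from sempy_labs._kusto import query_kusto, query_workspace_monitoring

    # KQL query against a KQL database; must run inside a Fabric notebook,
    # since the token is obtained via notebookutils.credentials.getToken.
    df = query_kusto(
        query="MyTable | take 10",
        kql_database="MyKQLDatabase",
        workspace="MyWorkspace",
    )

    # Query the workspace monitoring database; workspace monitoring must be enabled.
    logs = query_workspace_monitoring(query="SemanticModelLogs | take 10")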
sempy_labs/_list_functions.py
CHANGED
@@ -240,7 +240,11 @@ def list_tables(
                 "Columns": sum(
                     1 for c in t.Columns if str(c.Type) != "RowNumber"
                 ),
-                "% DB":
+                "% DB": (
+                    round((total_size / model_size) * 100, 2)
+                    if model_size not in (0, None, float("nan"))
+                    else 0.0
+                ),
             }
         )
 
sempy_labs/_vertipaq.py
CHANGED
@@ -8,7 +8,6 @@ import datetime
 import warnings
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    resolve_lakehouse_name,
     save_as_delta_table,
     resolve_workspace_capacity,
     _get_column_aggregate,
@@ -20,7 +19,6 @@ from sempy_labs._helper_functions import (
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
-from sempy_labs.directlake import get_direct_lake_source
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
@@ -176,10 +174,12 @@ def vertipaq_analyzer(
     )
 
     artifact_type = None
-
-
-
-
+    lakehouse_workspace_id = None
+    lakehouse_name = None
+    # if is_direct_lake:
+    #     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+    #         get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
+    #     )
 
     dfR["Missing Rows"] = 0
     dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
sempy_labs/_warehouses.py
CHANGED
@@ -53,11 +53,11 @@ def create_warehouse(
             "defaultCollation"
         ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"
 
-
+    result = _base_api(
         request=f"/v1/workspaces/{workspace_id}/warehouses",
         payload=payload,
         method="post",
-
+        lro_return_json=True,
         status_codes=[201, 202],
     )
 
@@ -65,7 +65,7 @@ def create_warehouse(
         f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace."
     )
 
-    return
+    return result.get("id")
 
 
 def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
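
Note: create_warehouse now runs the request as a long-running operation (lro_return_json=True) and returns the new warehouse's ID instead of None. A sketch with illustrative names; other parameters of create_warehouse are not shown in this diff.

    import sempy_labs as labs

    warehouse_id = labs.create_warehouse(
        warehouse="MyWarehouse",
        workspace="MyWorkspace",
    )
    print(warehouse_id)  # ID taken from the create response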
|
sempy_labs/admin/__init__.py
CHANGED
|
@@ -38,6 +38,7 @@ from sempy_labs.admin._capacities import (
|
|
|
38
38
|
get_capacity_assignment_status,
|
|
39
39
|
get_capacity_state,
|
|
40
40
|
list_capacity_users,
|
|
41
|
+
get_refreshables,
|
|
41
42
|
)
|
|
42
43
|
from sempy_labs.admin._tenant import (
|
|
43
44
|
list_tenant_settings,
|
|
@@ -80,6 +81,9 @@ from sempy_labs.admin._external_data_share import (
|
|
|
80
81
|
from sempy_labs.admin._git import (
|
|
81
82
|
list_git_connections,
|
|
82
83
|
)
|
|
84
|
+
from sempy_labs.admin._dataflows import (
|
|
85
|
+
export_dataflow,
|
|
86
|
+
)
|
|
83
87
|
|
|
84
88
|
__all__ = [
|
|
85
89
|
"list_items",
|
|
@@ -133,4 +137,6 @@ __all__ = [
|
|
|
133
137
|
"list_capacity_users",
|
|
134
138
|
"list_user_subscriptions",
|
|
135
139
|
"list_report_subscriptions",
|
|
140
|
+
"get_refreshables",
|
|
141
|
+
"export_dataflow",
|
|
136
142
|
]
|
sempy_labs/admin/_artifacts.py
CHANGED
@@ -31,7 +31,7 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFram
         "Artifact Name": "string",
         "Artifact Id": "string",
         "Artifact Type": "string",
-        "Artifact Size in MB": "
+        "Artifact Size in MB": "string",
         "Created Date Time": "datetime",
         "Last Accessed Date Time": "datetime",
     }
@@ -47,8 +47,8 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFram
     for r in responses:
         for i in r.get("unusedArtifactEntities", []):
             new_data = {
-                "Artifact Name": i.get("
-                "Artifact Id": i.get("
+                "Artifact Name": i.get("displayName"),
+                "Artifact Id": i.get("artifactId"),
                 "Artifact Type": i.get("artifactType"),
                 "Artifact Size in MB": i.get("artifactSizeInMB"),
                 "Created Date Time": i.get("createdDateTime"),