semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.10.dist-info}/METADATA +5 -3
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.10.dist-info}/RECORD +29 -27
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.10.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +6 -0
- sempy_labs/_clear_cache.py +12 -0
- sempy_labs/_dax.py +8 -2
- sempy_labs/_delta_analyzer.py +8 -18
- sempy_labs/_generate_semantic_model.py +6 -7
- sempy_labs/_helper_functions.py +205 -64
- sempy_labs/_kql_databases.py +18 -0
- sempy_labs/_kusto.py +135 -0
- sempy_labs/_list_functions.py +5 -1
- sempy_labs/_vertipaq.py +6 -6
- sempy_labs/_warehouses.py +3 -3
- sempy_labs/admin/__init__.py +6 -0
- sempy_labs/admin/_artifacts.py +3 -3
- sempy_labs/admin/_capacities.py +161 -1
- sempy_labs/admin/_dataflows.py +45 -0
- sempy_labs/admin/_items.py +16 -11
- sempy_labs/admin/_tenant.py +5 -5
- sempy_labs/directlake/_generate_shared_expression.py +25 -26
- sempy_labs/lakehouse/_get_lakehouse_columns.py +41 -18
- sempy_labs/lakehouse/_get_lakehouse_tables.py +66 -39
- sempy_labs/lakehouse/_lakehouse.py +44 -35
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +7 -12
- sempy_labs/migration/_refresh_calc_tables.py +7 -6
- sempy_labs/tom/_model.py +21 -14
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.10.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.10.dist-info}/top_level.txt +0 -0
sempy_labs/_helper_functions.py
CHANGED
@@ -8,7 +8,7 @@ from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundException
 import pandas as pd
 from functools import wraps
 import datetime
-from typing import Optional, Tuple, List
+from typing import Optional, Tuple, List, Dict
 from uuid import UUID
 import sempy_labs._icons as icons
 from azure.core.credentials import TokenCredential, AccessToken
@@ -663,11 +663,13 @@ def save_as_delta_table(
     workspace: Optional[str | UUID] = None,
 ):
     """
-    Saves a pandas dataframe as a delta table in a Fabric lakehouse.
+    Saves a pandas or spark dataframe as a delta table in a Fabric lakehouse.
+
+    This function may be executed in either a PySpark or pure Python notebook. If executing in a pure Python notebook, the dataframe must be a pandas dataframe.
 
     Parameters
     ----------
-    dataframe : pandas.DataFrame
+    dataframe : pandas.DataFrame | spark.Dataframe
         The dataframe to be saved as a delta table.
     delta_table_name : str
         The name of the delta table.
@@ -686,19 +688,6 @@ def save_as_delta_table(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    from pyspark.sql.types import (
-        StringType,
-        IntegerType,
-        FloatType,
-        DateType,
-        StructType,
-        StructField,
-        BooleanType,
-        LongType,
-        DoubleType,
-        TimestampType,
-    )
-
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
         lakehouse=lakehouse, workspace=workspace_id
@@ -717,52 +706,101 @@ def save_as_delta_table(
             f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names."
         )
 
-
+    import pyarrow as pa
+    from pyspark.sql.types import (
+        StringType,
+        IntegerType,
+        FloatType,
+        DateType,
+        StructType,
+        StructField,
+        BooleanType,
+        LongType,
+        DoubleType,
+        TimestampType,
+    )
 
-
-
-
-
-
-
-
-
-
-
-
-
+    def get_type_mapping(pure_python):
+        common_mapping = {
+            "string": ("pa", pa.string(), StringType()),
+            "str": ("pa", pa.string(), StringType()),
+            "integer": ("pa", pa.int32(), IntegerType()),
+            "int": ("pa", pa.int32(), IntegerType()),
+            "float": ("pa", pa.float32(), FloatType()),
+            "double": ("pa", pa.float64(), DoubleType()),
+            "long": ("pa", pa.int64(), LongType()),
+            "bool": ("pa", pa.bool_(), BooleanType()),
+            "boolean": ("pa", pa.bool_(), BooleanType()),
+            "date": ("pa", pa.date32(), DateType()),
+            "timestamp": ("pa", pa.timestamp("ms"), TimestampType()),
+        }
+        return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()}
 
-
-
-
-
+    def build_schema(schema_dict, type_mapping, use_arrow=True):
+        if use_arrow:
+            fields = [
+                pa.field(name, type_mapping.get(dtype.lower()))
+                for name, dtype in schema_dict.items()
+            ]
+            return pa.schema(fields)
         else:
-
+            return StructType(
                 [
-                    StructField(
-                    for
+                    StructField(name, type_mapping.get(dtype.lower()), True)
+                    for name, dtype in schema_dict.items()
                 ]
             )
-
+
+    # Main logic
+    schema_map = None
+    if schema is not None:
+        use_arrow = _pure_python_notebook()
+        type_mapping = get_type_mapping(use_arrow)
+        schema_map = build_schema(schema, type_mapping, use_arrow)
+
+    if isinstance(dataframe, pd.DataFrame):
+        dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
+        if _pure_python_notebook():
+            spark_df = dataframe
+        else:
+            spark = _create_spark_session()
+            if schema is None:
+                spark_df = spark.createDataFrame(dataframe)
+            else:
+                spark_df = spark.createDataFrame(dataframe, schema_map)
     else:
         for col_name in dataframe.columns:
             new_name = col_name.replace(" ", "_")
             dataframe = dataframe.withColumnRenamed(col_name, new_name)
         spark_df = dataframe
 
-
+    file_path = create_abfss_path(
         lakehouse_id=lakehouse_id,
         lakehouse_workspace_id=workspace_id,
         delta_table_name=delta_table_name,
     )
 
-    if
-
-
-
+    if _pure_python_notebook():
+        from deltalake import write_deltalake
+
+        write_args = {
+            "table_or_uri": file_path,
+            "data": spark_df,
+            "mode": write_mode,
+            "schema": schema_map,
+        }
+
+        if merge_schema:
+            write_args["schema_mode"] = "merge"
+
+        write_deltalake(**write_args)
     else:
-        spark_df.write.mode(write_mode).format("delta")
+        writer = spark_df.write.mode(write_mode).format("delta")
+        if merge_schema:
+            writer = writer.option("mergeSchema", "true")
+
+        writer.save(file_path)
+
     print(
         f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
     )
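Illustrative usage (not part of the diff): based on the rewritten function above, a call like the one below should work in either a PySpark or pure Python Fabric notebook. The lakehouse and workspace names are placeholders, and the write_mode, merge_schema, and schema parameter names are inferred from the function body shown in the diff rather than from a published signature.

import pandas as pd
import sempy_labs as labs  # assumes save_as_delta_table remains exported at the package root

df = pd.DataFrame({"RunId": [1, 2], "Status": ["ok", "failed"]})

# The schema dict maps column names to the type names handled by get_type_mapping().
labs.save_as_delta_table(
    dataframe=df,
    delta_table_name="run_log",       # spaces are rejected by the check above
    lakehouse="MyLakehouse",          # placeholder lakehouse name
    workspace="MyWorkspace",          # placeholder workspace name
    write_mode="overwrite",
    merge_schema=False,
    schema={"RunId": "long", "Status": "string"},
)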
@@ -1497,32 +1535,82 @@ def generate_guid():
 
 def _get_column_aggregate(
     table_name: str,
-    column_name: str = "RunId",
+    column_name: str | List[str] = "RunId",
     lakehouse: Optional[str | UUID] = None,
     workspace: Optional[str | UUID] = None,
     function: str = "max",
     default_value: int = 0,
-) -> int:
+) -> int | Dict[str, int]:
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
+    df = _read_delta_table(path)
+
+    if isinstance(column_name, str):
+        result = _get_aggregate(
+            df=df,
+            column_name=column_name,
+            function=function,
+            default_value=default_value,
+        )
+    elif isinstance(column_name, list):
+        result = {}
+        for col in column_name:
+            result[col] = _get_aggregate(
+                df=df,
+                column_name=col,
+                function=function,
+                default_value=default_value,
+            )
+    else:
+        raise TypeError("column_name must be a string or a list of strings.")
+
+    return result
+
 
-
-    from pyspark.sql import functions as F
+def _get_aggregate(df, column_name, function, default_value: int = 0) -> int:
 
     function = function.upper()
-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
-    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
 
-
-
+    if _pure_python_notebook():
+        import polars as pl
+
+        if not isinstance(df, pd.DataFrame):
+            df.to_pandas()
 
-
-
-
-
+        df = pl.from_pandas(df)
+
+        # Perform aggregation
+        if "DISTINCT" in function:
+            if isinstance(df[column_name].dtype, pl.Decimal):
+                result = df[column_name].cast(pl.Float64).n_unique()
+            else:
+                result = df[column_name].n_unique()
+        elif "APPROX" in function:
+            result = df[column_name].unique().shape[0]
+        else:
+            try:
+                result = getattr(df[column_name], function.lower())()
+            except AttributeError:
+                raise ValueError(f"Unsupported function: {function}")
+
+        return result if result is not None else default_value
     else:
-
+        from pyspark.sql.functions import approx_count_distinct
+        from pyspark.sql import functions as F
+
+        if isinstance(df, pd.DataFrame):
+            df = _create_spark_dataframe(df)
 
-
+        if "DISTINCT" in function:
+            result = df.select(F.count_distinct(F.col(column_name)))
+        elif "APPROX" in function:
+            result = df.select(approx_count_distinct(column_name))
+        else:
+            result = df.selectExpr(f"{function}({column_name})")
+
+        return result.collect()[0][0] or default_value
 
 
 def _make_list_unique(my_list):
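Sketch of the new behaviour (not part of the diff): _get_column_aggregate is an internal helper, and the table and column names below are placeholders. A single column name still yields a scalar, while a list of column names now returns a dict keyed by column.

from sempy_labs._helper_functions import _get_column_aggregate

# Scalar result for a single column (previous behaviour).
max_run = _get_column_aggregate(table_name="run_log", column_name="RunId", function="max")

# Dict result, one aggregate per column, when a list is passed (new in 0.9.10).
maxes = _get_column_aggregate(
    table_name="run_log",
    column_name=["RunId", "BatchId"],   # placeholder column names
    function="max",
)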
@@ -1687,6 +1775,7 @@ def _convert_data_type(input_data_type: str) -> str:
         "double": "Double",
         "float": "Double",
         "binary": "Boolean",
+        "long": "Int64",
     }
 
     if "decimal" in input_data_type:
@@ -1842,6 +1931,18 @@ def _update_dataframe_datatypes(dataframe: pd.DataFrame, column_map: dict):
             dataframe[column] = dataframe[column].fillna(0).astype(int)
         elif data_type in ["str", "string"]:
             dataframe[column] = dataframe[column].astype(str)
+        # Avoid having empty lists or lists with a value of None.
+        elif data_type in ["list"]:
+            dataframe[column] = dataframe[column].apply(
+                lambda x: (
+                    None
+                    if (type(x) == list and len(x) == 1 and x[0] == None)
+                    or (type(x) == list and len(x) == 0)
+                    else x
+                )
+            )
+        elif data_type in ["dict"]:
+            dataframe[column] = dataframe[column]
         else:
             raise NotImplementedError
 
@@ -1878,18 +1979,58 @@ def _create_spark_session():
     return SparkSession.builder.getOrCreate()
 
 
-def
+def _get_delta_table(path: str) -> str:
+
+    from delta import DeltaTable
 
     spark = _create_spark_session()
 
-    return
+    return DeltaTable.forPath(spark, path)
 
 
-def
+def _read_delta_table(path: str, to_pandas: bool = True, to_df: bool = False):
 
-
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
 
-
+        df = DeltaTable(table_uri=path)
+        if to_pandas:
+            df = df.to_pandas()
+    else:
+        spark = _create_spark_session()
+        df = spark.read.format("delta").load(path)
+        if to_df:
+            df = df.toDF()
+
+    return df
+
+
+def _read_delta_table_history(path) -> pd.DataFrame:
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        df = pd.DataFrame(DeltaTable(table_uri=path).history())
+    else:
+        from delta import DeltaTable
+
+        spark = _create_spark_session()
+        delta_table = DeltaTable.forPath(spark, path)
+        df = delta_table.history().toPandas()
+
+    return df
+
+
+def _delta_table_row_count(path: str) -> int:
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        dt = DeltaTable(path)
+        arrow_table = dt.to_pyarrow_table()
+        return arrow_table.num_rows
+    else:
+        return _read_delta_table(path).count()
 
 
 def _run_spark_sql_query(query):
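Sketch of how the new dual-engine helpers fit together (not part of the diff; the GUIDs and table name are placeholders): _read_delta_table returns a pandas DataFrame via deltalake in a pure Python notebook and a Spark DataFrame under PySpark, and _delta_table_row_count chooses the matching row-count path.

from sempy_labs._helper_functions import (
    create_abfss_path,
    _read_delta_table,
    _delta_table_row_count,
)

path = create_abfss_path(
    lakehouse_id="<lakehouse-guid>",
    lakehouse_workspace_id="<workspace-guid>",
    delta_table_name="run_log",
)

df = _read_delta_table(path)         # pandas DataFrame (pure Python) or Spark DataFrame (PySpark)
rows = _delta_table_row_count(path)  # deltalake/pyarrow row count or Spark .count()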
sempy_labs/_kql_databases.py
CHANGED
@@ -6,6 +6,8 @@ from sempy_labs._helper_functions import (
     _create_dataframe,
     delete_item,
     create_item,
+    resolve_item_id,
+    resolve_workspace_id,
 )
 from uuid import UUID
 import sempy_labs._icons as icons
@@ -121,3 +123,19 @@ def delete_kql_database(
     )
 
     delete_item(item=kql_database, type="KQLDatabase", workspace=workspace)
+
+
+def _resolve_cluster_uri(
+    kql_database: str | UUID, workspace: Optional[str | UUID] = None
+) -> str:
+
+    workspace_id = resolve_workspace_id(workspace=workspace)
+    item_id = resolve_item_id(
+        item=kql_database, type="KQLDatabase", workspace=workspace
+    )
+    response = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/kqlDatabases/{item_id}",
+        client="fabric_sp",
+    )
+
+    return response.json().get("properties", {}).get("queryServiceUri")
sempy_labs/_kusto.py
ADDED
@@ -0,0 +1,135 @@
+import requests
+import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+from typing import Optional
+from uuid import UUID
+from sempy_labs._kql_databases import _resolve_cluster_uri
+from sempy_labs._helper_functions import resolve_item_id
+
+
+@log
+def query_kusto(
+    query: str,
+    kql_database: str | UUID,
+    workspace: Optional[str | UUID] = None,
+    language: str = "kql",
+) -> pd.DataFrame:
+    """
+    Runs a KQL query against a KQL database.
+
+    Parameters
+    ----------
+    query : str
+        The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+    kql_database : str | uuid.UUID
+        The KQL database name or ID.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    language : str, default="kql"
+        The language of the query. Currently "kql' and "sql" are supported.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the result of the KQL query.
+    """
+
+    import notebookutils
+
+    language = language.lower()
+    if language not in ["kql", "sql"]:
+        raise ValueError(
+            f"{icons._red_dot} Invalid language '{language}'. Only 'kql' and 'sql' are supported."
+        )
+
+    cluster_uri = _resolve_cluster_uri(kql_database=kql_database, workspace=workspace)
+    token = notebookutils.credentials.getToken(cluster_uri)
+
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    kql_database_id = resolve_item_id(
+        item=kql_database, type="KQLDatabase", workspace=workspace
+    )
+    payload = {"db": kql_database_id, "csl": query}
+    if language == "sql":
+        payload["properties"] = {"Options": {"query_language": "sql"}}
+
+    response = requests.post(
+        f"{cluster_uri}/v1/rest/query",
+        headers=headers,
+        json=payload,
+    )
+
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    results = response.json()
+    columns_info = results["Tables"][0]["Columns"]
+    rows = results["Tables"][0]["Rows"]
+
+    df = pd.DataFrame(rows, columns=[col["ColumnName"] for col in columns_info])
+
+    for col_info in columns_info:
+        col_name = col_info["ColumnName"]
+        data_type = col_info["DataType"]
+
+        try:
+            if data_type == "DateTime":
+                df[col_name] = pd.to_datetime(df[col_name])
+            elif data_type in ["Int64", "Int32", "Long"]:
+                df[col_name] = (
+                    pd.to_numeric(df[col_name], errors="coerce")
+                    .fillna(0)
+                    .astype("int64")
+                )
+            elif data_type == "Real" or data_type == "Double":
+                df[col_name] = pd.to_numeric(df[col_name], errors="coerce")
+            else:
+                # Convert any other type to string, change as needed
+                df[col_name] = df[col_name].astype(str)
+        except Exception as e:
+            print(
+                f"{icons.yellow_dot} Could not convert column {col_name} to {data_type}, defaulting to string: {str(e)}"
+            )
+            df[col_name] = df[col_name].astype(str)
+
+    return df
+
+
+def query_workspace_monitoring(
+    query: str, workspace: Optional[str | UUID] = None, language: str = "kql"
+) -> pd.DataFrame:
+    """
+    Runs a query against the Fabric workspace monitoring database. Workspace monitoring must be enabled on the workspace to use this function.
+
+    Parameters
+    ----------
+    query : str
+        The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    language : str, default="kql"
+        The language of the query. Currently "kql' and "sql" are supported.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the result of the query.
+    """
+
+    return query_kusto(
+        query=query,
+        kql_database="Monitoring KQL database",
+        workspace=workspace,
+        language=language,
+    )
sempy_labs/_list_functions.py
CHANGED
@@ -240,7 +240,11 @@ def list_tables(
                     "Columns": sum(
                         1 for c in t.Columns if str(c.Type) != "RowNumber"
                     ),
-                    "% DB":
+                    "% DB": (
+                        round((total_size / model_size) * 100, 2)
+                        if model_size not in (0, None, float("nan"))
+                        else 0.0
+                    ),
                 }
             )
 
sempy_labs/_vertipaq.py
CHANGED
@@ -8,7 +8,6 @@ import datetime
 import warnings
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    resolve_lakehouse_name,
     save_as_delta_table,
     resolve_workspace_capacity,
     _get_column_aggregate,
@@ -20,7 +19,6 @@ from sempy_labs._helper_functions import (
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
-from sempy_labs.directlake import get_direct_lake_source
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
@@ -176,10 +174,12 @@ def vertipaq_analyzer(
     )
 
     artifact_type = None
-
-
-
-
+    lakehouse_workspace_id = None
+    lakehouse_name = None
+    # if is_direct_lake:
+    #     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+    #         get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
+    #     )
 
     dfR["Missing Rows"] = 0
     dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
sempy_labs/_warehouses.py
CHANGED
@@ -53,11 +53,11 @@ def create_warehouse(
             "defaultCollation"
         ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"
 
-
+    result = _base_api(
         request=f"/v1/workspaces/{workspace_id}/warehouses",
         payload=payload,
         method="post",
-
+        lro_return_json=True,
         status_codes=[201, 202],
     )
 
@@ -65,7 +65,7 @@ def create_warehouse(
         f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace."
     )
 
-    return
+    return result.get("id")
 
 
 def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
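With this change create_warehouse returns the id of the new warehouse, taken from the LRO response, instead of returning nothing. A minimal sketch, using the warehouse and workspace parameter names visible in the diff and placeholder item names:

import sempy_labs as labs

warehouse_id = labs.create_warehouse(
    warehouse="SalesWarehouse",   # placeholder warehouse name
    workspace="MyWorkspace",      # placeholder workspace name
)
print(warehouse_id)  # id of the newly created warehouse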
sempy_labs/admin/__init__.py
CHANGED
@@ -38,6 +38,7 @@ from sempy_labs.admin._capacities import (
     get_capacity_assignment_status,
     get_capacity_state,
     list_capacity_users,
+    get_refreshables,
 )
 from sempy_labs.admin._tenant import (
     list_tenant_settings,
@@ -80,6 +81,9 @@ from sempy_labs.admin._external_data_share import (
 from sempy_labs.admin._git import (
     list_git_connections,
 )
+from sempy_labs.admin._dataflows import (
+    export_dataflow,
+)
 
 __all__ = [
     "list_items",
@@ -133,4 +137,6 @@ __all__ = [
     "list_capacity_users",
     "list_user_subscriptions",
     "list_report_subscriptions",
+    "get_refreshables",
+    "export_dataflow",
 ]
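The diff only shows that get_refreshables and export_dataflow are now imported and listed in __all__; their parameters are not visible here, so the sketch below is limited to the new import surface:

from sempy_labs.admin import get_refreshables, export_dataflow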
sempy_labs/admin/_artifacts.py
CHANGED
@@ -31,7 +31,7 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
         "Artifact Name": "string",
         "Artifact Id": "string",
         "Artifact Type": "string",
-        "Artifact Size in MB": "
+        "Artifact Size in MB": "string",
         "Created Date Time": "datetime",
         "Last Accessed Date Time": "datetime",
     }
@@ -47,8 +47,8 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
     for r in responses:
         for i in r.get("unusedArtifactEntities", []):
             new_data = {
-                "Artifact Name": i.get("
-                "Artifact Id": i.get("
+                "Artifact Name": i.get("displayName"),
+                "Artifact Id": i.get("artifactId"),
                 "Artifact Type": i.get("artifactType"),
                 "Artifact Size in MB": i.get("artifactSizeInMB"),
                 "Created Date Time": i.get("createdDateTime"),