semantic-link-labs 0.9.8__py3-none-any.whl → 0.9.10__py3-none-any.whl


@@ -8,7 +8,7 @@ from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundExcept
 import pandas as pd
 from functools import wraps
 import datetime
-from typing import Optional, Tuple, List
+from typing import Optional, Tuple, List, Dict
 from uuid import UUID
 import sempy_labs._icons as icons
 from azure.core.credentials import TokenCredential, AccessToken
@@ -65,9 +65,11 @@ def create_abfss_path(
     path = f"abfss://{lakehouse_workspace_id}@{fp}/{lakehouse_id}"

     if delta_table_name is not None:
+        path += "/Tables"
         if schema is not None:
-            path += f"/{schema}"
-        path += f"/Tables/{delta_table_name}"
+            path += f"/{schema}/{delta_table_name}"
+        else:
+            path += f"/{delta_table_name}"

     return path
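Note: the rewritten branch roots every table path under /Tables and places an optional schema between Tables and the table name. A standalone sketch of the resulting path shape (the endpoint and GUIDs are placeholders, and this re-implements the logic outside the library):

```python
# Standalone sketch of the new path layout; GUIDs and endpoint are placeholders.
def abfss_table_path(workspace_id, lakehouse_id, table, schema=None,
                     fp="onelake.dfs.fabric.microsoft.com"):
    path = f"abfss://{workspace_id}@{fp}/{lakehouse_id}"
    path += "/Tables"
    if schema is not None:
        path += f"/{schema}/{table}"   # schema-enabled lakehouse: Tables/<schema>/<table>
    else:
        path += f"/{table}"            # default: Tables/<table>
    return path


print(abfss_table_path("ws-guid", "lh-guid", "sales", schema="dbo"))
# abfss://ws-guid@onelake.dfs.fabric.microsoft.com/lh-guid/Tables/dbo/sales
```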
 
@@ -661,11 +663,13 @@ def save_as_delta_table(
     workspace: Optional[str | UUID] = None,
 ):
     """
-    Saves a pandas dataframe as a delta table in a Fabric lakehouse.
+    Saves a pandas or spark dataframe as a delta table in a Fabric lakehouse.
+
+    This function may be executed in either a PySpark or pure Python notebook. If executing in a pure Python notebook, the dataframe must be a pandas dataframe.

     Parameters
     ----------
-    dataframe : pandas.DataFrame
+    dataframe : pandas.DataFrame | spark.Dataframe
         The dataframe to be saved as a delta table.
     delta_table_name : str
         The name of the delta table.
@@ -684,19 +688,6 @@ def save_as_delta_table(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """

-    from pyspark.sql.types import (
-        StringType,
-        IntegerType,
-        FloatType,
-        DateType,
-        StructType,
-        StructField,
-        BooleanType,
-        LongType,
-        DoubleType,
-        TimestampType,
-    )
-
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
         lakehouse=lakehouse, workspace=workspace_id
@@ -715,52 +706,101 @@ def save_as_delta_table(
             f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names."
         )

-    spark = _create_spark_session()
+    import pyarrow as pa
+    from pyspark.sql.types import (
+        StringType,
+        IntegerType,
+        FloatType,
+        DateType,
+        StructType,
+        StructField,
+        BooleanType,
+        LongType,
+        DoubleType,
+        TimestampType,
+    )

-    type_mapping = {
-        "string": StringType(),
-        "str": StringType(),
-        "integer": IntegerType(),
-        "int": IntegerType(),
-        "float": FloatType(),
-        "date": DateType(),
-        "bool": BooleanType(),
-        "boolean": BooleanType(),
-        "long": LongType(),
-        "double": DoubleType(),
-        "timestamp": TimestampType(),
-    }
+    def get_type_mapping(pure_python):
+        common_mapping = {
+            "string": ("pa", pa.string(), StringType()),
+            "str": ("pa", pa.string(), StringType()),
+            "integer": ("pa", pa.int32(), IntegerType()),
+            "int": ("pa", pa.int32(), IntegerType()),
+            "float": ("pa", pa.float32(), FloatType()),
+            "double": ("pa", pa.float64(), DoubleType()),
+            "long": ("pa", pa.int64(), LongType()),
+            "bool": ("pa", pa.bool_(), BooleanType()),
+            "boolean": ("pa", pa.bool_(), BooleanType()),
+            "date": ("pa", pa.date32(), DateType()),
+            "timestamp": ("pa", pa.timestamp("ms"), TimestampType()),
+        }
+        return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()}

-    if isinstance(dataframe, pd.DataFrame):
-        dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
-        if schema is None:
-            spark_df = spark.createDataFrame(dataframe)
+    def build_schema(schema_dict, type_mapping, use_arrow=True):
+        if use_arrow:
+            fields = [
+                pa.field(name, type_mapping.get(dtype.lower()))
+                for name, dtype in schema_dict.items()
+            ]
+            return pa.schema(fields)
         else:
-            schema_map = StructType(
+            return StructType(
                 [
-                    StructField(column_name, type_mapping[data_type], True)
-                    for column_name, data_type in schema.items()
+                    StructField(name, type_mapping.get(dtype.lower()), True)
+                    for name, dtype in schema_dict.items()
                 ]
             )
-            spark_df = spark.createDataFrame(dataframe, schema_map)
+
+    # Main logic
+    schema_map = None
+    if schema is not None:
+        use_arrow = _pure_python_notebook()
+        type_mapping = get_type_mapping(use_arrow)
+        schema_map = build_schema(schema, type_mapping, use_arrow)
+
+    if isinstance(dataframe, pd.DataFrame):
+        dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
+        if _pure_python_notebook():
+            spark_df = dataframe
+        else:
+            spark = _create_spark_session()
+            if schema is None:
+                spark_df = spark.createDataFrame(dataframe)
+            else:
+                spark_df = spark.createDataFrame(dataframe, schema_map)
     else:
         for col_name in dataframe.columns:
             new_name = col_name.replace(" ", "_")
             dataframe = dataframe.withColumnRenamed(col_name, new_name)
         spark_df = dataframe

-    filePath = create_abfss_path(
+    file_path = create_abfss_path(
         lakehouse_id=lakehouse_id,
         lakehouse_workspace_id=workspace_id,
         delta_table_name=delta_table_name,
     )

-    if merge_schema:
-        spark_df.write.mode(write_mode).format("delta").option(
-            "mergeSchema", "true"
-        ).save(filePath)
+    if _pure_python_notebook():
+        from deltalake import write_deltalake
+
+        write_args = {
+            "table_or_uri": file_path,
+            "data": spark_df,
+            "mode": write_mode,
+            "schema": schema_map,
+        }
+
+        if merge_schema:
+            write_args["schema_mode"] = "merge"
+
+        write_deltalake(**write_args)
     else:
-        spark_df.write.mode(write_mode).format("delta").save(filePath)
+        writer = spark_df.write.mode(write_mode).format("delta")
+        if merge_schema:
+            writer = writer.option("mergeSchema", "true")
+
+        writer.save(file_path)
+
     print(
         f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
     )
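For context, a usage sketch of the updated function (lakehouse, table, and column names are hypothetical; it assumes a Fabric notebook with sempy-labs installed, and per the new docstring it works in both PySpark and pure Python notebooks):

```python
# Usage sketch only; names are illustrative, not taken from the diff.
import pandas as pd
from sempy_labs import save_as_delta_table  # or sempy_labs._helper_functions if not re-exported

df = pd.DataFrame({"Run Id": [1, 2], "Status": ["ok", "failed"]})

save_as_delta_table(
    dataframe=df,                        # pandas is accepted in both notebook types
    delta_table_name="run_log",
    write_mode="overwrite",
    schema={"Run_Id": "long", "Status": "string"},  # keys shown with the space -> underscore rename applied
    lakehouse="MyLakehouse",             # hypothetical lakehouse; defaults to the attached one
)
```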
@@ -1495,32 +1535,82 @@ def generate_guid():

 def _get_column_aggregate(
     table_name: str,
-    column_name: str = "RunId",
+    column_name: str | List[str] = "RunId",
     lakehouse: Optional[str | UUID] = None,
     workspace: Optional[str | UUID] = None,
     function: str = "max",
     default_value: int = 0,
-) -> int:
+) -> int | Dict[str, int]:

-    from pyspark.sql.functions import approx_count_distinct
-    from pyspark.sql import functions as F
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
+    df = _read_delta_table(path)
+
+    if isinstance(column_name, str):
+        result = _get_aggregate(
+            df=df,
+            column_name=column_name,
+            function=function,
+            default_value=default_value,
+        )
+    elif isinstance(column_name, list):
+        result = {}
+        for col in column_name:
+            result[col] = _get_aggregate(
+                df=df,
+                column_name=col,
+                function=function,
+                default_value=default_value,
+            )
+    else:
+        raise TypeError("column_name must be a string or a list of strings.")
+
+    return result
+
+
+def _get_aggregate(df, column_name, function, default_value: int = 0) -> int:

     function = function.upper()
-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
-    path = create_abfss_path(lakehouse_id, workspace_id, table_name)

-    spark = _create_spark_session()
-    df = spark.read.format("delta").load(path)
+    if _pure_python_notebook():
+        import polars as pl

-    if function in {"COUNTDISTINCT", "DISTINCTCOUNT"}:
-        result = df.select(F.count_distinct(F.col(column_name)))
-    elif "APPROX" in function:
-        result = df.select(approx_count_distinct(column_name))
+        if not isinstance(df, pd.DataFrame):
+            df.to_pandas()
+
+        df = pl.from_pandas(df)
+
+        # Perform aggregation
+        if "DISTINCT" in function:
+            if isinstance(df[column_name].dtype, pl.Decimal):
+                result = df[column_name].cast(pl.Float64).n_unique()
+            else:
+                result = df[column_name].n_unique()
+        elif "APPROX" in function:
+            result = df[column_name].unique().shape[0]
+        else:
+            try:
+                result = getattr(df[column_name], function.lower())()
+            except AttributeError:
+                raise ValueError(f"Unsupported function: {function}")
+
+        return result if result is not None else default_value
     else:
-        result = df.selectExpr(f"{function}({column_name})")
+        from pyspark.sql.functions import approx_count_distinct
+        from pyspark.sql import functions as F
+
+        if isinstance(df, pd.DataFrame):
+            df = _create_spark_dataframe(df)
+
+        if "DISTINCT" in function:
+            result = df.select(F.count_distinct(F.col(column_name)))
+        elif "APPROX" in function:
+            result = df.select(approx_count_distinct(column_name))
+        else:
+            result = df.selectExpr(f"{function}({column_name})")

-    return result.collect()[0][0] or default_value
+        return result.collect()[0][0] or default_value


 def _make_list_unique(my_list):
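The helper is private (underscore-prefixed), so the following is an illustration of the new list behaviour rather than a supported API; table and column names are made up:

```python
# Illustration only; _get_column_aggregate is a private helper, not public API.
from sempy_labs._helper_functions import _get_column_aggregate

# A single column still returns a scalar (default_value when the result is null).
last_run = _get_column_aggregate(table_name="run_log", column_name="RunId", function="max")

# A list of columns now returns a dict keyed by column name.
distinct_counts = _get_column_aggregate(
    table_name="run_log",
    column_name=["RunId", "Status"],
    function="countdistinct",
)
# e.g. {"RunId": 42, "Status": 3}  (values illustrative)
```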
@@ -1685,6 +1775,7 @@ def _convert_data_type(input_data_type: str) -> str:
         "double": "Double",
         "float": "Double",
         "binary": "Boolean",
+        "long": "Int64",
     }

     if "decimal" in input_data_type:
@@ -1739,19 +1830,23 @@ def _base_api(
     lro_return_json: bool = False,
     lro_return_status_code: bool = False,
 ):
-
+    import notebookutils
     from sempy_labs._authentication import _get_headers

     if (lro_return_json or lro_return_status_code) and status_codes is None:
         status_codes = [200, 202]

+    def get_token(audience="pbi"):
+        return notebookutils.credentials.getToken(audience)
+
     if isinstance(status_codes, int):
         status_codes = [status_codes]

     if client == "fabric":
-        c = fabric.FabricRestClient()
+        c = fabric.FabricRestClient(token_provider=get_token)
     elif client == "fabric_sp":
-        c = fabric.FabricRestClient(token_provider=auth.token_provider.get())
+        token = auth.token_provider.get() or get_token
+        c = fabric.FabricRestClient(token_provider=token)
     elif client in ["azure", "graph"]:
         pass
     else:
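Worth noting: in the fabric_sp branch, get_token is passed uncalled, so `auth.token_provider.get() or get_token` hands FabricRestClient either the registered service-principal provider or, when none is set, the notebook credential function. A generic sketch of that fallback pattern (names are illustrative):

```python
# Generic sketch of the "configured provider, else notebook credential" fallback.
def choose_token_provider(configured, notebook_getter):
    # `or` returns notebook_getter when no provider has been registered (configured is None).
    return configured or notebook_getter


def fake_notebook_token(audience="pbi"):  # stand-in for notebookutils.credentials.getToken
    return f"token-for-{audience}"


provider = choose_token_provider(None, fake_notebook_token)
print(provider("pbi"))  # token-for-pbi
```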
@@ -1836,6 +1931,18 @@ def _update_dataframe_datatypes(dataframe: pd.DataFrame, column_map: dict):
             dataframe[column] = dataframe[column].fillna(0).astype(int)
         elif data_type in ["str", "string"]:
             dataframe[column] = dataframe[column].astype(str)
+        # Avoid having empty lists or lists with a value of None.
+        elif data_type in ["list"]:
+            dataframe[column] = dataframe[column].apply(
+                lambda x: (
+                    None
+                    if (type(x) == list and len(x) == 1 and x[0] == None)
+                    or (type(x) == list and len(x) == 0)
+                    else x
+                )
+            )
+        elif data_type in ["dict"]:
+            dataframe[column] = dataframe[column]
         else:
             raise NotImplementedError
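The new "list" branch collapses empty lists and single-None lists to None and leaves everything else untouched; a minimal pandas illustration of the same transform:

```python
# Minimal illustration of the new "list" handling.
import pandas as pd

s = pd.Series([[], [None], [1, 2], None])
cleaned = s.apply(
    lambda x: None
    if isinstance(x, list) and (len(x) == 0 or (len(x) == 1 and x[0] is None))
    else x
)
print(cleaned.tolist())  # [None, None, [1, 2], None]
```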
 
@@ -1872,18 +1979,58 @@ def _create_spark_session():
     return SparkSession.builder.getOrCreate()


-def _read_delta_table(path: str):
+def _get_delta_table(path: str) -> str:
+
+    from delta import DeltaTable

     spark = _create_spark_session()

-    return spark.read.format("delta").load(path)
+    return DeltaTable.forPath(spark, path)


-def _delta_table_row_count(table_name: str) -> int:
+def _read_delta_table(path: str, to_pandas: bool = True, to_df: bool = False):

-    spark = _create_spark_session()
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        df = DeltaTable(table_uri=path)
+        if to_pandas:
+            df = df.to_pandas()
+    else:
+        spark = _create_spark_session()
+        df = spark.read.format("delta").load(path)
+        if to_df:
+            df = df.toDF()
+
+    return df
+
+
+def _read_delta_table_history(path) -> pd.DataFrame:

-    return spark.table(table_name).count()
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        df = pd.DataFrame(DeltaTable(table_uri=path).history())
+    else:
+        from delta import DeltaTable
+
+        spark = _create_spark_session()
+        delta_table = DeltaTable.forPath(spark, path)
+        df = delta_table.history().toPandas()
+
+    return df
+
+
+def _delta_table_row_count(path: str) -> int:
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        dt = DeltaTable(path)
+        arrow_table = dt.to_pyarrow_table()
+        return arrow_table.num_rows
+    else:
+        return _read_delta_table(path).count()


 def _run_spark_sql_query(query):
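Reads now follow the same pure-Python/PySpark split as writes; a condensed standalone sketch of the pattern (not the library helpers themselves, and the abfss path is a placeholder):

```python
# Condensed sketch of the dual-mode read; the abfss path is a placeholder.
def read_delta(path: str, pure_python: bool):
    if pure_python:
        from deltalake import DeltaTable           # no Spark session needed
        return DeltaTable(table_uri=path).to_pandas()
    else:
        from pyspark.sql import SparkSession        # PySpark notebooks keep the Spark reader
        spark = SparkSession.builder.getOrCreate()
        return spark.read.format("delta").load(path)


# df = read_delta("abfss://<workspace-id>@onelake.dfs.fabric.microsoft.com/<lakehouse-id>/Tables/sales",
#                 pure_python=True)
```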
@@ -6,6 +6,8 @@ from sempy_labs._helper_functions import (
     _create_dataframe,
     delete_item,
     create_item,
+    resolve_item_id,
+    resolve_workspace_id,
 )
 from uuid import UUID
 import sempy_labs._icons as icons
@@ -121,3 +123,19 @@ def delete_kql_database(
         )

     delete_item(item=kql_database, type="KQLDatabase", workspace=workspace)
+
+
+def _resolve_cluster_uri(
+    kql_database: str | UUID, workspace: Optional[str | UUID] = None
+) -> str:
+
+    workspace_id = resolve_workspace_id(workspace=workspace)
+    item_id = resolve_item_id(
+        item=kql_database, type="KQLDatabase", workspace=workspace
+    )
+    response = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/kqlDatabases/{item_id}",
+        client="fabric_sp",
+    )
+
+    return response.json().get("properties", {}).get("queryServiceUri")
sempy_labs/_kusto.py ADDED
@@ -0,0 +1,135 @@
+import requests
+import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+from typing import Optional
+from uuid import UUID
+from sempy_labs._kql_databases import _resolve_cluster_uri
+from sempy_labs._helper_functions import resolve_item_id
+
+
+@log
+def query_kusto(
+    query: str,
+    kql_database: str | UUID,
+    workspace: Optional[str | UUID] = None,
+    language: str = "kql",
+) -> pd.DataFrame:
+    """
+    Runs a KQL query against a KQL database.
+
+    Parameters
+    ----------
+    query : str
+        The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+    kql_database : str | uuid.UUID
+        The KQL database name or ID.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    language : str, default="kql"
+        The language of the query. Currently "kql' and "sql" are supported.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the result of the KQL query.
+    """
+
+    import notebookutils
+
+    language = language.lower()
+    if language not in ["kql", "sql"]:
+        raise ValueError(
+            f"{icons._red_dot} Invalid language '{language}'. Only 'kql' and 'sql' are supported."
+        )
+
+    cluster_uri = _resolve_cluster_uri(kql_database=kql_database, workspace=workspace)
+    token = notebookutils.credentials.getToken(cluster_uri)
+
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    kql_database_id = resolve_item_id(
+        item=kql_database, type="KQLDatabase", workspace=workspace
+    )
+    payload = {"db": kql_database_id, "csl": query}
+    if language == "sql":
+        payload["properties"] = {"Options": {"query_language": "sql"}}
+
+    response = requests.post(
+        f"{cluster_uri}/v1/rest/query",
+        headers=headers,
+        json=payload,
+    )
+
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    results = response.json()
+    columns_info = results["Tables"][0]["Columns"]
+    rows = results["Tables"][0]["Rows"]
+
+    df = pd.DataFrame(rows, columns=[col["ColumnName"] for col in columns_info])
+
+    for col_info in columns_info:
+        col_name = col_info["ColumnName"]
+        data_type = col_info["DataType"]
+
+        try:
+            if data_type == "DateTime":
+                df[col_name] = pd.to_datetime(df[col_name])
+            elif data_type in ["Int64", "Int32", "Long"]:
+                df[col_name] = (
+                    pd.to_numeric(df[col_name], errors="coerce")
+                    .fillna(0)
+                    .astype("int64")
+                )
+            elif data_type == "Real" or data_type == "Double":
+                df[col_name] = pd.to_numeric(df[col_name], errors="coerce")
+            else:
+                # Convert any other type to string, change as needed
+                df[col_name] = df[col_name].astype(str)
+        except Exception as e:
+            print(
+                f"{icons.yellow_dot} Could not convert column {col_name} to {data_type}, defaulting to string: {str(e)}"
+            )
+            df[col_name] = df[col_name].astype(str)
+
+    return df
+
+
+def query_workspace_monitoring(
+    query: str, workspace: Optional[str | UUID] = None, language: str = "kql"
+) -> pd.DataFrame:
+    """
+    Runs a query against the Fabric workspace monitoring database. Workspace monitoring must be enabled on the workspace to use this function.
+
+    Parameters
+    ----------
+    query : str
+        The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    language : str, default="kql"
+        The language of the query. Currently "kql' and "sql" are supported.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the result of the query.
+    """
+
+    return query_kusto(
+        query=query,
+        kql_database="Monitoring KQL database",
+        workspace=workspace,
+        language=language,
+    )
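A usage sketch for the new module (database, table, and query names are hypothetical; the functions are defined in sempy_labs._kusto, so import from there if they are not re-exported at the package top level):

```python
# Usage sketch; assumes a Fabric notebook, since the token comes from notebookutils.
from sempy_labs._kusto import query_kusto, query_workspace_monitoring

# KQL against a user KQL database (names are illustrative).
df = query_kusto(query="MyTable | take 10", kql_database="MyEventhouseDB")

# Same surface against the workspace's "Monitoring KQL database"
# (workspace monitoring must be enabled on the workspace).
logs = query_workspace_monitoring(query="SemanticModelLogs | take 10")
```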
@@ -240,7 +240,11 @@ def list_tables(
                     "Columns": sum(
                         1 for c in t.Columns if str(c.Type) != "RowNumber"
                     ),
-                    "% DB": round((total_size / model_size) * 100, 2),
+                    "% DB": (
+                        round((total_size / model_size) * 100, 2)
+                        if model_size not in (0, None, float("nan"))
+                        else 0.0
+                    ),
                 }
             )
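The new conditional guards the share-of-model calculation against an empty model size; in isolation:

```python
# The guard in isolation, with illustrative sizes.
total_size, model_size = 1024, 0

pct_db = (
    round((total_size / model_size) * 100, 2)
    if model_size not in (0, None, float("nan"))
    else 0.0
)
print(pct_db)  # 0.0 instead of a ZeroDivisionError
```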
 
sempy_labs/_vertipaq.py CHANGED
@@ -8,7 +8,6 @@ import datetime
 import warnings
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    resolve_lakehouse_name,
     save_as_delta_table,
     resolve_workspace_capacity,
     _get_column_aggregate,
@@ -20,7 +19,6 @@ from sempy_labs._helper_functions import (
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
-from sempy_labs.directlake import get_direct_lake_source
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
@@ -176,10 +174,12 @@ def vertipaq_analyzer(
     )

     artifact_type = None
-    if is_direct_lake:
-        artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
-            get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
-        )
+    lakehouse_workspace_id = None
+    lakehouse_name = None
+    # if is_direct_lake:
+    #     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+    #         get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
+    #     )

     dfR["Missing Rows"] = 0
     dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
sempy_labs/_warehouses.py CHANGED
@@ -53,11 +53,11 @@ def create_warehouse(
             "defaultCollation"
         ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"

-    response = _base_api(
+    result = _base_api(
         request=f"/v1/workspaces/{workspace_id}/warehouses",
         payload=payload,
         method="post",
-        lro_return_status_code=True,
+        lro_return_json=True,
        status_codes=[201, 202],
     )

@@ -65,7 +65,7 @@
         f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace."
     )

-    return response.get("id")
+    return result.get("id")


 def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
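With the switch to lro_return_json, create_warehouse now returns the new item's id from the LRO response body; a usage sketch (warehouse and workspace names are hypothetical):

```python
# Usage sketch; warehouse and workspace names are hypothetical.
import sempy_labs as labs

# Returns the new warehouse's id (a GUID string) once the long-running operation completes.
warehouse_id = labs.create_warehouse(warehouse="SalesDW", workspace="Analytics")
print(warehouse_id)
```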
@@ -38,6 +38,7 @@ from sempy_labs.admin._capacities import (
     get_capacity_assignment_status,
     get_capacity_state,
     list_capacity_users,
+    get_refreshables,
 )
 from sempy_labs.admin._tenant import (
     list_tenant_settings,
@@ -80,6 +81,9 @@ from sempy_labs.admin._external_data_share import (
 from sempy_labs.admin._git import (
     list_git_connections,
 )
+from sempy_labs.admin._dataflows import (
+    export_dataflow,
+)

 __all__ = [
     "list_items",
@@ -133,4 +137,6 @@ __all__ = [
     "list_capacity_users",
     "list_user_subscriptions",
     "list_report_subscriptions",
+    "get_refreshables",
+    "export_dataflow",
 ]
@@ -31,7 +31,7 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFram
         "Artifact Name": "string",
         "Artifact Id": "string",
         "Artifact Type": "string",
-        "Artifact Size in MB": "int",
+        "Artifact Size in MB": "string",
         "Created Date Time": "datetime",
         "Last Accessed Date Time": "datetime",
     }
@@ -47,8 +47,8 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFram
     for r in responses:
         for i in r.get("unusedArtifactEntities", []):
             new_data = {
-                "Artifact Name": i.get("artifactId"),
-                "Artifact Id": i.get("displayName"),
+                "Artifact Name": i.get("displayName"),
+                "Artifact Id": i.get("artifactId"),
                 "Artifact Type": i.get("artifactType"),
                 "Artifact Size in MB": i.get("artifactSizeInMB"),
                 "Created Date Time": i.get("createdDateTime"),