semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -8,7 +8,7 @@ from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundExcept
  import pandas as pd
  from functools import wraps
  import datetime
- from typing import Optional, Tuple, List
+ from typing import Optional, Tuple, List, Dict
  from uuid import UUID
  import sempy_labs._icons as icons
  from azure.core.credentials import TokenCredential, AccessToken
@@ -663,11 +663,13 @@ def save_as_delta_table(
  workspace: Optional[str | UUID] = None,
  ):
  """
- Saves a pandas dataframe as a delta table in a Fabric lakehouse.
+ Saves a pandas or spark dataframe as a delta table in a Fabric lakehouse.
+
+ This function may be executed in either a PySpark or pure Python notebook. If executing in a pure Python notebook, the dataframe must be a pandas dataframe.

  Parameters
  ----------
- dataframe : pandas.DataFrame
+ dataframe : pandas.DataFrame | spark.Dataframe
  The dataframe to be saved as a delta table.
  delta_table_name : str
  The name of the delta table.
@@ -686,19 +688,6 @@ def save_as_delta_table(
  or if no lakehouse attached, resolves to the workspace of the notebook.
  """

- from pyspark.sql.types import (
- StringType,
- IntegerType,
- FloatType,
- DateType,
- StructType,
- StructField,
- BooleanType,
- LongType,
- DoubleType,
- TimestampType,
- )
-
  (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
  (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
  lakehouse=lakehouse, workspace=workspace_id
@@ -717,52 +706,101 @@ def save_as_delta_table(
  f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names."
  )

- spark = _create_spark_session()
+ import pyarrow as pa
+ from pyspark.sql.types import (
+ StringType,
+ IntegerType,
+ FloatType,
+ DateType,
+ StructType,
+ StructField,
+ BooleanType,
+ LongType,
+ DoubleType,
+ TimestampType,
+ )

- type_mapping = {
- "string": StringType(),
- "str": StringType(),
- "integer": IntegerType(),
- "int": IntegerType(),
- "float": FloatType(),
- "date": DateType(),
- "bool": BooleanType(),
- "boolean": BooleanType(),
- "long": LongType(),
- "double": DoubleType(),
- "timestamp": TimestampType(),
- }
+ def get_type_mapping(pure_python):
+ common_mapping = {
+ "string": ("pa", pa.string(), StringType()),
+ "str": ("pa", pa.string(), StringType()),
+ "integer": ("pa", pa.int32(), IntegerType()),
+ "int": ("pa", pa.int32(), IntegerType()),
+ "float": ("pa", pa.float32(), FloatType()),
+ "double": ("pa", pa.float64(), DoubleType()),
+ "long": ("pa", pa.int64(), LongType()),
+ "bool": ("pa", pa.bool_(), BooleanType()),
+ "boolean": ("pa", pa.bool_(), BooleanType()),
+ "date": ("pa", pa.date32(), DateType()),
+ "timestamp": ("pa", pa.timestamp("ms"), TimestampType()),
+ }
+ return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()}

- if isinstance(dataframe, pd.DataFrame):
- dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
- if schema is None:
- spark_df = spark.createDataFrame(dataframe)
+ def build_schema(schema_dict, type_mapping, use_arrow=True):
+ if use_arrow:
+ fields = [
+ pa.field(name, type_mapping.get(dtype.lower()))
+ for name, dtype in schema_dict.items()
+ ]
+ return pa.schema(fields)
  else:
- schema_map = StructType(
+ return StructType(
  [
- StructField(column_name, type_mapping[data_type], True)
- for column_name, data_type in schema.items()
+ StructField(name, type_mapping.get(dtype.lower()), True)
+ for name, dtype in schema_dict.items()
  ]
  )
- spark_df = spark.createDataFrame(dataframe, schema_map)
+
+ # Main logic
+ schema_map = None
+ if schema is not None:
+ use_arrow = _pure_python_notebook()
+ type_mapping = get_type_mapping(use_arrow)
+ schema_map = build_schema(schema, type_mapping, use_arrow)
+
+ if isinstance(dataframe, pd.DataFrame):
+ dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
+ if _pure_python_notebook():
+ spark_df = dataframe
+ else:
+ spark = _create_spark_session()
+ if schema is None:
+ spark_df = spark.createDataFrame(dataframe)
+ else:
+ spark_df = spark.createDataFrame(dataframe, schema_map)
  else:
  for col_name in dataframe.columns:
  new_name = col_name.replace(" ", "_")
  dataframe = dataframe.withColumnRenamed(col_name, new_name)
  spark_df = dataframe

- filePath = create_abfss_path(
+ file_path = create_abfss_path(
  lakehouse_id=lakehouse_id,
  lakehouse_workspace_id=workspace_id,
  delta_table_name=delta_table_name,
  )

- if merge_schema:
- spark_df.write.mode(write_mode).format("delta").option(
- "mergeSchema", "true"
- ).save(filePath)
+ if _pure_python_notebook():
+ from deltalake import write_deltalake
+
+ write_args = {
+ "table_or_uri": file_path,
+ "data": spark_df,
+ "mode": write_mode,
+ "schema": schema_map,
+ }
+
+ if merge_schema:
+ write_args["schema_mode"] = "merge"
+
+ write_deltalake(**write_args)
  else:
- spark_df.write.mode(write_mode).format("delta").save(filePath)
+ writer = spark_df.write.mode(write_mode).format("delta")
+ if merge_schema:
+ writer = writer.option("mergeSchema", "true")
+
+ writer.save(file_path)
+
  print(
  f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
  )
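
A minimal usage sketch of the reworked function from a notebook, for orientation; the table name, column names, and the chosen write_mode/schema values are hypothetical examples, not part of the diff:

    import pandas as pd
    import sempy_labs as labs

    df = pd.DataFrame({"Region": ["US", "EU"], "Sales": [100.5, 200.25]})

    # Works in both pure Python and PySpark notebooks: in the pure Python case the
    # pandas dataframe is written with deltalake.write_deltalake, otherwise it is
    # converted to a Spark dataframe and written through the Delta writer.
    labs.save_as_delta_table(
        dataframe=df,
        delta_table_name="sales_summary",                 # hypothetical table name
        write_mode="overwrite",
        schema={"Region": "string", "Sales": "double"},   # keys map to pyarrow or pyspark types
    )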
@@ -1497,32 +1535,82 @@ def generate_guid():

  def _get_column_aggregate(
  table_name: str,
- column_name: str = "RunId",
+ column_name: str | List[str] = "RunId",
  lakehouse: Optional[str | UUID] = None,
  workspace: Optional[str | UUID] = None,
  function: str = "max",
  default_value: int = 0,
- ) -> int:
+ ) -> int | Dict[str, int]:
+
+ workspace_id = resolve_workspace_id(workspace)
+ lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+ path = create_abfss_path(lakehouse_id, workspace_id, table_name)
+ df = _read_delta_table(path)
+
+ if isinstance(column_name, str):
+ result = _get_aggregate(
+ df=df,
+ column_name=column_name,
+ function=function,
+ default_value=default_value,
+ )
+ elif isinstance(column_name, list):
+ result = {}
+ for col in column_name:
+ result[col] = _get_aggregate(
+ df=df,
+ column_name=col,
+ function=function,
+ default_value=default_value,
+ )
+ else:
+ raise TypeError("column_name must be a string or a list of strings.")
+
+ return result
+

- from pyspark.sql.functions import approx_count_distinct
- from pyspark.sql import functions as F
+ def _get_aggregate(df, column_name, function, default_value: int = 0) -> int:

  function = function.upper()
- (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
- lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
- path = create_abfss_path(lakehouse_id, workspace_id, table_name)

- spark = _create_spark_session()
- df = spark.read.format("delta").load(path)
+ if _pure_python_notebook():
+ import polars as pl
+
+ if not isinstance(df, pd.DataFrame):
+ df.to_pandas()

- if function in {"COUNTDISTINCT", "DISTINCTCOUNT"}:
- result = df.select(F.count_distinct(F.col(column_name)))
- elif "APPROX" in function:
- result = df.select(approx_count_distinct(column_name))
+ df = pl.from_pandas(df)
+
+ # Perform aggregation
+ if "DISTINCT" in function:
+ if isinstance(df[column_name].dtype, pl.Decimal):
+ result = df[column_name].cast(pl.Float64).n_unique()
+ else:
+ result = df[column_name].n_unique()
+ elif "APPROX" in function:
+ result = df[column_name].unique().shape[0]
+ else:
+ try:
+ result = getattr(df[column_name], function.lower())()
+ except AttributeError:
+ raise ValueError(f"Unsupported function: {function}")
+
+ return result if result is not None else default_value
  else:
- result = df.selectExpr(f"{function}({column_name})")
+ from pyspark.sql.functions import approx_count_distinct
+ from pyspark.sql import functions as F
+
+ if isinstance(df, pd.DataFrame):
+ df = _create_spark_dataframe(df)

- return result.collect()[0][0] or default_value
+ if "DISTINCT" in function:
+ result = df.select(F.count_distinct(F.col(column_name)))
+ elif "APPROX" in function:
+ result = df.select(approx_count_distinct(column_name))
+ else:
+ result = df.selectExpr(f"{function}({column_name})")
+
+ return result.collect()[0][0] or default_value


  def _make_list_unique(my_list):
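
The aggregate helper above is internal (leading underscore), but the new list form is worth illustrating; a hypothetical sketch, assuming a lakehouse table "my_table" with the named columns:

    # Single column name -> int, list of column names -> dict keyed by column.
    max_run = _get_column_aggregate(table_name="my_table", column_name="RunId", function="max")
    maxima = _get_column_aggregate(table_name="my_table", column_name=["RunId", "BatchId"], function="max")
    # maxima -> {"RunId": ..., "BatchId": ...}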
@@ -1687,6 +1775,7 @@ def _convert_data_type(input_data_type: str) -> str:
  "double": "Double",
  "float": "Double",
  "binary": "Boolean",
+ "long": "Int64",
  }

  if "decimal" in input_data_type:
@@ -1842,6 +1931,18 @@ def _update_dataframe_datatypes(dataframe: pd.DataFrame, column_map: dict):
  dataframe[column] = dataframe[column].fillna(0).astype(int)
  elif data_type in ["str", "string"]:
  dataframe[column] = dataframe[column].astype(str)
+ # Avoid having empty lists or lists with a value of None.
+ elif data_type in ["list"]:
+ dataframe[column] = dataframe[column].apply(
+ lambda x: (
+ None
+ if (type(x) == list and len(x) == 1 and x[0] == None)
+ or (type(x) == list and len(x) == 0)
+ else x
+ )
+ )
+ elif data_type in ["dict"]:
+ dataframe[column] = dataframe[column]
  else:
  raise NotImplementedError

@@ -1878,18 +1979,58 @@ def _create_spark_session():
  return SparkSession.builder.getOrCreate()


- def _read_delta_table(path: str):
+ def _get_delta_table(path: str) -> str:
+
+ from delta import DeltaTable

  spark = _create_spark_session()

- return spark.read.format("delta").load(path)
+ return DeltaTable.forPath(spark, path)


- def _delta_table_row_count(table_name: str) -> int:
+ def _read_delta_table(path: str, to_pandas: bool = True, to_df: bool = False):

- spark = _create_spark_session()
+ if _pure_python_notebook():
+ from deltalake import DeltaTable

- return spark.table(table_name).count()
+ df = DeltaTable(table_uri=path)
+ if to_pandas:
+ df = df.to_pandas()
+ else:
+ spark = _create_spark_session()
+ df = spark.read.format("delta").load(path)
+ if to_df:
+ df = df.toDF()
+
+ return df
+
+
+ def _read_delta_table_history(path) -> pd.DataFrame:
+
+ if _pure_python_notebook():
+ from deltalake import DeltaTable
+
+ df = pd.DataFrame(DeltaTable(table_uri=path).history())
+ else:
+ from delta import DeltaTable
+
+ spark = _create_spark_session()
+ delta_table = DeltaTable.forPath(spark, path)
+ df = delta_table.history().toPandas()
+
+ return df
+
+
+ def _delta_table_row_count(path: str) -> int:
+
+ if _pure_python_notebook():
+ from deltalake import DeltaTable
+
+ dt = DeltaTable(path)
+ arrow_table = dt.to_pyarrow_table()
+ return arrow_table.num_rows
+ else:
+ return _read_delta_table(path).count()


  def _run_spark_sql_query(query):
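
Taken together, these helpers give one delta read path that works in both runtimes; a hedged sketch of how they compose (the lakehouse/workspace ids and table name are assumed to be resolved earlier, as in _get_column_aggregate):

    path = create_abfss_path(lakehouse_id, workspace_id, "my_table")  # hypothetical table
    df = _read_delta_table(path)               # pandas DataFrame (deltalake) or Spark DataFrame
    history = _read_delta_table_history(path)  # always a pandas DataFrame of the table history
    n_rows = _delta_table_row_count(path)      # pyarrow row count or Spark .count()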
@@ -6,6 +6,8 @@ from sempy_labs._helper_functions import (
  _create_dataframe,
  delete_item,
  create_item,
+ resolve_item_id,
+ resolve_workspace_id,
  )
  from uuid import UUID
  import sempy_labs._icons as icons
@@ -121,3 +123,19 @@ def delete_kql_database(
  )

  delete_item(item=kql_database, type="KQLDatabase", workspace=workspace)
+
+
+ def _resolve_cluster_uri(
+ kql_database: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> str:
+
+ workspace_id = resolve_workspace_id(workspace=workspace)
+ item_id = resolve_item_id(
+ item=kql_database, type="KQLDatabase", workspace=workspace
+ )
+ response = _base_api(
+ request=f"/v1/workspaces/{workspace_id}/kqlDatabases/{item_id}",
+ client="fabric_sp",
+ )
+
+ return response.json().get("properties", {}).get("queryServiceUri")
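
This private helper backs the new Kusto module below: it fetches the KQL database item and returns its queryServiceUri, which the query code then uses both as the token audience and as the REST base URL. A hypothetical call, assuming the database and workspace names exist:

    cluster_uri = _resolve_cluster_uri(kql_database="MyEventhouseDB", workspace="Sales Workspace")
    # cluster_uri is the database's Kusto query endpoint (properties.queryServiceUri)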
sempy_labs/_kusto.py ADDED
@@ -0,0 +1,135 @@
+ import requests
+ import pandas as pd
+ from sempy.fabric.exceptions import FabricHTTPException
+ from sempy._utils._log import log
+ import sempy_labs._icons as icons
+ from typing import Optional
+ from uuid import UUID
+ from sempy_labs._kql_databases import _resolve_cluster_uri
+ from sempy_labs._helper_functions import resolve_item_id
+
+
+ @log
+ def query_kusto(
+ query: str,
+ kql_database: str | UUID,
+ workspace: Optional[str | UUID] = None,
+ language: str = "kql",
+ ) -> pd.DataFrame:
+ """
+ Runs a KQL query against a KQL database.
+
+ Parameters
+ ----------
+ query : str
+ The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+ kql_database : str | uuid.UUID
+ The KQL database name or ID.
+ workspace : str | uuid.UUID, default=None
+ The Fabric workspace name or ID.
+ Defaults to None which resolves to the workspace of the attached lakehouse
+ or if no lakehouse attached, resolves to the workspace of the notebook.
+ language : str, default="kql"
+ The language of the query. Currently "kql' and "sql" are supported.
+
+ Returns
+ -------
+ pandas.DataFrame
+ A pandas dataframe showing the result of the KQL query.
+ """
+
+ import notebookutils
+
+ language = language.lower()
+ if language not in ["kql", "sql"]:
+ raise ValueError(
+ f"{icons._red_dot} Invalid language '{language}'. Only 'kql' and 'sql' are supported."
+ )
+
+ cluster_uri = _resolve_cluster_uri(kql_database=kql_database, workspace=workspace)
+ token = notebookutils.credentials.getToken(cluster_uri)
+
+ headers = {
+ "Authorization": f"Bearer {token}",
+ "Content-Type": "application/json",
+ "Accept": "application/json",
+ }
+
+ kql_database_id = resolve_item_id(
+ item=kql_database, type="KQLDatabase", workspace=workspace
+ )
+ payload = {"db": kql_database_id, "csl": query}
+ if language == "sql":
+ payload["properties"] = {"Options": {"query_language": "sql"}}
+
+ response = requests.post(
+ f"{cluster_uri}/v1/rest/query",
+ headers=headers,
+ json=payload,
+ )
+
+ if response.status_code != 200:
+ raise FabricHTTPException(response)
+
+ results = response.json()
+ columns_info = results["Tables"][0]["Columns"]
+ rows = results["Tables"][0]["Rows"]
+
+ df = pd.DataFrame(rows, columns=[col["ColumnName"] for col in columns_info])
+
+ for col_info in columns_info:
+ col_name = col_info["ColumnName"]
+ data_type = col_info["DataType"]
+
+ try:
+ if data_type == "DateTime":
+ df[col_name] = pd.to_datetime(df[col_name])
+ elif data_type in ["Int64", "Int32", "Long"]:
+ df[col_name] = (
+ pd.to_numeric(df[col_name], errors="coerce")
+ .fillna(0)
+ .astype("int64")
+ )
+ elif data_type == "Real" or data_type == "Double":
+ df[col_name] = pd.to_numeric(df[col_name], errors="coerce")
+ else:
+ # Convert any other type to string, change as needed
+ df[col_name] = df[col_name].astype(str)
+ except Exception as e:
+ print(
+ f"{icons.yellow_dot} Could not convert column {col_name} to {data_type}, defaulting to string: {str(e)}"
+ )
+ df[col_name] = df[col_name].astype(str)
+
+ return df
+
+
+ def query_workspace_monitoring(
+ query: str, workspace: Optional[str | UUID] = None, language: str = "kql"
+ ) -> pd.DataFrame:
+ """
+ Runs a query against the Fabric workspace monitoring database. Workspace monitoring must be enabled on the workspace to use this function.
+
+ Parameters
+ ----------
+ query : str
+ The query (supports KQL or SQL - make sure to specify the language parameter accordingly).
+ workspace : str | uuid.UUID, default=None
+ The Fabric workspace name or ID.
+ Defaults to None which resolves to the workspace of the attached lakehouse
+ or if no lakehouse attached, resolves to the workspace of the notebook.
+ language : str, default="kql"
+ The language of the query. Currently "kql' and "sql" are supported.
+
+ Returns
+ -------
+ pandas.DataFrame
+ A pandas dataframe showing the result of the query.
+ """
+
+ return query_kusto(
+ query=query,
+ kql_database="Monitoring KQL database",
+ workspace=workspace,
+ language=language,
+ )
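
A short usage sketch for the new module; the database name, workspace name, and query text below are hypothetical, and the functions are imported straight from sempy_labs._kusto as defined in the added file:

    from sempy_labs._kusto import query_kusto, query_workspace_monitoring

    # KQL against a user database (language defaults to "kql"; pass language="sql" for T-SQL-style queries).
    df = query_kusto(
        query="MyTable | summarize count() by Category",
        kql_database="MyEventhouseDB",
        workspace="Sales Workspace",
    )

    # Same call routed at the workspace's "Monitoring KQL database"
    # (workspace monitoring must be enabled on the workspace).
    df_logs = query_workspace_monitoring(
        query="SemanticModelLogs | take 10",
        workspace="Sales Workspace",
    )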
@@ -240,7 +240,11 @@ def list_tables(
  "Columns": sum(
  1 for c in t.Columns if str(c.Type) != "RowNumber"
  ),
- "% DB": round((total_size / model_size) * 100, 2),
+ "% DB": (
+ round((total_size / model_size) * 100, 2)
+ if model_size not in (0, None, float("nan"))
+ else 0.0
+ ),
  }
  )

sempy_labs/_vertipaq.py CHANGED
@@ -8,7 +8,6 @@ import datetime
  import warnings
  from sempy_labs._helper_functions import (
  format_dax_object_name,
- resolve_lakehouse_name,
  save_as_delta_table,
  resolve_workspace_capacity,
  _get_column_aggregate,
@@ -20,7 +19,6 @@ from sempy_labs._helper_functions import (
  )
  from sempy_labs._list_functions import list_relationships, list_tables
  from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
- from sempy_labs.directlake import get_direct_lake_source
  from typing import Optional
  from sempy._utils._log import log
  import sempy_labs._icons as icons
@@ -176,10 +174,12 @@ def vertipaq_analyzer(
  )

  artifact_type = None
- if is_direct_lake:
- artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
- get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
- )
+ lakehouse_workspace_id = None
+ lakehouse_name = None
+ # if is_direct_lake:
+ # artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+ # get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
+ # )

  dfR["Missing Rows"] = 0
  dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
sempy_labs/_warehouses.py CHANGED
@@ -53,11 +53,11 @@ def create_warehouse(
  "defaultCollation"
  ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"

- response = _base_api(
+ result = _base_api(
  request=f"/v1/workspaces/{workspace_id}/warehouses",
  payload=payload,
  method="post",
- lro_return_status_code=True,
+ lro_return_json=True,
  status_codes=[201, 202],
  )

@@ -65,7 +65,7 @@
  f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace."
  )

- return response.get("id")
+ return result.get("id")


  def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
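
Because the call now requests the LRO result payload (lro_return_json) rather than only the status code, create_warehouse can hand back the new item's id; a hypothetical sketch with made-up names:

    import sempy_labs as labs

    warehouse_id = labs.create_warehouse(warehouse="SalesWH", workspace="Sales Workspace")
    # warehouse_id holds the "id" field from the create response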
@@ -38,6 +38,7 @@ from sempy_labs.admin._capacities import (
  get_capacity_assignment_status,
  get_capacity_state,
  list_capacity_users,
+ get_refreshables,
  )
  from sempy_labs.admin._tenant import (
  list_tenant_settings,
@@ -80,6 +81,9 @@ from sempy_labs.admin._external_data_share import (
  from sempy_labs.admin._git import (
  list_git_connections,
  )
+ from sempy_labs.admin._dataflows import (
+ export_dataflow,
+ )

  __all__ = [
  "list_items",
@@ -133,4 +137,6 @@ __all__ = [
  "list_capacity_users",
  "list_user_subscriptions",
  "list_report_subscriptions",
+ "get_refreshables",
+ "export_dataflow",
  ]
@@ -31,7 +31,7 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFram
  "Artifact Name": "string",
  "Artifact Id": "string",
  "Artifact Type": "string",
- "Artifact Size in MB": "int",
+ "Artifact Size in MB": "string",
  "Created Date Time": "datetime",
  "Last Accessed Date Time": "datetime",
  }
@@ -47,8 +47,8 @@ def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFram
  for r in responses:
  for i in r.get("unusedArtifactEntities", []):
  new_data = {
- "Artifact Name": i.get("artifactId"),
- "Artifact Id": i.get("displayName"),
+ "Artifact Name": i.get("displayName"),
+ "Artifact Id": i.get("artifactId"),
  "Artifact Type": i.get("artifactType"),
  "Artifact Size in MB": i.get("artifactSizeInMB"),
  "Created Date Time": i.get("createdDateTime"),