semantic-link-labs 0.9.2__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

sempy_labs/_gateways.py CHANGED
@@ -21,6 +21,8 @@ def list_gateways() -> pd.DataFrame:
 
     This is a wrapper function for the following API: `Gateways - List Gateways <https://learn.microsoft.com/rest/api/fabric/core/gateways/list-gateways>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Returns
     -------
     pandas.DataFrame
@@ -41,7 +43,9 @@ def list_gateways() -> pd.DataFrame:
     }
     df = _create_dataframe(columns=columns)
 
-    responses = _base_api(request="/v1/gateways", uses_pagination=True)
+    responses = _base_api(
+        request="/v1/gateways", client="fabric_sp", uses_pagination=True
+    )
 
     for r in responses:
         for v in r.get("value", []):
@@ -85,6 +89,8 @@ def delete_gateway(gateway: str | UUID):
 
     This is a wrapper function for the following API: `Gateways - Delete Gateway <https://learn.microsoft.com/rest/api/fabric/core/gateways/delete-gateway>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     gateway : str | uuid.UUID
@@ -92,7 +98,7 @@ def delete_gateway(gateway: str | UUID):
     """
 
     gateway_id = _resolve_gateway_id(gateway)
-    _base_api(request=f"/v1/gateways/{gateway_id}", method="delete")
+    _base_api(request=f"/v1/gateways/{gateway_id}", client="fabric_sp", method="delete")
     print(f"{icons.green_dot} The '{gateway}' gateway has been deleted.")
 
 
@@ -102,6 +108,8 @@ def list_gateway_role_assigments(gateway: str | UUID) -> pd.DataFrame:
 
     This is a wrapper function for the following API: `Gateways - List Gateway Role Assignments <https://learn.microsoft.com/rest/api/fabric/core/gateways/list-gateway-role-assignments>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     gateway : str | uuid.UUID
@@ -122,7 +130,9 @@ def list_gateway_role_assigments(gateway: str | UUID) -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
     gateway_id = _resolve_gateway_id(gateway)
     responses = _base_api(
-        request=f"/v1/gateways/{gateway_id}/roleAssignments", uses_pagination=True
+        request=f"/v1/gateways/{gateway_id}/roleAssignments",
+        client="fabric_sp",
+        uses_pagination=True,
     )
 
     for r in responses:
@@ -145,6 +155,8 @@ def delete_gateway_role_assignment(gateway: str | UUID, role_assignment_id: UUID
 
     This is a wrapper function for the following API: `Gateways - Delete Gateway Role Assignment <https://learn.microsoft.com/rest/api/fabric/core/gateways/delete-gateway-role-assignment>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     gateway : str | uuid.UUID
@@ -156,6 +168,7 @@ def delete_gateway_role_assignment(gateway: str | UUID, role_assignment_id: UUID
     gateway_id = _resolve_gateway_id(gateway)
     _base_api(
         request=f"/v1/gateways/{gateway_id}/roleAssignments/{role_assignment_id}",
+        client="fabric_sp",
         method="delete",
     )
 
@@ -187,6 +200,8 @@ def delete_gateway_member(gateway: str | UUID, gateway_member: str | UUID):
 
     This is a wrapper function for the following API: `Gateways - Delete Gateway Member <https://learn.microsoft.com/rest/api/fabric/core/gateways/delete-gateway-member>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     gateway : str | uuid.UUID
@@ -200,7 +215,11 @@ def delete_gateway_member(gateway: str | UUID, gateway_member: str | UUID):
         gateway=gateway_id, gateway_member=gateway_member
     )
 
-    _base_api(request=f"/v1/gateways/{gateway_id}/members/{member_id}", method="delete")
+    _base_api(
+        request=f"/v1/gateways/{gateway_id}/members/{member_id}",
+        client="fabric_sp",
+        method="delete",
+    )
     print(
         f"{icons.green_dot} The '{member_id}' member for the '{gateway}' gateway has been deleted."
     )
@@ -212,6 +231,8 @@ def list_gateway_members(gateway: str | UUID) -> pd.DataFrame:
 
     This is a wrapper function for the following API: `Gateways - List Gateway Members <https://learn.microsoft.com/rest/api/fabric/core/gateways/list-gateway-members>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     gateway : str | uuid.UUID
@@ -235,7 +256,9 @@ def list_gateway_members(gateway: str | UUID) -> pd.DataFrame:
     }
     df = _create_dataframe(columns=columns)
 
-    response = _base_api(request=f"/v1/gateways/{gateway_id}/members")
+    response = _base_api(
+        request=f"/v1/gateways/{gateway_id}/members", client="fabric_sp"
+    )
 
     for v in response.json().get("value", []):
         new_data = {
@@ -269,6 +292,8 @@ def create_vnet_gateway(
 
     This is a wrapper function for the following API: `Gateways - Create Gateway <https://learn.microsoft.com/rest/api/fabric/core/gateways/create-gateway>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     name : str
@@ -304,7 +329,13 @@ def create_vnet_gateway(
         "numberOfMemberGateways": number_of_member_gateways,
     }
 
-    _base_api(request="/v1/gateways", method="post", payload=payload, status_codes=201)
+    _base_api(
+        request="/v1/gateways",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+        status_codes=201,
+    )
 
     print(
         f"{icons.green_dot} The '{name}' gateway was created within the '{capacity}' capacity."
@@ -322,6 +353,8 @@ def update_on_premises_gateway(
 
     This is a wrapper function for the following API: `Gateways - Update Gateway <https://learn.microsoft.com/rest/api/fabric/core/gateways/update-gateway>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     gateway : str | uuid.UUID
@@ -352,7 +385,12 @@ def update_on_premises_gateway(
 
     payload["type"] = "OnPremises"
 
-    _base_api(request=f"/v1/gateways/{gateway_id}", method="patch", payload=payload)
+    _base_api(
+        request=f"/v1/gateways/{gateway_id}",
+        client="fabric_sp",
+        method="patch",
+        payload=payload,
+    )
 
     print(f"{icons.green_dot} The '{gateway}' has been updated accordingly.")
 
@@ -368,6 +406,8 @@ def update_vnet_gateway(
 
     This is a wrapper function for the following API: `Gateways - Update Gateway <https://learn.microsoft.com/rest/api/fabric/core/gateways/update-gateway>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     gateway : str | uuid.UUID
@@ -399,7 +439,12 @@ def update_vnet_gateway(
 
     payload["type"] = "VirtualNetwork"
 
-    _base_api(request=f"/v1/gateways/{gateway_id}", method="patch", payload=payload)
+    _base_api(
+        request=f"/v1/gateways/{gateway_id}",
+        client="fabric_sp",
+        method="patch",
+        payload=payload,
+    )
     print(f"{icons.green_dot} The '{gateway}' has been updated accordingly.")
 
 
@@ -411,6 +456,8 @@ def bind_semantic_model_to_gateway(
 
     This is a wrapper function for the following API: `Datasets - Bind To Gateway In Group <https://learn.microsoft.com/rest/api/power-bi/datasets/bind-to-gateway-in-group>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     dataset : str | uuid.UUID
@@ -435,6 +482,7 @@ def bind_semantic_model_to_gateway(
 
     _base_api(
         request=f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/Default.BindToGateway",
+        client="fabric_sp",
         method="post",
         payload=payload,
     )
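Every gateway call in the hunks above now passes client="fabric_sp" to _base_api, which is what backs the new "Service Principal Authentication is supported" docstring lines. A minimal caller-side sketch, assuming service principal credentials are already configured for the session (per the linked Service Principal notebook) and that the gateway functions are exposed at the package top level:

    import sempy_labs as labs

    # In 0.9.3 this request is routed through the "fabric_sp" client, so it can
    # run under a service principal identity instead of a signed-in user.
    df = labs.list_gateways()
    print(df.head())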
@@ -31,7 +31,9 @@ def _build_url(url: str, params: dict) -> str:
 
 
 def create_abfss_path(
-    lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_table_name: str
+    lakehouse_id: UUID,
+    lakehouse_workspace_id: UUID,
+    delta_table_name: Optional[str] = None,
 ) -> str:
     """
     Creates an abfss path for a delta table in a Fabric lakehouse.
@@ -42,18 +44,22 @@ def create_abfss_path(
         ID of the Fabric lakehouse.
     lakehouse_workspace_id : uuid.UUID
         ID of the Fabric workspace.
-    delta_table_name : str
+    delta_table_name : str, default=None
         Name of the delta table name.
 
     Returns
     -------
     str
-        An abfss path which can be used to save/reference a delta table in a Fabric lakehouse.
+        An abfss path which can be used to save/reference a delta table in a Fabric lakehouse or lakehouse.
     """
 
     fp = _get_default_file_path()
+    path = f"abfss://{lakehouse_workspace_id}@{fp}/{lakehouse_id}"
+
+    if delta_table_name is not None:
+        path += f"/Tables/{delta_table_name}"
 
-    return f"abfss://{lakehouse_workspace_id}@{fp}/{lakehouse_id}/Tables/{delta_table_name}"
+    return path
 
 
 def _get_default_file_path() -> str:
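With the reworked signature, create_abfss_path only appends the "/Tables/<name>" segment when a table name is supplied, so the same helper can now also return the lakehouse root path. A small sketch of the two call shapes (internal helper, placeholder IDs):

    from uuid import UUID
    from sempy_labs._helper_functions import create_abfss_path

    lakehouse_id = UUID("11111111-1111-1111-1111-111111111111")   # placeholder
    workspace_id = UUID("22222222-2222-2222-2222-222222222222")   # placeholder

    # abfss://<workspace>@<host>/<lakehouse>/Tables/sales
    print(create_abfss_path(lakehouse_id, workspace_id, "sales"))

    # New in 0.9.3: omit the table name to get the lakehouse root path
    print(create_abfss_path(lakehouse_id, workspace_id))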
@@ -538,8 +544,9 @@ def save_as_delta_table(
             f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names."
         )
 
-    dataframe.columns = dataframe.columns.str.replace(" ", "_")
-    spark = SparkSession.builder.getOrCreate()
+    dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
+
+    spark = _create_spark_session()
 
     type_mapping = {
         "string": StringType(),
@@ -1248,7 +1255,6 @@ def _get_column_aggregate(
     default_value: int = 0,
 ) -> int:
 
-    from pyspark.sql import SparkSession
     from pyspark.sql.functions import approx_count_distinct
     from pyspark.sql import functions as F
 
@@ -1257,7 +1263,7 @@ def _get_column_aggregate(
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
     path = create_abfss_path(lakehouse_id, workspace_id, table_name)
 
-    spark = SparkSession.builder.getOrCreate()
+    spark = _create_spark_session()
     df = spark.read.format("delta").load(path)
 
     if function in {"COUNTDISTINCT", "DISTINCTCOUNT"}:
@@ -1591,3 +1597,43 @@ def _print_success(item_name, item_type, workspace_name, action="created"):
         )
     else:
         raise NotImplementedError
+
+
+def _pure_python_notebook() -> bool:
+
+    from sempy.fabric._environment import _on_jupyter
+
+    return _on_jupyter()
+
+
+def _create_spark_session():
+
+    if _pure_python_notebook():
+        raise ValueError(
+            f"{icons.red_dot} This function is only available in a PySpark notebook."
+        )
+
+    from pyspark.sql import SparkSession
+
+    return SparkSession.builder.getOrCreate()
+
+
+def _read_delta_table(path: str):
+
+    spark = _create_spark_session()
+
+    return spark.read.format("delta").load(path)
+
+
+def _delta_table_row_count(table_name: str) -> int:
+
+    spark = _create_spark_session()
+
+    return spark.table(table_name).count()
+
+
+def _run_spark_sql_query(query):
+
+    spark = _create_spark_session()
+
+    return spark.sql(query)
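These new helpers centralize Spark access: _create_spark_session refuses to run in a pure-Python notebook, and _read_delta_table, _delta_table_row_count and _run_spark_sql_query all go through it, which is why the direct SparkSession.builder.getOrCreate() calls elsewhere in this diff are swapped out. A rough internal-usage sketch, assuming a Spark-enabled Fabric notebook:

    from sempy_labs._helper_functions import _create_spark_session, _run_spark_sql_query

    spark = _create_spark_session()             # raises ValueError in a pure-Python notebook
    df = _run_spark_sql_query("SELECT 1 AS x")  # wraps spark.sql(...)
    df.show()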
@@ -9,6 +9,7 @@ from sempy_labs._helper_functions import (
     _update_dataframe_datatypes,
     _base_api,
     _create_dataframe,
+    _run_spark_sql_query,
 )
 from sempy._utils._log import log
 import pandas as pd
@@ -584,14 +585,12 @@ def list_columns(
             query = f"{query} FROM {lakehouse}.{lakeTName}"
             sql_statements.append((table_name, query))
 
-    spark = SparkSession.builder.getOrCreate()
-
     for o in sql_statements:
         tName = o[0]
         query = o[1]
 
         # Run the query
-        df = spark.sql(query)
+        df = _run_spark_sql_query(query)
 
         for column in df.columns:
             x = df.collect()[0][column]
@@ -91,7 +91,7 @@ def create_ml_experiment(
     _base_api(
         request=f"/v1/workspaces/{workspace_id}/mlExperiments",
         method="post",
-        json=payload,
+        payload=payload,
         status_codes=[201, 202],
         lro_return_status_code=True,
     )
sempy_labs/_model_bpa.py CHANGED
@@ -14,6 +14,7 @@ from sempy_labs._helper_functions import (
     get_language_codes,
     _get_column_aggregate,
     resolve_workspace_name_and_id,
+    _create_spark_session,
 )
 from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
 from sempy_labs.tom import connect_semantic_model
@@ -181,7 +182,6 @@ def run_model_bpa(
     def translate_using_spark(rule_file):
 
         from synapse.ml.services import Translate
-        from pyspark.sql import SparkSession
 
         rules_temp = rule_file.copy()
         rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1)
@@ -195,7 +195,7 @@ def run_model_bpa(
             ]
         )
 
-        spark = SparkSession.builder.getOrCreate()
+        spark = _create_spark_session()
         dfRules = spark.createDataFrame(rules_temp, schema)
 
         columns = ["Category", "Rule Name", "Description"]
@@ -416,7 +416,7 @@ def model_bpa_rules(
             lambda obj, tom: any(
                 re.search(
                     r"USERELATIONSHIP\s*\(\s*.+?(?=])\]\s*,\s*'*"
-                    + obj.Name
+                    + re.escape(obj.Name)
                     + r"'*\[",
                     m.Expression,
                     flags=re.IGNORECASE,
@@ -455,7 +455,9 @@
             "Warning",
             "The EVALUATEANDLOG function should not be used in production models",
             lambda obj, tom: re.search(
-                r"evaluateandlog\s*\(", obj.Expression, flags=re.IGNORECASE
+                r"evaluateandlog\s*\(",
+                obj.Expression,
+                flags=re.IGNORECASE,
             ),
             "The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.",
             "https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function",
@@ -592,13 +594,13 @@
             and not any(
                 re.search(
                     r"USERELATIONSHIP\s*\(\s*\'*"
-                    + obj.FromTable.Name
+                    + re.escape(obj.FromTable.Name)
                     + r"'*\["
-                    + obj.FromColumn.Name
+                    + re.escape(obj.FromColumn.Name)
                     + r"\]\s*,\s*'*"
-                    + obj.ToTable.Name
+                    + re.escape(obj.ToTable.Name)
                     + r"'*\["
-                    + obj.ToColumn.Name
+                    + re.escape(obj.ToColumn.Name)
                     + r"\]",
                     m.Expression,
                     flags=re.IGNORECASE,
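Wrapping the object names in re.escape matters because table, column and measure names can contain regex metacharacters (parentheses, brackets, dots) that previously corrupted the USERELATIONSHIP pattern. A standalone illustration with a hypothetical column name:

    import re

    name = "Amount (USD)"   # hypothetical column name containing metacharacters
    expr = "CALCULATE([Sales], USERELATIONSHIP('Dates'[Date], 'Sales'[Amount (USD)]))"

    unescaped = re.search(r"\[" + name + r"\]", expr, flags=re.IGNORECASE)
    escaped = re.search(r"\[" + re.escape(name) + r"\]", expr, flags=re.IGNORECASE)

    print(unescaped)  # None: "(USD)" is parsed as a capture group, not literal text
    print(escaped)    # a match object: the parentheses are matched literally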
@@ -5,6 +5,7 @@ from sempy._utils._log import log
 import sempy_labs._icons as icons
 from sempy_labs._helper_functions import (
     get_language_codes,
+    _create_spark_session,
 )
 from uuid import UUID
 
@@ -40,7 +41,6 @@ def translate_semantic_model(
 
     from synapse.ml.services import Translate
     from pyspark.sql.functions import col, flatten
-    from pyspark.sql import SparkSession
     from sempy_labs.tom import connect_semantic_model
 
     icons.sll_tags.append("TranslateSemanticModel")
@@ -145,7 +145,7 @@ def translate_semantic_model(
                 [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
             )
 
-    spark = SparkSession.builder.getOrCreate()
+    spark = _create_spark_session()
     df = spark.createDataFrame(df_prep)
 
     columns = ["Name", "Description", "Display Folder"]
sempy_labs/_vertipaq.py CHANGED
@@ -6,7 +6,6 @@ import os
 import shutil
 import datetime
 import warnings
-from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_lakehouse_name,
@@ -15,6 +14,7 @@ from sempy_labs._helper_functions import (
     _get_column_aggregate,
     resolve_workspace_name_and_id,
     resolve_dataset_name_and_id,
+    _create_spark_session,
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -197,7 +197,7 @@ def vertipaq_analyzer(
         )
 
         sql_statements = []
-        spark = SparkSession.builder.getOrCreate()
+        spark = _create_spark_session()
         # Loop through tables
         for lakeTName in dfC_flt["Query"].unique():
             query = "SELECT "
@@ -275,7 +275,7 @@ def vertipaq_analyzer(
         dfR.rename(columns={"Source": "To Lake Column"}, inplace=True)
         dfR.drop(columns=["Column Object"], inplace=True)
 
-        spark = SparkSession.builder.getOrCreate()
+        spark = _create_spark_session()
         for i, r in dfR.iterrows():
             fromTable = r["From Lake Table"]
             fromColumn = r["From Lake Column"]
sempy_labs/_warehouses.py CHANGED
@@ -93,7 +93,7 @@ def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     responses = _base_api(
-        reqeust=f"/v1/workspaces/{workspace_id}/warehouses", uses_pagination=True
+        request=f"/v1/workspaces/{workspace_id}/warehouses", uses_pagination=True
     )
 
     for r in responses:
@@ -413,8 +413,6 @@ def assign_domain_workspaces(domain: str | UUID, workspace_names: str | List[str
         request=f"/v1/admin/domains/{domain_id}/assignWorkspaces",
         method="post",
         payload=payload,
-        lro_return_status_code=True,
-        status_codes=200,
     )
 
     print(
@@ -4,10 +4,13 @@ from uuid import UUID
 from sempy.fabric.exceptions import FabricHTTPException
 import numpy as np
 import time
+import sempy_labs._icons as icons
 from sempy_labs.admin._basic_functions import list_workspaces
 from sempy._utils._log import log
 from sempy_labs._helper_functions import (
     _base_api,
+    _is_valid_uuid,
+    _build_url,
 )
 
 
@@ -39,22 +42,17 @@ def scan_workspaces(
     dataset_expressions : bool, default=False
         Whether to return data source details.
     lineage : bool, default=False
-        Whether to return lineage info (upstream dataflows, tiles, data source IDs).
+        Whether to return lineage info (upstream dataflows, tiles, data source IDs)
     artifact_users : bool, default=False
         Whether to return user details for a Power BI item (such as a report or a dashboard).
-    workspace : str | List[str] | uuid.UUID | List[uuid.UUID], default=None
-        The required workspace name(s) or id(s) to be scanned
+    workspace : str | List[str] | UUID | List[UUID], default=None
+        The required workspace name(s) or id(s) to be scanned. It supports a limit of 100 workspaces and only IDs in GUID format.
 
     Returns
     -------
     dict
         A json object with the scan result.
     """
-    scan_result = {
-        "workspaces": [],
-        "datasourceInstances": [],
-        "misconfiguredDatasourceInstances": [],
-    }
 
     if workspace is None:
         workspace = fabric.resolve_workspace_name()
@@ -62,55 +60,62 @@ def scan_workspaces(
     if isinstance(workspace, str):
         workspace = [workspace]
 
+    if len(workspace) > 100:
+        print(
+            f"{icons.yellow_dot} More than 100 workspaces where provided. Truncating to the fist 100."
+        )
+        workspace = workspace[:100]
+
     workspace_list = []
 
-    dfW = list_workspaces()
-    workspace_list = dfW[dfW["Name"].isin(workspace)]["Id"].tolist()
-    workspace_list = workspace_list + dfW[dfW["Id"].isin(workspace)]["Id"].tolist()
+    for w in workspace:
+        if _is_valid_uuid(w):
+            workspace_list.append(w)
+        else:
+            dfW = list_workspaces(workspace=w)
+            workspace_list = (
+                workspace_list + dfW[dfW["Name"].isin(workspace)]["Id"].tolist()
+            )
 
-    workspaces = np.array(workspace_list)
-    batch_size = 99
-    for i in range(0, len(workspaces), batch_size):
-        batch = workspaces[i : i + batch_size].tolist()
-        payload = {"workspaces": batch}
+    url = "/v1.0/myorg/admin/workspaces/getInfo"
+    params = {}
+    params["lineage"] = lineage
+    params["datasourceDetails"] = data_source_details
+    params["datasetSchema"] = dataset_schema
+    params["datasetExpressions"] = dataset_expressions
+    params["getArtifactUsers"] = artifact_users
 
-        url = f"/v1.0/myorg/admin/workspaces/getInfo?lineage={lineage}&datasourceDetails={data_source_details}&datasetSchema={dataset_schema}&datasetExpressions={dataset_expressions}&getArtifactUsers={artifact_users}"
-        response = _base_api(
-            request=url,
-            method="post",
-            payload=payload,
-            status_codes=202,
-            client="fabric_sp",
-        )
+    url = _build_url(url, params)
 
-        scan_id = response.json()["id"]
-        scan_status = response.json().get("status")
-        while scan_status not in ["Succeeded", "Failed"]:
-            time.sleep(1)
-            response = _base_api(
-                request=f"/v1.0/myorg/admin/workspaces/scanStatus/{scan_id}",
-                client="fabric_sp",
-            )
-            scan_status = response.json().get("status")
-            if scan_status == "Failed":
-                raise FabricHTTPException(response)
+    payload = {"workspaces": workspace_list}
+
+    response = _base_api(
+        request=url,
+        method="post",
+        payload=payload,
+        status_codes=202,
+        client="fabric_sp",
+    )
+
+    scan_id = response.json()["id"]
+    scan_status = response.json().get("status")
+
+    while scan_status not in ["Succeeded", "Failed"]:
+        time.sleep(1)
         response = _base_api(
-            request=f"/v1.0/myorg/admin/workspaces/scanResult/{scan_id}",
+            request=f"/v1.0/myorg/admin/workspaces/scanStatus/{scan_id}",
             client="fabric_sp",
         )
-        responseJson = response.json()
+        scan_status = response.json().get("status")
 
-        if "workspaces" in responseJson:
-            scan_result["workspaces"].extend(responseJson["workspaces"])
+    if scan_status == "Failed":
+        raise FabricHTTPException(response)
 
-        if "datasourceInstances" in responseJson:
-            scan_result["datasourceInstances"].extend(
-                responseJson["datasourceInstances"]
-            )
+    response = _base_api(
+        request=f"/v1.0/myorg/admin/workspaces/scanResult/{scan_id}",
+        client="fabric_sp",
+    )
 
-        if "misconfiguredDatasourceInstances" in responseJson:
-            scan_result["misconfiguredDatasourceInstances"].extend(
-                responseJson["misconfiguredDatasourceInstances"]
-            )
+    print(f"{icons.green_dot} Status: {scan_status}")
 
-    return scan_result
+    return response.json()
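The rewritten scan_workspaces no longer batches requests and merges partial results itself: it caps the input at 100 workspaces, passes GUIDs through and resolves names via list_workspaces, builds the getInfo URL with _build_url, polls scanStatus until the scan finishes, and returns the raw scanResult JSON. A hedged usage sketch (assuming the function is exported from sempy_labs.admin, as the admin imports above suggest):

    import sempy_labs.admin as admin

    result = admin.scan_workspaces(
        data_source_details=True,
        lineage=True,
        workspace=["aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"],  # up to 100 names or GUIDs
    )
    # Raw scan result as returned by the admin scanner API
    print(list(result.keys()))  # e.g. "workspaces", "datasourceInstances"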
@@ -1,10 +1,10 @@
 import pandas as pd
-from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_workspace_name_and_id,
     resolve_lakehouse_name_and_id,
     _create_dataframe,
+    _create_spark_session,
 )
 from typing import Optional
 from sempy._utils._log import log
@@ -51,7 +51,7 @@ def get_lakehouse_columns(
         lakehouse=lakehouse, workspace=workspace_id
     )
 
-    spark = SparkSession.builder.getOrCreate()
+    spark = _create_spark_session()
 
     tables = get_lakehouse_tables(
         lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
@@ -1,6 +1,5 @@
 import sempy.fabric as fabric
 import pandas as pd
-from pyspark.sql import SparkSession
 import pyarrow.parquet as pq
 import datetime
 from sempy_labs._helper_functions import (
@@ -10,6 +9,7 @@ from sempy_labs._helper_functions import (
     save_as_delta_table,
     _base_api,
     _create_dataframe,
+    _create_spark_session,
 )
 from sempy_labs.directlake._guardrails import (
     get_sku_size,
@@ -112,7 +112,7 @@ def get_lakehouse_tables(
     if extended:
         sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
-        spark = SparkSession.builder.getOrCreate()
+        spark = _create_spark_session()
         df["Files"] = None
         df["Row Groups"] = None
         df["Table Size"] = None