semantic-link-labs 0.12.8 (semantic_link_labs-0.12.8-py3-none-any.whl)
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
- semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
- semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
- semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
- semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +606 -0
- sempy_labs/_a_lib_info.py +2 -0
- sempy_labs/_ai.py +437 -0
- sempy_labs/_authentication.py +264 -0
- sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
- sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
- sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
- sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
- sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
- sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
- sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
- sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
- sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
- sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
- sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
- sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
- sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
- sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
- sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
- sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
- sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
- sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
- sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
- sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
- sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
- sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
- sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
- sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
- sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
- sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
- sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
- sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
- sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
- sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
- sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
- sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
- sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
- sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
- sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
- sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
- sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
- sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
- sempy_labs/_capacities.py +1198 -0
- sempy_labs/_capacity_migration.py +660 -0
- sempy_labs/_clear_cache.py +351 -0
- sempy_labs/_connections.py +610 -0
- sempy_labs/_dashboards.py +69 -0
- sempy_labs/_data_access_security.py +98 -0
- sempy_labs/_data_pipelines.py +162 -0
- sempy_labs/_dataflows.py +668 -0
- sempy_labs/_dax.py +501 -0
- sempy_labs/_daxformatter.py +80 -0
- sempy_labs/_delta_analyzer.py +467 -0
- sempy_labs/_delta_analyzer_history.py +301 -0
- sempy_labs/_dictionary_diffs.py +221 -0
- sempy_labs/_documentation.py +147 -0
- sempy_labs/_domains.py +51 -0
- sempy_labs/_eventhouses.py +182 -0
- sempy_labs/_external_data_shares.py +230 -0
- sempy_labs/_gateways.py +521 -0
- sempy_labs/_generate_semantic_model.py +521 -0
- sempy_labs/_get_connection_string.py +84 -0
- sempy_labs/_git.py +543 -0
- sempy_labs/_graphQL.py +90 -0
- sempy_labs/_helper_functions.py +2833 -0
- sempy_labs/_icons.py +149 -0
- sempy_labs/_job_scheduler.py +609 -0
- sempy_labs/_kql_databases.py +149 -0
- sempy_labs/_kql_querysets.py +124 -0
- sempy_labs/_kusto.py +137 -0
- sempy_labs/_labels.py +124 -0
- sempy_labs/_list_functions.py +1720 -0
- sempy_labs/_managed_private_endpoints.py +253 -0
- sempy_labs/_mirrored_databases.py +416 -0
- sempy_labs/_mirrored_warehouses.py +60 -0
- sempy_labs/_ml_experiments.py +113 -0
- sempy_labs/_model_auto_build.py +140 -0
- sempy_labs/_model_bpa.py +557 -0
- sempy_labs/_model_bpa_bulk.py +378 -0
- sempy_labs/_model_bpa_rules.py +859 -0
- sempy_labs/_model_dependencies.py +343 -0
- sempy_labs/_mounted_data_factories.py +123 -0
- sempy_labs/_notebooks.py +441 -0
- sempy_labs/_one_lake_integration.py +151 -0
- sempy_labs/_onelake.py +131 -0
- sempy_labs/_query_scale_out.py +433 -0
- sempy_labs/_refresh_semantic_model.py +435 -0
- sempy_labs/_semantic_models.py +468 -0
- sempy_labs/_spark.py +455 -0
- sempy_labs/_sql.py +241 -0
- sempy_labs/_sql_audit_settings.py +207 -0
- sempy_labs/_sql_endpoints.py +214 -0
- sempy_labs/_tags.py +201 -0
- sempy_labs/_translations.py +43 -0
- sempy_labs/_user_delegation_key.py +44 -0
- sempy_labs/_utils.py +79 -0
- sempy_labs/_vertipaq.py +1021 -0
- sempy_labs/_vpax.py +388 -0
- sempy_labs/_warehouses.py +234 -0
- sempy_labs/_workloads.py +140 -0
- sempy_labs/_workspace_identity.py +72 -0
- sempy_labs/_workspaces.py +595 -0
- sempy_labs/admin/__init__.py +170 -0
- sempy_labs/admin/_activities.py +167 -0
- sempy_labs/admin/_apps.py +145 -0
- sempy_labs/admin/_artifacts.py +65 -0
- sempy_labs/admin/_basic_functions.py +463 -0
- sempy_labs/admin/_capacities.py +508 -0
- sempy_labs/admin/_dataflows.py +45 -0
- sempy_labs/admin/_datasets.py +186 -0
- sempy_labs/admin/_domains.py +522 -0
- sempy_labs/admin/_external_data_share.py +100 -0
- sempy_labs/admin/_git.py +72 -0
- sempy_labs/admin/_items.py +265 -0
- sempy_labs/admin/_labels.py +211 -0
- sempy_labs/admin/_reports.py +241 -0
- sempy_labs/admin/_scanner.py +118 -0
- sempy_labs/admin/_shared.py +82 -0
- sempy_labs/admin/_sharing_links.py +110 -0
- sempy_labs/admin/_tags.py +131 -0
- sempy_labs/admin/_tenant.py +503 -0
- sempy_labs/admin/_tenant_keys.py +89 -0
- sempy_labs/admin/_users.py +140 -0
- sempy_labs/admin/_workspaces.py +236 -0
- sempy_labs/deployment_pipeline/__init__.py +23 -0
- sempy_labs/deployment_pipeline/_items.py +580 -0
- sempy_labs/directlake/__init__.py +57 -0
- sempy_labs/directlake/_autosync.py +58 -0
- sempy_labs/directlake/_directlake_schema_compare.py +120 -0
- sempy_labs/directlake/_directlake_schema_sync.py +161 -0
- sempy_labs/directlake/_dl_helper.py +274 -0
- sempy_labs/directlake/_generate_shared_expression.py +94 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
- sempy_labs/directlake/_get_shared_expression.py +34 -0
- sempy_labs/directlake/_guardrails.py +96 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
- sempy_labs/directlake/_warm_cache.py +236 -0
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/environment/__init__.py +23 -0
- sempy_labs/environment/_items.py +212 -0
- sempy_labs/environment/_pubstage.py +223 -0
- sempy_labs/eventstream/__init__.py +37 -0
- sempy_labs/eventstream/_items.py +263 -0
- sempy_labs/eventstream/_topology.py +652 -0
- sempy_labs/graph/__init__.py +59 -0
- sempy_labs/graph/_groups.py +651 -0
- sempy_labs/graph/_sensitivity_labels.py +120 -0
- sempy_labs/graph/_teams.py +125 -0
- sempy_labs/graph/_user_licenses.py +96 -0
- sempy_labs/graph/_users.py +516 -0
- sempy_labs/graph_model/__init__.py +15 -0
- sempy_labs/graph_model/_background_jobs.py +63 -0
- sempy_labs/graph_model/_items.py +149 -0
- sempy_labs/lakehouse/__init__.py +67 -0
- sempy_labs/lakehouse/_blobs.py +247 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
- sempy_labs/lakehouse/_helper.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +351 -0
- sempy_labs/lakehouse/_livy_sessions.py +143 -0
- sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
- sempy_labs/lakehouse/_partitioning.py +165 -0
- sempy_labs/lakehouse/_schemas.py +217 -0
- sempy_labs/lakehouse/_shortcuts.py +440 -0
- sempy_labs/migration/__init__.py +35 -0
- sempy_labs/migration/_create_pqt_file.py +238 -0
- sempy_labs/migration/_direct_lake_to_import.py +105 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
- sempy_labs/migration/_migration_validation.py +71 -0
- sempy_labs/migration/_refresh_calc_tables.py +131 -0
- sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
- sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
- sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
- sempy_labs/ml_model/__init__.py +23 -0
- sempy_labs/ml_model/_functions.py +427 -0
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +55 -0
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_download_report.py +76 -0
- sempy_labs/report/_export_report.py +257 -0
- sempy_labs/report/_generate_report.py +427 -0
- sempy_labs/report/_paginated.py +76 -0
- sempy_labs/report/_report_bpa.py +354 -0
- sempy_labs/report/_report_bpa_rules.py +115 -0
- sempy_labs/report/_report_functions.py +581 -0
- sempy_labs/report/_report_helper.py +227 -0
- sempy_labs/report/_report_list_functions.py +110 -0
- sempy_labs/report/_report_rebind.py +149 -0
- sempy_labs/report/_reportwrapper.py +3100 -0
- sempy_labs/report/_save_report.py +147 -0
- sempy_labs/snowflake_database/__init__.py +10 -0
- sempy_labs/snowflake_database/_items.py +105 -0
- sempy_labs/sql_database/__init__.py +21 -0
- sempy_labs/sql_database/_items.py +201 -0
- sempy_labs/sql_database/_mirroring.py +79 -0
- sempy_labs/theme/__init__.py +12 -0
- sempy_labs/theme/_org_themes.py +129 -0
- sempy_labs/tom/__init__.py +3 -0
- sempy_labs/tom/_model.py +5977 -0
- sempy_labs/variable_library/__init__.py +19 -0
- sempy_labs/variable_library/_functions.py +403 -0
- sempy_labs/warehouse/__init__.py +28 -0
- sempy_labs/warehouse/_items.py +234 -0
- sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/lakehouse/_partitioning.py
@@ -0,0 +1,165 @@
+from typing import Optional, List
+from uuid import UUID
+from sempy_labs._helper_functions import (
+    _create_spark_session,
+    create_abfss_path,
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _get_delta_table,
+)
+from sempy._utils._log import log
+
+
+@log
+def _get_partitions(
+    table_name: str,
+    schema_name: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema_name)
+
+    delta_table = _get_delta_table(path)
+    details_df = delta_table.detail()
+
+    return details_df.collect()[0].asDict()
+
+
+@log
+def is_partitioned(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> bool:
+    """
+    Checks if a delta table is partitioned.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    bool
+        True if the table is partitioned, False otherwise.
+    """
+
+    details = _get_partitions(
+        table_name=table, schema_name=schema, lakehouse=lakehouse, workspace=workspace
+    )
+    return len(details["partitionColumns"]) > 0
+
+
+@log
+def list_partitioned_columns(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> List[str]:
+    """
+    Lists the partitioned columns of a delta table.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    List[str]
+        The list of partitioned columns.
+    """
+
+    details = _get_partitions(
+        table_name=table, schema_name=schema, lakehouse=lakehouse, workspace=workspace
+    )
+
+    return details["partitionColumns"]
+
+
+@log
+def is_over_partitioned(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    total_table_size_gb: int = 1000,
+    average_partition_size_gb: int = 1,
+) -> bool:
+    """
+    Checks if a delta table is over-partitioned.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    total_table_size_gb : int, default=1000
+        Threshold for total table size in GB (default 1TB).
+    average_partition_size_gb : int, default=1
+        Threshold for average partition size in GB.
+
+    Returns
+    -------
+    bool
+        True if the table is over-partitioned, False otherwise.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    path = create_abfss_path(lakehouse_id, workspace_id, table, schema)
+    # Get DeltaTable details
+    spark = _create_spark_session()
+    details_df = spark.sql(f"DESCRIBE DETAIL delta.`{path}`")
+    details = details_df.collect()[0].asDict()
+
+    # Extract relevant fields
+    size_bytes = details["sizeInBytes"]
+    partition_cols = details["partitionColumns"]
+    num_files = details["numFiles"]
+
+    total_size_gb = size_bytes / (1024**3)
+
+    # Only check if the table is partitioned
+    if len(partition_cols) > 0 and num_files > 0:
+        avg_partition_size_gb = total_size_gb / num_files
+
+        if (
+            total_size_gb < total_table_size_gb
+            or avg_partition_size_gb < average_partition_size_gb
+        ):
+            return True
+
+    return False
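
For orientation, here is a minimal usage sketch of the partitioning helpers above. It assumes these functions are re-exported from sempy_labs.lakehouse (the package __init__ exports are not shown in this hunk; otherwise import them from sempy_labs.lakehouse._partitioning) and that the code runs in a Microsoft Fabric notebook with a lakehouse attached. The table name is hypothetical.

# Hypothetical usage sketch; "sales" and the import path are assumptions.
from sempy_labs.lakehouse import (
    is_partitioned,
    list_partitioned_columns,
    is_over_partitioned,
)

table = "sales"

if is_partitioned(table=table):
    cols = list_partitioned_columns(table=table)
    print(f"'{table}' is partitioned by: {', '.join(cols)}")
    # Default guardrails: flag tables under 1000 GB total, or whose
    # size per file (used as a proxy for partition size) is under 1 GB.
    if is_over_partitioned(table=table):
        print(f"'{table}' looks over-partitioned for its size.")
else:
    print(f"'{table}' is not partitioned.")

Note that is_over_partitioned divides total size by numFiles from DESCRIBE DETAIL, so its "average partition size" is really an average file size rather than the size per partition-column value.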
sempy_labs/lakehouse/_schemas.py
@@ -0,0 +1,217 @@
+from typing import Optional, List
+from uuid import UUID
+from sempy._utils._log import log
+import pandas as pd
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name_and_id,
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _create_dataframe,
+    _base_api,
+    resolve_workspace_name_and_id,
+)
+import sempy_labs._icons as icons
+
+
+@log
+def is_schema_enabled(
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+) -> bool:
+    """
+    Indicates whether a lakehouse has schemas enabled.
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    bool
+        Indicates whether the lakehouse has schemas enabled.
+    """
+    workspace_id = resolve_workspace_id(workspace)
+    (item_name, item_id) = resolve_lakehouse_name_and_id(lakehouse, workspace)
+    response = _base_api(f"/v1/workspaces/{workspace_id}/lakehouses/{item_id}")
+    default_schema = response.json().get("properties", {}).get("defaultSchema", None)
+    if default_schema:
+        return True
+    else:
+        return False
+
+
+@log
+def list_schemas(
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+) -> pd.DataFrame:
+    """
+    Lists the schemas within a Fabric lakehouse.
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Shows the schemas within a lakehouse.
+    """
+
+    columns = {
+        "Schema Name": "str",
+    }
+    df = _create_dataframe(columns=columns)
+    workspace_id = resolve_workspace_id(workspace)
+    item_id = resolve_lakehouse_id(lakehouse, workspace)
+    response = _base_api(
+        request=f"{workspace_id}/{item_id}/api/2.1/unity-catalog/schemas?catalog_name={item_id}",
+        client="onelake",
+    )
+
+    rows = []
+    for s in response.json().get("schemas", []):
+        rows.append(
+            {
+                "Schema Name": s.get("name", None),
+            }
+        )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
+
+
+def list_tables(
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    schema: Optional[str | List[str]] = None,
+) -> pd.DataFrame:
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_lakehouse_name_and_id(lakehouse, workspace)
+
+    response = _base_api(f"/v1/workspaces/{workspace_id}/lakehouses/{item_id}")
+    default_schema = response.json().get("properties", {}).get("defaultSchema", None)
+    schema_enabled = True if default_schema else False
+
+    columns = {
+        "Workspace Name": "str",
+        "Lakehouse Name": "str",
+        "Table Name": "str",
+        "Schema Name": "str",
+        "Format": "str",
+        "Type": "str",
+        "Location": "str",
+    }
+    df = _create_dataframe(columns=columns)
+
+    rows = []
+    if schema_enabled:
+        schemas = list_schemas(lakehouse=lakehouse, workspace=workspace)
+        if schema:
+            if isinstance(schema, str):
+                schema = [schema]
+            schemas = schemas[schemas["Schema Name"].isin(schema)]
+
+        # Loop through schemas
+        for _, r in schemas.iterrows():
+            schema_name = r["Schema Name"]
+            response = _base_api(
+                request=f"{workspace_id}/{item_id}/api/2.1/unity-catalog/tables?catalog_name={item_id}&schema_name={schema_name}",
+                client="onelake",
+            )
+            # Loop through tables
+            for t in response.json().get("tables", []):
+                location = t.get("storage_location", {})
+                location = f'abfss://{location.split(".microsoft.com/")[1]}'
+                rows.append(
+                    {
+                        "Workspace Name": workspace_name,
+                        "Lakehouse Name": item_name,
+                        "Table Name": t.get("name", {}),
+                        "Schema Name": schema_name,
+                        "Format": t.get("data_source_format", {}).lower(),
+                        "Type": "Managed",
+                        "Location": location,
+                    }
+                )
+    else:
+        if schema:
+            print(
+                f"{icons.info} The schema parameter has been ignored as the '{item_name}' lakehouse within the '{workspace_name}' workspace has schemas disabled."
+            )
+        responses = _base_api(
+            request=f"v1/workspaces/{workspace_id}/lakehouses/{item_id}/tables",
+            uses_pagination=True,
+            client="fabric_sp",
+        )
+        for r in responses:
+            for i in r.get("data", []):
+                rows.append(
+                    {
+                        "Workspace Name": workspace_name,
+                        "Lakehouse Name": item_name,
+                        "Schema Name": None,
+                        "Table Name": i.get("name"),
+                        "Format": i.get("format"),
+                        "Type": i.get("type"),
+                        "Location": i.get("location"),
+                    }
+                )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
+
+
+def schema_exists(
+    schema: str,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> bool:
+    """
+    Indicates whether the specified schema exists within a Fabric lakehouse.
+
+    Parameters
+    ----------
+    schema : str
+        The name of the schema.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    bool
+        Indicates whether the specified schema exists within the lakehouse.
+    """
+
+    df = list_schemas(lakehouse=lakehouse, workspace=workspace)
+    return schema in df["Schema Name"].values
+
+    # (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    # (item_name, item_id) = resolve_lakehouse_name_and_id(lakehouse, workspace)
+    # response = _base_api(
+    #     request=f"{workspace_id}/{item_id}/api/2.1/unity-catalog/schemas/{schema}",
+    #     client="onelake",
+    #     method="head",
+    # )
+
+    # response.json()