duckrun 0.2.18.dev1 (py3-none-any.whl) → 0.2.18.dev2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of duckrun might be problematic. Click here for more details.

duckrun/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
  from duckrun.core import Duckrun
4
4
  from duckrun.notebook import import_notebook_from_web, import_notebook
5
5
 
6
- __version__ = "0.2.18.dev1"
6
+ __version__ = "0.2.18.dev2"
7
7
 
8
8
  # Expose unified connect method at module level
9
9
  connect = Duckrun.connect
duckrun/core.py CHANGED
@@ -1035,12 +1035,13 @@ class Duckrun(WorkspaceOperationsMixin):
1035
1035
  """Get underlying DuckDB connection"""
1036
1036
  return self.con
1037
1037
 
1038
- def get_stats(self, source: str):
1038
+ def get_stats(self, source: str = None):
1039
1039
  """
1040
1040
  Get comprehensive statistics for Delta Lake tables.
1041
1041
 
1042
1042
  Args:
1043
- source: Can be one of:
1043
+ source: Optional. Can be one of:
1044
+ - None: Use all tables in the connection's schema (default)
1044
1045
  - Table name: 'table_name' (uses current schema)
1045
1046
  - Schema.table: 'schema.table_name' (specific table in schema)
1046
1047
  - Schema only: 'schema' (all tables in schema)
@@ -1052,6 +1053,9 @@ class Duckrun(WorkspaceOperationsMixin):
1052
1053
  Examples:
1053
1054
  con = duckrun.connect("tmp/data.lakehouse/aemo")
1054
1055
 
1056
+ # All tables in current schema (aemo)
1057
+ stats = con.get_stats()
1058
+
1055
1059
  # Single table in current schema
1056
1060
  stats = con.get_stats('price')
1057
1061
 
duckrun/semantic_model.py CHANGED
@@ -130,13 +130,24 @@ def check_dataset_exists(dataset_name, workspace_id, client):
130
130
 
131
131
 
132
132
  def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
133
- """Refresh a dataset and monitor progress using Power BI API"""
133
+ """Refresh a dataset and monitor progress using Power BI API
134
+
135
+ For DirectLake models, performs a two-step refresh:
136
+ 1. clearValues - Purges data from memory
137
+ 2. full - Reframes data from Delta tables
138
+ """
134
139
 
135
140
  # If dataset_id not provided, look it up by name
136
141
  if not dataset_id:
137
142
  dataset_id = get_dataset_id(dataset_name, workspace_id, client)
138
143
 
139
- payload = {
144
+ # Use Power BI API for refresh (not Fabric API)
145
+ powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
146
+ headers = client._get_headers()
147
+
148
+ # Step 1: clearValues - Purge data from memory
149
+ print(" Step 1: Clearing values from memory...")
150
+ clearvalues_payload = {
140
151
  "type": "clearValues",
141
152
  "commitMode": "transactional",
142
153
  "maxParallelism": 10,
@@ -144,11 +155,24 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
144
155
  "objects": []
145
156
  }
146
157
 
147
- # Use Power BI API for refresh (not Fabric API)
148
- powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
149
- headers = client._get_headers()
158
+ response = requests.post(powerbi_url, headers=headers, json=clearvalues_payload)
159
+
160
+ if response.status_code in [200, 202]:
161
+ print(" ✓ Clear values completed")
162
+ else:
163
+ response.raise_for_status()
164
+
165
+ # Step 2: full refresh - Reframe data from Delta tables
166
+ print(" Step 2: Full refresh to reframe data...")
167
+ full_payload = {
168
+ "type": "full",
169
+ "commitMode": "transactional",
170
+ "maxParallelism": 10,
171
+ "retryCount": 2,
172
+ "objects": []
173
+ }
150
174
 
151
- response = requests.post(powerbi_url, headers=headers, json=payload)
175
+ response = requests.post(powerbi_url, headers=headers, json=full_payload)
152
176
 
153
177
  if response.status_code in [200, 202]:
154
178
  print(f"✓ Refresh initiated")
@@ -471,13 +495,13 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
471
495
  dataset_exists = check_dataset_exists(dataset_name, workspace_id, client)
472
496
 
473
497
  if dataset_exists:
474
- print(f"\n✓ Dataset exists - refreshing...")
498
+ print(f"✓ Dataset '{dataset_name}' already exists - skipping deployment")
475
499
 
476
500
  if wait_seconds > 0:
477
501
  print(f" Waiting {wait_seconds} seconds...")
478
502
  time.sleep(wait_seconds)
479
503
 
480
- print("\n[Step 6/6] Refreshing semantic model...")
504
+ print("\n[Step 3/3] Refreshing existing semantic model...")
481
505
  refresh_dataset(dataset_name, workspace_id, client)
482
506
 
483
507
  print("\n" + "=" * 70)
duckrun/stats.py CHANGED
@@ -60,13 +60,14 @@ def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
60
60
  return []
61
61
 
62
62
 
63
- def get_stats(duckrun_instance, source: str):
63
+ def get_stats(duckrun_instance, source: str = None):
64
64
  """
65
65
  Get comprehensive statistics for Delta Lake tables.
66
66
 
67
67
  Args:
68
68
  duckrun_instance: The Duckrun connection instance
69
- source: Can be one of:
69
+ source: Optional. Can be one of:
70
+ - None: Use all tables in the connection's schema (default)
70
71
  - Table name: 'table_name' (uses main schema in DuckDB)
71
72
  - Schema.table: 'schema.table_name' (specific table in schema, if multi-schema)
72
73
  - Schema only: 'schema' (all tables in schema, if multi-schema)
@@ -78,6 +79,9 @@ def get_stats(duckrun_instance, source: str):
78
79
  Examples:
79
80
  con = duckrun.connect("tmp/data.lakehouse/test")
80
81
 
82
+ # All tables in the connection's schema
83
+ stats = con.get_stats()
84
+
81
85
  # Single table in main schema (DuckDB uses 'main', not 'test')
82
86
  stats = con.get_stats('price_today')
83
87
 
@@ -93,6 +97,10 @@ def get_stats(duckrun_instance, source: str):
93
97
  duckdb_schema = "main"
94
98
  url_schema = duckrun_instance.schema # This is from the connection URL path
95
99
 
100
+ # If source is not provided, default to all tables in the connection's schema
101
+ if source is None:
102
+ source = url_schema
103
+
96
104
  # Parse the source and validate existence
97
105
  if '.' in source:
98
106
  # Format: schema.table - only valid if multi-schema is enabled
duckrun-0.2.18.dev1.dist-info/METADATA → duckrun-0.2.18.dev2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.18.dev1
3
+ Version: 0.2.18.dev2
4
4
  Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
5
5
  Author: mim
6
6
  License: MIT
duckrun-0.2.18.dev2.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
1
+ duckrun/__init__.py,sha256=vqv_bJjHjrrXGs8Zyxuy-GKTCyJlZ5z3npPQgE9ipBY,355
2
+ duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
3
+ duckrun/core.py,sha256=tWLFOSVZHoJ0r5YJaj0lG1s_kehiIrnxPMrQQIcyh94,68367
4
+ duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
5
+ duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
6
+ duckrun/notebook.py,sha256=SzdKTpvzHiWMrvg7mCd3DN6R4gU_6Gm7gfkuETzylaE,12103
7
+ duckrun/runner.py,sha256=NGVyerJA44UP2umRdndfL0fuFM_gdOZmuJUz-PLOFf0,13461
8
+ duckrun/semantic_model.py,sha256=X3VKdo4BehAg681Ucq7fzB2KPY2mwPLbfIZqI5Gbqp4,30377
9
+ duckrun/stats.py,sha256=qvWnPk2P8Ob_tzaiNfdQmUQqMVq2FWv3EgArE7hPl44,15482
10
+ duckrun/writer.py,sha256=wIsU77DSj4J7d9_bIhvk6AbC51uUrLW0e6pcSPQOY1c,9424
11
+ duckrun-0.2.18.dev2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
12
+ duckrun-0.2.18.dev2.dist-info/METADATA,sha256=JpewTO7QqHrdUn_G3Lz-1jxFifVyBxj9lNX_Qodhe2A,20807
13
+ duckrun-0.2.18.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ duckrun-0.2.18.dev2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
15
+ duckrun-0.2.18.dev2.dist-info/RECORD,,
duckrun-0.2.18.dev1.dist-info/RECORD REMOVED
@@ -1,15 +0,0 @@
1
- duckrun/__init__.py,sha256=0bJaY3gWsTwGcQS1P9KfaVOH9f8O-_CHXJzVbvqeOzA,355
2
- duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
3
- duckrun/core.py,sha256=DvxCBTob_OWOZAzcVqhoz5w95pxyH4sfoSmXMzG2BbY,68168
4
- duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
5
- duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
6
- duckrun/notebook.py,sha256=SzdKTpvzHiWMrvg7mCd3DN6R4gU_6Gm7gfkuETzylaE,12103
7
- duckrun/runner.py,sha256=NGVyerJA44UP2umRdndfL0fuFM_gdOZmuJUz-PLOFf0,13461
8
- duckrun/semantic_model.py,sha256=mkgAdi2hfJ1lkKhNo1vnPBNOFybFIxL34-zbP-71kAU,29516
9
- duckrun/stats.py,sha256=EqrCN1xwGo5nZgwezBvb6RepXT6b8H7xgK0yJJGFLfE,15155
10
- duckrun/writer.py,sha256=wIsU77DSj4J7d9_bIhvk6AbC51uUrLW0e6pcSPQOY1c,9424
11
- duckrun-0.2.18.dev1.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
12
- duckrun-0.2.18.dev1.dist-info/METADATA,sha256=fUEehSe7mTzCQuZmyoxbysSFih3x8XfcnLMHv-h3ues,20807
13
- duckrun-0.2.18.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- duckrun-0.2.18.dev1.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
15
- duckrun-0.2.18.dev1.dist-info/RECORD,,