cinchdb 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cinchdb/cli/commands/column.py +3 -4
- cinchdb/cli/commands/database.py +58 -60
- cinchdb/cli/commands/table.py +3 -3
- cinchdb/cli/main.py +1 -7
- cinchdb/cli/utils.py +23 -0
- cinchdb/core/database.py +138 -11
- cinchdb/core/initializer.py +188 -10
- cinchdb/core/path_utils.py +44 -22
- cinchdb/infrastructure/metadata_connection_pool.py +145 -0
- cinchdb/infrastructure/metadata_db.py +376 -0
- cinchdb/managers/branch.py +119 -23
- cinchdb/managers/change_applier.py +30 -13
- cinchdb/managers/column.py +4 -10
- cinchdb/managers/query.py +40 -4
- cinchdb/managers/table.py +8 -6
- cinchdb/managers/tenant.py +718 -96
- cinchdb/models/table.py +0 -4
- cinchdb/models/tenant.py +4 -2
- {cinchdb-0.1.9.dist-info → cinchdb-0.1.11.dist-info}/METADATA +5 -36
- {cinchdb-0.1.9.dist-info → cinchdb-0.1.11.dist-info}/RECORD +23 -21
- {cinchdb-0.1.9.dist-info → cinchdb-0.1.11.dist-info}/WHEEL +0 -0
- {cinchdb-0.1.9.dist-info → cinchdb-0.1.11.dist-info}/entry_points.txt +0 -0
- {cinchdb-0.1.9.dist-info → cinchdb-0.1.11.dist-info}/licenses/LICENSE +0 -0
cinchdb/managers/tenant.py
CHANGED
@@ -1,18 +1,25 @@
|
|
1
1
|
"""Tenant management for CinchDB."""
|
2
2
|
|
3
|
+
import hashlib
|
3
4
|
import shutil
|
5
|
+
import sqlite3
|
6
|
+
import uuid
|
4
7
|
from pathlib import Path
|
5
8
|
from typing import List, Optional
|
9
|
+
from datetime import datetime, timezone
|
6
10
|
|
7
11
|
from cinchdb.models import Tenant
|
8
12
|
from cinchdb.core.path_utils import (
|
9
13
|
get_branch_path,
|
10
14
|
get_tenant_db_path,
|
15
|
+
get_database_path,
|
11
16
|
list_tenants,
|
12
17
|
)
|
13
18
|
from cinchdb.core.connection import DatabaseConnection
|
14
19
|
from cinchdb.core.maintenance import check_maintenance_mode
|
15
20
|
from cinchdb.utils.name_validator import validate_name
|
21
|
+
from cinchdb.infrastructure.metadata_db import MetadataDB
|
22
|
+
from cinchdb.infrastructure.metadata_connection_pool import get_metadata_db
|
16
23
|
|
17
24
|
|
18
25
|
class TenantManager:
|
@@ -30,88 +37,135 @@ class TenantManager:
|
|
30
37
|
self.database = database
|
31
38
|
self.branch = branch
|
32
39
|
self.branch_path = get_branch_path(self.project_root, database, branch)
|
33
|
-
|
34
|
-
|
40
|
+
self._empty_tenant_name = "__empty__" # Reserved name for lazy tenant template
|
41
|
+
|
42
|
+
# Lazy-initialized pooled connection
|
43
|
+
self._metadata_db = None
|
44
|
+
self.database_id = None
|
45
|
+
self.branch_id = None
|
46
|
+
|
47
|
+
def _ensure_initialized(self) -> None:
|
48
|
+
"""Ensure metadata connection and IDs are initialized."""
|
49
|
+
if self._metadata_db is None:
|
50
|
+
self._metadata_db = get_metadata_db(self.project_root)
|
51
|
+
|
52
|
+
# Initialize database and branch IDs on first access
|
53
|
+
if self.database_id is None:
|
54
|
+
db_info = self._metadata_db.get_database(self.database)
|
55
|
+
if db_info:
|
56
|
+
self.database_id = db_info['id']
|
57
|
+
branch_info = self._metadata_db.get_branch(self.database_id, self.branch)
|
58
|
+
if branch_info:
|
59
|
+
self.branch_id = branch_info['id']
|
60
|
+
|
61
|
+
@property
def metadata_db(self) -> MetadataDB:
    """Return the metadata database handle, initializing it on first access.

    Accessing this property runs ``_ensure_initialized()`` first, so the
    database and branch IDs are also resolved as a side effect.
    """
    self._ensure_initialized()
    handle = self._metadata_db
    return handle
|
66
|
+
|
67
|
+
def list_tenants(self, include_system: bool = False) -> List[Tenant]:
    """Return the tenants registered for this branch in the metadata database.

    Args:
        include_system: When True, the reserved ``__empty__`` template tenant
            is included; by default it is hidden from the listing.

    Returns:
        List of Tenant objects; empty when the branch has no metadata entry.
    """
    self._ensure_initialized()

    # Without a resolved branch ID there is nothing to query.
    if not self.branch_id:
        return []

    rows = self.metadata_db.list_tenants(self.branch_id)
    if not include_system:
        # Hide the lazy-tenant template from user-facing listings.
        rows = [row for row in rows if row['name'] != self._empty_tenant_name]

    return [
        Tenant(
            name=row['name'],
            database=self.database,
            branch=self.branch,
            is_main=(row['name'] == "main"),
        )
        for row in rows
    ]
|
53
100
|
|
54
101
|
def create_tenant(
|
55
|
-
self, tenant_name: str, description: Optional[str] = None
|
102
|
+
self, tenant_name: str, description: Optional[str] = None, lazy: bool = True
|
56
103
|
) -> Tenant:
|
57
104
|
"""Create a new tenant by copying schema from main tenant.
|
58
105
|
|
59
106
|
Args:
|
60
107
|
tenant_name: Name for the new tenant
|
61
108
|
description: Optional description
|
109
|
+
lazy: If True, don't create database file until first use
|
62
110
|
|
63
111
|
Returns:
|
64
112
|
Created Tenant object
|
65
113
|
|
66
114
|
Raises:
|
67
|
-
ValueError: If tenant already exists
|
115
|
+
ValueError: If tenant already exists or uses reserved name
|
68
116
|
InvalidNameError: If tenant name is invalid
|
69
117
|
MaintenanceError: If branch is in maintenance mode
|
70
118
|
"""
|
119
|
+
# Check for reserved name
|
120
|
+
if tenant_name == self._empty_tenant_name:
|
121
|
+
raise ValueError(f"'{self._empty_tenant_name}' is a reserved tenant name")
|
122
|
+
|
71
123
|
# Validate tenant name
|
72
124
|
validate_name(tenant_name, "tenant")
|
73
125
|
|
74
126
|
# Check maintenance mode
|
75
127
|
check_maintenance_mode(self.project_root, self.database, self.branch)
|
128
|
+
|
129
|
+
# Ensure initialization
|
130
|
+
self._ensure_initialized()
|
131
|
+
|
132
|
+
if not self.branch_id:
|
133
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
76
134
|
|
77
|
-
#
|
78
|
-
|
135
|
+
# Check if tenant already exists in metadata
|
136
|
+
existing_tenant = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
137
|
+
if existing_tenant:
|
79
138
|
raise ValueError(f"Tenant '{tenant_name}' already exists")
|
80
139
|
|
81
|
-
#
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
#
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
import sqlite3
|
111
|
-
vacuum_conn = sqlite3.connect(str(new_db_path))
|
112
|
-
vacuum_conn.isolation_level = None # Autocommit mode required for VACUUM
|
113
|
-
vacuum_conn.execute("VACUUM")
|
114
|
-
vacuum_conn.close()
|
140
|
+
# Create tenant ID
|
141
|
+
tenant_id = str(uuid.uuid4())
|
142
|
+
|
143
|
+
# Calculate shard for tenant
|
144
|
+
shard = self._calculate_shard(tenant_name)
|
145
|
+
|
146
|
+
# Create tenant in metadata database
|
147
|
+
metadata = {
|
148
|
+
"description": description,
|
149
|
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
150
|
+
}
|
151
|
+
self.metadata_db.create_tenant(tenant_id, self.branch_id, tenant_name, shard, metadata)
|
152
|
+
|
153
|
+
if not lazy:
|
154
|
+
# Ensure __empty__ tenant exists with current schema
|
155
|
+
self._ensure_empty_tenant()
|
156
|
+
|
157
|
+
# Create actual database file using sharded paths
|
158
|
+
new_db_path = self._get_sharded_tenant_db_path(tenant_name)
|
159
|
+
empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)
|
160
|
+
|
161
|
+
# Directory creation is handled by _get_sharded_tenant_db_path
|
162
|
+
|
163
|
+
# Copy __empty__ tenant database to new tenant
|
164
|
+
# __empty__ already has 512-byte pages and no data
|
165
|
+
shutil.copy2(empty_db_path, new_db_path)
|
166
|
+
|
167
|
+
# Mark as materialized in metadata
|
168
|
+
self.metadata_db.mark_tenant_materialized(tenant_id)
|
115
169
|
|
116
170
|
return Tenant(
|
117
171
|
name=tenant_name,
|
@@ -121,6 +175,128 @@ class TenantManager:
|
|
121
175
|
is_main=False,
|
122
176
|
)
|
123
177
|
|
178
|
+
def _calculate_shard(self, tenant_name: str) -> str:
|
179
|
+
"""Calculate the shard directory for a tenant using SHA256 hash.
|
180
|
+
|
181
|
+
Args:
|
182
|
+
tenant_name: Name of the tenant
|
183
|
+
|
184
|
+
Returns:
|
185
|
+
Two-character hex string (e.g., "a0", "ff")
|
186
|
+
"""
|
187
|
+
hash_val = hashlib.sha256(tenant_name.encode('utf-8')).hexdigest()
|
188
|
+
return hash_val[:2]
|
189
|
+
|
190
|
+
def _get_sharded_tenant_db_path(self, tenant_name: str) -> Path:
|
191
|
+
"""Get the sharded database path for a tenant using metadata DB lookup.
|
192
|
+
|
193
|
+
Args:
|
194
|
+
tenant_name: Name of the tenant
|
195
|
+
|
196
|
+
Returns:
|
197
|
+
Path to the tenant database file in its shard directory
|
198
|
+
|
199
|
+
Raises:
|
200
|
+
ValueError: If tenant doesn't exist in metadata
|
201
|
+
"""
|
202
|
+
# For __empty__ tenant, calculate shard directly
|
203
|
+
if tenant_name == self._empty_tenant_name:
|
204
|
+
shard = self._calculate_shard(tenant_name)
|
205
|
+
else:
|
206
|
+
# Look up shard from metadata DB
|
207
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
208
|
+
if not tenant_info or not tenant_info.get('shard'):
|
209
|
+
raise ValueError(f"Tenant '{tenant_name}' not found in metadata or missing shard info")
|
210
|
+
shard = tenant_info['shard']
|
211
|
+
|
212
|
+
# Build sharded path
|
213
|
+
branch_path = get_branch_path(self.project_root, self.database, self.branch)
|
214
|
+
tenants_dir = branch_path / "tenants"
|
215
|
+
shard_dir = tenants_dir / shard
|
216
|
+
|
217
|
+
# Ensure shard directory exists
|
218
|
+
shard_dir.mkdir(parents=True, exist_ok=True)
|
219
|
+
|
220
|
+
return shard_dir / f"{tenant_name}.db"
|
221
|
+
|
222
|
+
def _ensure_empty_tenant(self) -> None:
    """Ensure the reserved __empty__ template tenant exists with the current schema.

    This tenant serves as a template for lazy tenants. It is registered in
    the metadata database if missing, and its database file is built on
    demand by copying the main tenant's database, deleting all rows, and
    rebuilding the file with 512-byte pages. If the main tenant's file does
    not exist, an empty database is created instead.

    Does nothing when the branch is not known to the metadata database or
    when the template file already exists.
    """
    self._ensure_initialized()

    if not self.branch_id:
        return

    empty_tenant = self.metadata_db.get_tenant(self.branch_id, self._empty_tenant_name)
    empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)

    # Create in metadata if it doesn't exist (should already be created during
    # branch/database init). It is marked materialized only once the file exists.
    if not empty_tenant:
        tenant_id = str(uuid.uuid4())
        shard = self._calculate_shard(self._empty_tenant_name)
        self.metadata_db.create_tenant(
            tenant_id, self.branch_id, self._empty_tenant_name, shard,
            metadata={"system": True, "description": "Template for lazy tenants"}
        )
        empty_tenant = {"id": tenant_id}

    if empty_db_path.exists():
        return

    empty_db_path.parent.mkdir(parents=True, exist_ok=True)

    main_db_path = self._get_sharded_tenant_db_path("main")

    if main_db_path.exists():
        # Start from a copy of main, then strip the data to keep schema only.
        shutil.copy2(main_db_path, empty_db_path)

        with DatabaseConnection(empty_db_path) as conn:
            result = conn.execute("""
                SELECT name FROM sqlite_master
                WHERE type='table'
                AND name NOT LIKE 'sqlite_%'
            """)
            tables = [row["name"] for row in result.fetchall()]

            for table in tables:
                # Quote the identifier so names with spaces, quotes or
                # reserved words cannot break (or alter) the statement.
                quoted = '"' + table.replace('"', '""') + '"'
                conn.execute(f"DELETE FROM {quoted}")

            conn.commit()
    else:
        # If main doesn't exist either, create an empty database.
        empty_db_path.touch()
        with DatabaseConnection(empty_db_path):
            pass  # Just initialize with PRAGMAs

    # Rebuild the template with a small page size. VACUUM INTO refuses to
    # write to an existing file, so clear any leftover from an earlier failure.
    temp_path = empty_db_path.with_suffix('.tmp')
    if temp_path.exists():
        temp_path.unlink()

    vacuum_conn = sqlite3.connect(str(empty_db_path))
    try:
        vacuum_conn.isolation_level = None  # Autocommit mode required for VACUUM
        vacuum_conn.execute("PRAGMA page_size = 512")
        # Bind the target path instead of splicing it into the SQL text.
        vacuum_conn.execute("VACUUM INTO ?", (str(temp_path),))
    finally:
        vacuum_conn.close()

    # Replace original with optimized version
    shutil.move(str(temp_path), str(empty_db_path))

    # Mark as materialized now that the file exists
    self.metadata_db.mark_tenant_materialized(empty_tenant['id'])
|
298
|
+
|
299
|
+
|
124
300
|
def delete_tenant(self, tenant_name: str) -> None:
|
125
301
|
"""Delete a tenant.
|
126
302
|
|
@@ -128,36 +304,245 @@ class TenantManager:
|
|
128
304
|
tenant_name: Name of tenant to delete
|
129
305
|
|
130
306
|
Raises:
|
131
|
-
ValueError: If tenant doesn't exist
|
307
|
+
ValueError: If tenant doesn't exist, is main tenant, or is reserved
|
132
308
|
MaintenanceError: If branch is in maintenance mode
|
133
309
|
"""
|
134
310
|
# Check maintenance mode
|
135
311
|
check_maintenance_mode(self.project_root, self.database, self.branch)
|
136
312
|
|
137
|
-
# Can't delete main
|
313
|
+
# Can't delete main or __empty__ tenants
|
138
314
|
if tenant_name == "main":
|
139
315
|
raise ValueError("Cannot delete the main tenant")
|
316
|
+
if tenant_name == self._empty_tenant_name:
|
317
|
+
raise ValueError(f"Cannot delete the reserved '{self._empty_tenant_name}' tenant")
|
140
318
|
|
141
|
-
#
|
142
|
-
|
143
|
-
|
144
|
-
|
319
|
+
# Ensure initialization
|
320
|
+
self._ensure_initialized()
|
321
|
+
|
322
|
+
if not self.branch_id:
|
323
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
324
|
+
|
325
|
+
# Get tenant info from metadata
|
326
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
327
|
+
if not tenant_info:
|
145
328
|
raise ValueError(f"Tenant '{tenant_name}' does not exist")
|
146
329
|
|
147
|
-
# Delete
|
330
|
+
# Delete from metadata database (this handles cascade delete)
|
331
|
+
with self.metadata_db.conn:
|
332
|
+
self.metadata_db.conn.execute(
|
333
|
+
"DELETE FROM tenants WHERE id = ?",
|
334
|
+
(tenant_info['id'],)
|
335
|
+
)
|
336
|
+
|
337
|
+
# Delete tenant database file and related files (if they exist and it's materialized)
|
338
|
+
if tenant_info['materialized']:
|
339
|
+
db_path = get_tenant_db_path(
|
340
|
+
self.project_root, self.database, self.branch, tenant_name
|
341
|
+
)
|
342
|
+
if db_path.exists():
|
343
|
+
db_path.unlink()
|
344
|
+
|
345
|
+
# Also remove WAL and SHM files if they exist
|
346
|
+
wal_path = db_path.with_suffix(".db-wal")
|
347
|
+
shm_path = db_path.with_suffix(".db-shm")
|
348
|
+
|
349
|
+
if wal_path.exists():
|
350
|
+
wal_path.unlink()
|
351
|
+
if shm_path.exists():
|
352
|
+
shm_path.unlink()
|
353
|
+
|
354
|
+
def optimize_all_tenants(self, force: bool = False) -> dict:
    """Run storage optimization over every materialized tenant of the branch.

    Intended for periodic invocation (e.g., every minute). Each tenant is
    handed to ``optimize_tenant_storage``; failures are collected rather than
    raised so one bad tenant does not stop the sweep.

    Args:
        force: Passed through to ``optimize_tenant_storage``. When False,
            "main" and the ``__empty__`` template are skipped here without
            being passed on.

    Returns:
        Dictionary with optimization results:
        - optimized: List of tenant names that were optimized
        - skipped: List of tenant names that were skipped
        - errors: List of tuples (tenant_name, error_message)
    """
    results = {"optimized": [], "skipped": [], "errors": []}

    self._ensure_initialized()

    if not self.branch_id:
        return results

    system_names = ("main", self._empty_tenant_name)

    for record in self.metadata_db.list_tenants(self.branch_id, materialized_only=True):
        name = record['name']

        if name in system_names and not force:
            results["skipped"].append(name)
            continue

        try:
            did_optimize = self.optimize_tenant_storage(name, force=force)
        except Exception as exc:
            results["errors"].append((name, str(exc)))
        else:
            bucket = "optimized" if did_optimize else "skipped"
            results[bucket].append(name)

    return results
|
404
|
+
|
405
|
+
def optimize_tenant_storage(self, tenant_name: str, force: bool = False) -> bool:
    """Optimize a tenant database with VACUUM and an optional page size change.

    Behaviour:
    1. If the file is larger than 1MB and its page size differs from the
       size-based optimum, the database is rebuilt with the new page size
       via ``VACUUM INTO`` and swapped into place.
    2. Otherwise, if ``force`` is set or the page size is 512 bytes, a plain
       ``VACUUM`` is run to defragment and reclaim space.

    Args:
        tenant_name: Name of tenant to optimize
        force: If True, run VACUUM even if the page size is already optimal.
            Note that "main" and the ``__empty__`` template are never
            optimized here, regardless of ``force``.

    Returns:
        True if an optimization was performed; False if nothing was done
        (unknown branch, system tenant, tenant missing or not materialized,
        database file absent, or no optimization needed).
    """
    self._ensure_initialized()

    if not self.branch_id:
        return False

    # Skip system tenants
    if tenant_name in ["main", self._empty_tenant_name]:
        return False

    tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
    if not tenant_info or not tenant_info['materialized']:
        return False

    db_path = get_tenant_db_path(
        self.project_root, self.database, self.branch, tenant_name
    )

    if not db_path.exists():
        return False

    # Check current page size; always release the handle, even on error.
    conn = sqlite3.connect(str(db_path))
    try:
        current_page_size = conn.execute("PRAGMA page_size").fetchone()[0]
    finally:
        conn.close()

    optimal_page_size = self._get_optimal_page_size(db_path)

    # Only rebuild with a new page size for databases larger than 1MB.
    needs_page_size_change = (
        current_page_size != optimal_page_size
        and db_path.stat().st_size > 1024 * 1024
    )

    if needs_page_size_change:
        temp_path = db_path.with_suffix('.tmp')
        # VACUUM INTO refuses to write to an existing file; clear any
        # leftover from an earlier interrupted run.
        if temp_path.exists():
            temp_path.unlink()

        conn = sqlite3.connect(str(db_path))
        try:
            conn.isolation_level = None  # Autocommit mode required for VACUUM
            conn.execute(f"PRAGMA page_size = {int(optimal_page_size)}")
            # Bind the target path instead of splicing it into the SQL text.
            conn.execute("VACUUM INTO ?", (str(temp_path),))
        except Exception:
            # Do not leave a partial rebuild behind.
            if temp_path.exists():
                temp_path.unlink()
            raise
        finally:
            conn.close()

        # Replace original with optimized version
        shutil.move(str(temp_path), str(db_path))
        return True

    if force or current_page_size == 512:
        # Plain VACUUM to defragment and reclaim space; 512-byte page
        # databases are always vacuumed to keep them compact.
        conn = sqlite3.connect(str(db_path))
        try:
            conn.isolation_level = None
            conn.execute("VACUUM")
        finally:
            conn.close()
        return True

    return False
|
475
|
+
|
476
|
+
def _get_optimal_page_size(self, db_path: Path) -> int:
|
477
|
+
"""Determine optimal page size based on database file size.
|
478
|
+
|
479
|
+
Args:
|
480
|
+
db_path: Path to database file
|
481
|
+
|
482
|
+
Returns:
|
483
|
+
Optimal page size in bytes
|
484
|
+
"""
|
485
|
+
if not db_path.exists():
|
486
|
+
return 512 # Default for new/empty databases
|
487
|
+
|
488
|
+
size_mb = db_path.stat().st_size / (1024 * 1024)
|
489
|
+
|
490
|
+
if size_mb < 0.1: # < 100KB
|
491
|
+
return 512
|
492
|
+
elif size_mb < 10: # < 10MB
|
493
|
+
return 4096 # 4KB - good balance for small-medium DBs
|
494
|
+
elif size_mb < 100: # < 100MB
|
495
|
+
return 8192 # 8KB - better for larger rows
|
496
|
+
else: # >= 100MB
|
497
|
+
return 16384 # 16KB - optimal for bulk operations
|
498
|
+
|
499
|
+
def materialize_tenant(self, tenant_name: str) -> None:
    """Turn a lazy tenant into a real database file.

    The file is produced by copying the ``__empty__`` template database
    (created on demand) to the tenant's path, after which the tenant is
    flagged as materialized in the metadata database. A tenant that is
    already materialized is left untouched and the call returns silently.

    Args:
        tenant_name: Name of the tenant to materialize

    Raises:
        ValueError: If the branch or the tenant is missing from metadata
    """
    self._ensure_initialized()

    if not self.branch_id:
        raise ValueError(f"Branch '{self.branch}' not found in metadata database")

    record = self.metadata_db.get_tenant(self.branch_id, tenant_name)
    if not record:
        raise ValueError(f"Tenant '{tenant_name}' does not exist")

    if record['materialized']:
        return  # Nothing to do

    location = (self.project_root, self.database, self.branch)

    target_db = get_tenant_db_path(*location, tenant_name)
    target_db.parent.mkdir(parents=True, exist_ok=True)

    # The template must exist (with the current schema) before it is copied.
    self._ensure_empty_tenant()
    template_db = get_tenant_db_path(*location, self._empty_tenant_name)

    # The template is already vacuumed with 512-byte pages, so a plain file
    # copy is enough; no VACUUM on the new tenant is needed.
    shutil.copy2(template_db, target_db)

    self.metadata_db.mark_tenant_materialized(record['id'])
|
161
546
|
|
162
547
|
def copy_tenant(self, source_tenant: str, target_tenant: str) -> Tenant:
|
163
548
|
"""Copy a tenant to a new tenant.
|
@@ -189,17 +574,33 @@ class TenantManager:
|
|
189
574
|
# Validate target doesn't exist
|
190
575
|
if target_tenant in list_tenants(self.project_root, self.database, self.branch):
|
191
576
|
raise ValueError(f"Tenant '{target_tenant}' already exists")
|
577
|
+
|
578
|
+
# Ensure initialization
|
579
|
+
self._ensure_initialized()
|
580
|
+
|
581
|
+
if not self.branch_id:
|
582
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
192
583
|
|
193
|
-
#
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
584
|
+
# Create tenant in metadata database first with shard
|
585
|
+
tenant_id = str(uuid.uuid4())
|
586
|
+
target_shard = self._calculate_shard(target_tenant)
|
587
|
+
metadata = {
|
588
|
+
"description": f"Copied from {source_tenant}",
|
589
|
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
590
|
+
}
|
591
|
+
self.metadata_db.create_tenant(tenant_id, self.branch_id, target_tenant, target_shard, metadata)
|
592
|
+
|
593
|
+
# Get paths using sharded approach
|
594
|
+
source_path = self._get_sharded_tenant_db_path(source_tenant)
|
595
|
+
target_path = self._get_sharded_tenant_db_path(target_tenant)
|
596
|
+
|
597
|
+
# Directory creation is handled by _get_sharded_tenant_db_path
|
200
598
|
|
201
599
|
# Copy database file
|
202
600
|
shutil.copy2(source_path, target_path)
|
601
|
+
|
602
|
+
# Mark as materialized since we copied a physical file
|
603
|
+
self.metadata_db.mark_tenant_materialized(tenant_id)
|
203
604
|
|
204
605
|
return Tenant(
|
205
606
|
name=target_tenant,
|
@@ -233,47 +634,268 @@ class TenantManager:
|
|
233
634
|
# Validate new doesn't exist
|
234
635
|
if new_name in list_tenants(self.project_root, self.database, self.branch):
|
235
636
|
raise ValueError(f"Tenant '{new_name}' already exists")
|
637
|
+
|
638
|
+
# Ensure initialization
|
639
|
+
self._ensure_initialized()
|
640
|
+
|
641
|
+
if not self.branch_id:
|
642
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
643
|
+
|
644
|
+
# Get tenant info from metadata
|
645
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, old_name)
|
646
|
+
if not tenant_info:
|
647
|
+
raise ValueError(f"Tenant '{old_name}' does not exist in metadata")
|
648
|
+
|
649
|
+
# Get old path before updating metadata (if materialized)
|
650
|
+
old_path = None
|
651
|
+
new_path = None
|
652
|
+
if tenant_info['materialized']:
|
653
|
+
# Calculate paths before metadata update
|
654
|
+
old_shard = tenant_info['shard']
|
655
|
+
new_shard = self._calculate_shard(new_name)
|
656
|
+
|
657
|
+
branch_path = get_branch_path(self.project_root, self.database, self.branch)
|
658
|
+
tenants_dir = branch_path / "tenants"
|
659
|
+
|
660
|
+
old_path = tenants_dir / old_shard / f"{old_name}.db"
|
661
|
+
new_shard_dir = tenants_dir / new_shard
|
662
|
+
new_shard_dir.mkdir(parents=True, exist_ok=True)
|
663
|
+
new_path = new_shard_dir / f"{new_name}.db"
|
664
|
+
|
665
|
+
# Update metadata database
|
666
|
+
new_shard = self._calculate_shard(new_name)
|
667
|
+
with self.metadata_db.conn:
|
668
|
+
self.metadata_db.conn.execute(
|
669
|
+
"UPDATE tenants SET name = ?, shard = ? WHERE id = ?",
|
670
|
+
(new_name, new_shard, tenant_info['id'])
|
671
|
+
)
|
236
672
|
|
237
|
-
#
|
238
|
-
old_path
|
239
|
-
self.project_root, self.database, self.branch, old_name
|
240
|
-
)
|
241
|
-
new_path = get_tenant_db_path(
|
242
|
-
self.project_root, self.database, self.branch, new_name
|
243
|
-
)
|
673
|
+
# Rename physical files if tenant is materialized
|
674
|
+
if tenant_info['materialized'] and old_path and new_path:
|
244
675
|
|
245
|
-
|
246
|
-
|
676
|
+
# Rename database file if it exists
|
677
|
+
if old_path.exists():
|
678
|
+
new_path.parent.mkdir(parents=True, exist_ok=True)
|
679
|
+
old_path.rename(new_path)
|
247
680
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
681
|
+
# Also rename WAL and SHM files if they exist
|
682
|
+
old_wal = old_path.with_suffix(".db-wal")
|
683
|
+
old_shm = old_path.with_suffix(".db-shm")
|
684
|
+
new_wal = new_path.with_suffix(".db-wal")
|
685
|
+
new_shm = new_path.with_suffix(".db-shm")
|
253
686
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
687
|
+
if old_wal.exists():
|
688
|
+
old_wal.rename(new_wal)
|
689
|
+
if old_shm.exists():
|
690
|
+
old_shm.rename(new_shm)
|
258
691
|
|
259
|
-
def
|
692
|
+
def get_tenant_size(self, tenant_name: str) -> dict:
    """Get storage size information for a tenant.

    Args:
        tenant_name: Name of the tenant

    Returns:
        Dictionary with size information:
        - name: Tenant name
        - materialized: Whether tenant is materialized
        - size_bytes: Size in bytes (0 if lazy or the file is absent)
        - size_kb: Size in KB
        - size_mb: Size in MB
        - page_size: SQLite page size (None if unavailable)
        - page_count: Number of pages (None if unavailable)

    Raises:
        ValueError: If the branch or the tenant doesn't exist in metadata
    """
    self._ensure_initialized()

    if not self.branch_id:
        raise ValueError(f"Branch '{self.branch}' not found")

    tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
    if not tenant_info:
        raise ValueError(f"Tenant '{tenant_name}' does not exist")

    result = {
        "name": tenant_name,
        "materialized": tenant_info['materialized'],
        "size_bytes": 0,
        "size_kb": 0.0,
        "size_mb": 0.0,
        "page_size": None,
        "page_count": None
    }

    # Lazy tenants have no file of their own: report zeros.
    if not tenant_info['materialized']:
        return result

    db_path = get_tenant_db_path(
        self.project_root, self.database, self.branch, tenant_name
    )

    if not db_path.exists():
        return result

    size_bytes = db_path.stat().st_size
    result["size_bytes"] = size_bytes
    result["size_kb"] = size_bytes / 1024
    result["size_mb"] = size_bytes / (1024 * 1024)

    # Page information is best effort: SQLite failures leave the fields as
    # None, and the connection is always closed.
    try:
        conn = sqlite3.connect(str(db_path))
    except sqlite3.Error:
        return result
    try:
        result["page_size"] = conn.execute("PRAGMA page_size").fetchone()[0]
        result["page_count"] = conn.execute("PRAGMA page_count").fetchone()[0]
    except sqlite3.Error:
        pass  # Unreadable or non-SQLite file: keep size info only
    finally:
        conn.close()

    return result
|
757
|
+
|
758
|
+
def get_all_tenant_sizes(self) -> dict:
    """Collect storage size information for every tenant in the branch.

    Returns:
        Dictionary with:
        - tenants: Per-tenant size info (from ``get_tenant_size``), largest first
        - total_size_bytes: Combined size of all materialized tenants
        - total_size_mb: Combined size in MB
        - lazy_count: Number of lazy tenants
        - materialized_count: Number of materialized tenants

        All counters are zero and the list is empty when the branch is not
        known to the metadata database.
    """
    self._ensure_initialized()

    summary = {
        "tenants": [],
        "total_size_bytes": 0,
        "total_size_mb": 0.0,
        "lazy_count": 0,
        "materialized_count": 0
    }

    if not self.branch_id:
        return summary

    for record in self.metadata_db.list_tenants(self.branch_id):
        info = self.get_tenant_size(record['name'])
        summary["tenants"].append(info)

        if not info["materialized"]:
            summary["lazy_count"] += 1
            continue

        summary["materialized_count"] += 1
        summary["total_size_bytes"] += info["size_bytes"]

    summary["total_size_mb"] = summary["total_size_bytes"] / (1024 * 1024)

    # Largest tenants first
    summary["tenants"] = sorted(
        summary["tenants"], key=lambda entry: entry["size_bytes"], reverse=True
    )

    return summary
|
809
|
+
|
810
|
+
def is_tenant_lazy(self, tenant_name: str) -> bool:
    """Report whether a tenant is registered but not yet materialized.

    Args:
        tenant_name: Name of the tenant to check

    Returns:
        True only if the tenant exists in metadata and is not materialized.
        The reserved ``__empty__`` tenant, an unknown branch, and an unknown
        tenant all yield False.
    """
    # The template tenant is never treated as lazy.
    if tenant_name == self._empty_tenant_name:
        return False

    self._ensure_initialized()

    if not self.branch_id:
        return False

    record = self.metadata_db.get_tenant(self.branch_id, tenant_name)
    return bool(record) and not record['materialized']
|
836
|
+
|
837
|
+
def get_tenant_db_path_for_operation(self, tenant_name: str, is_write: bool = False) -> Path:
    """Choose the database file a tenant operation should run against.

    Materialized tenants (and the ``__empty__`` template itself) use their
    own file. For lazy tenants:
    - reads are served from the ``__empty__`` template, created if needed
    - writes materialize the tenant first and then use its own file

    Args:
        tenant_name: Name of the tenant
        is_write: Whether this is for a write operation

    Returns:
        Path to the appropriate database file

    Raises:
        ValueError: If the branch or the tenant is missing from metadata
    """
    self._ensure_initialized()

    if not self.branch_id:
        raise ValueError(f"Branch '{self.branch}' not found in metadata database")

    # The template tenant is exempt from the existence check.
    is_template = tenant_name == self._empty_tenant_name
    if not is_template and not self.metadata_db.get_tenant(self.branch_id, tenant_name):
        raise ValueError(f"Tenant '{tenant_name}' does not exist")

    # Materialized tenants: use their actual database.
    if not self.is_tenant_lazy(tenant_name):
        return self._get_sharded_tenant_db_path(tenant_name)

    # Lazy tenant, write: materialize first.
    if is_write:
        self.materialize_tenant(tenant_name)
        return self._get_sharded_tenant_db_path(tenant_name)

    # Lazy tenant, read: serve from the shared template.
    self._ensure_empty_tenant()
    return self._get_sharded_tenant_db_path(self._empty_tenant_name)
|
879
|
+
|
880
|
+
def get_tenant_connection(self, tenant_name: str, is_write: bool = False) -> DatabaseConnection:
    """Open a database connection for a tenant.

    The database file is chosen by ``get_tenant_db_path_for_operation``, so
    the ``is_write`` flag decides whether a lazy tenant is materialized.

    IMPORTANT: Use the returned connection as a context manager (``with``
    statement) so it is cleaned up properly and file descriptors do not leak.

    Args:
        tenant_name: Tenant name
        is_write: Whether this connection will be used for writes

    Returns:
        DatabaseConnection object (must be used with 'with' statement)

    Raises:
        ValueError: If tenant doesn't exist

    Example:
        with tenant_manager.get_tenant_connection("main") as conn:
            conn.execute("SELECT * FROM table")
    """
    return DatabaseConnection(
        self.get_tenant_db_path_for_operation(tenant_name, is_write)
    )
|