cinchdb 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cinchdb/cli/commands/column.py +3 -4
- cinchdb/cli/commands/database.py +58 -60
- cinchdb/cli/commands/table.py +3 -3
- cinchdb/cli/main.py +1 -7
- cinchdb/cli/utils.py +23 -0
- cinchdb/core/database.py +138 -11
- cinchdb/core/initializer.py +188 -10
- cinchdb/core/path_utils.py +44 -27
- cinchdb/infrastructure/metadata_connection_pool.py +145 -0
- cinchdb/infrastructure/metadata_db.py +376 -0
- cinchdb/managers/branch.py +119 -23
- cinchdb/managers/change_applier.py +30 -13
- cinchdb/managers/column.py +4 -10
- cinchdb/managers/query.py +40 -4
- cinchdb/managers/table.py +8 -6
- cinchdb/managers/tenant.py +698 -167
- cinchdb/models/table.py +0 -4
- cinchdb/models/tenant.py +4 -2
- {cinchdb-0.1.10.dist-info → cinchdb-0.1.12.dist-info}/METADATA +10 -37
- {cinchdb-0.1.10.dist-info → cinchdb-0.1.12.dist-info}/RECORD +23 -21
- {cinchdb-0.1.10.dist-info → cinchdb-0.1.12.dist-info}/WHEEL +0 -0
- {cinchdb-0.1.10.dist-info → cinchdb-0.1.12.dist-info}/entry_points.txt +0 -0
- {cinchdb-0.1.10.dist-info → cinchdb-0.1.12.dist-info}/licenses/LICENSE +0 -0
cinchdb/managers/tenant.py
CHANGED
@@ -1,18 +1,25 @@
|
|
1
1
|
"""Tenant management for CinchDB."""
|
2
2
|
|
3
|
+
import hashlib
|
3
4
|
import shutil
|
5
|
+
import sqlite3
|
6
|
+
import uuid
|
4
7
|
from pathlib import Path
|
5
8
|
from typing import List, Optional
|
9
|
+
from datetime import datetime, timezone
|
6
10
|
|
7
11
|
from cinchdb.models import Tenant
|
8
12
|
from cinchdb.core.path_utils import (
|
9
13
|
get_branch_path,
|
10
14
|
get_tenant_db_path,
|
15
|
+
get_database_path,
|
11
16
|
list_tenants,
|
12
17
|
)
|
13
18
|
from cinchdb.core.connection import DatabaseConnection
|
14
19
|
from cinchdb.core.maintenance import check_maintenance_mode
|
15
20
|
from cinchdb.utils.name_validator import validate_name
|
21
|
+
from cinchdb.infrastructure.metadata_db import MetadataDB
|
22
|
+
from cinchdb.infrastructure.metadata_connection_pool import get_metadata_db
|
16
23
|
|
17
24
|
|
18
25
|
class TenantManager:
|
@@ -30,29 +37,69 @@ class TenantManager:
|
|
30
37
|
self.database = database
|
31
38
|
self.branch = branch
|
32
39
|
self.branch_path = get_branch_path(self.project_root, database, branch)
|
33
|
-
|
34
|
-
|
40
|
+
self._empty_tenant_name = "__empty__" # Reserved name for lazy tenant template
|
41
|
+
|
42
|
+
# Lazy-initialized pooled connection
|
43
|
+
self._metadata_db = None
|
44
|
+
self.database_id = None
|
45
|
+
self.branch_id = None
|
46
|
+
|
47
|
+
def _ensure_initialized(self) -> None:
|
48
|
+
"""Ensure metadata connection and IDs are initialized."""
|
49
|
+
if self._metadata_db is None:
|
50
|
+
self._metadata_db = get_metadata_db(self.project_root)
|
51
|
+
|
52
|
+
# Initialize database and branch IDs on first access
|
53
|
+
if self.database_id is None:
|
54
|
+
db_info = self._metadata_db.get_database(self.database)
|
55
|
+
if db_info:
|
56
|
+
self.database_id = db_info['id']
|
57
|
+
branch_info = self._metadata_db.get_branch(self.database_id, self.branch)
|
58
|
+
if branch_info:
|
59
|
+
self.branch_id = branch_info['id']
|
60
|
+
|
61
|
+
@property
|
62
|
+
def metadata_db(self) -> MetadataDB:
|
63
|
+
"""Get metadata database connection (lazy-initialized from pool)."""
|
64
|
+
self._ensure_initialized()
|
65
|
+
return self._metadata_db
|
66
|
+
|
67
|
+
def list_tenants(self, include_system: bool = False) -> List[Tenant]:
|
35
68
|
"""List all tenants in the branch.
|
36
69
|
|
70
|
+
Args:
|
71
|
+
include_system: If True, include system tenants like __empty__
|
72
|
+
|
37
73
|
Returns:
|
38
74
|
List of Tenant objects
|
39
75
|
"""
|
40
|
-
|
76
|
+
# Ensure initialization
|
77
|
+
self._ensure_initialized()
|
78
|
+
|
79
|
+
if not self.branch_id:
|
80
|
+
return []
|
81
|
+
|
82
|
+
# Get tenants from metadata database
|
83
|
+
tenant_records = self.metadata_db.list_tenants(self.branch_id)
|
41
84
|
tenants = []
|
42
85
|
|
43
|
-
for
|
86
|
+
for record in tenant_records:
|
87
|
+
# Filter out the __empty__ tenant from user-facing listings unless requested
|
88
|
+
if not include_system and record['name'] == self._empty_tenant_name:
|
89
|
+
continue
|
90
|
+
|
44
91
|
tenant = Tenant(
|
45
|
-
name=name,
|
92
|
+
name=record['name'],
|
46
93
|
database=self.database,
|
47
94
|
branch=self.branch,
|
48
|
-
is_main=(name == "main"),
|
95
|
+
is_main=(record['name'] == "main"),
|
49
96
|
)
|
50
97
|
tenants.append(tenant)
|
51
98
|
|
52
99
|
return tenants
|
53
100
|
|
54
101
|
def create_tenant(
|
55
|
-
self, tenant_name: str, description: Optional[str] = None, lazy: bool =
|
102
|
+
self, tenant_name: str, description: Optional[str] = None, lazy: bool = True
|
56
103
|
) -> Tenant:
|
57
104
|
"""Create a new tenant by copying schema from main tenant.
|
58
105
|
|
@@ -65,74 +112,60 @@ class TenantManager:
|
|
65
112
|
Created Tenant object
|
66
113
|
|
67
114
|
Raises:
|
68
|
-
ValueError: If tenant already exists
|
115
|
+
ValueError: If tenant already exists or uses reserved name
|
69
116
|
InvalidNameError: If tenant name is invalid
|
70
117
|
MaintenanceError: If branch is in maintenance mode
|
71
118
|
"""
|
119
|
+
# Check for reserved name
|
120
|
+
if tenant_name == self._empty_tenant_name:
|
121
|
+
raise ValueError(f"'{self._empty_tenant_name}' is a reserved tenant name")
|
122
|
+
|
72
123
|
# Validate tenant name
|
73
124
|
validate_name(tenant_name, "tenant")
|
74
125
|
|
75
126
|
# Check maintenance mode
|
76
127
|
check_maintenance_mode(self.project_root, self.database, self.branch)
|
77
|
-
|
78
|
-
# Check if tenant metadata already exists
|
79
|
-
tenants_dir = self.branch_path / "tenants"
|
80
|
-
tenant_meta_file = tenants_dir / f".{tenant_name}.meta"
|
81
|
-
new_db_path = get_tenant_db_path(
|
82
|
-
self.project_root, self.database, self.branch, tenant_name
|
83
|
-
)
|
84
128
|
|
85
|
-
#
|
86
|
-
|
129
|
+
# Ensure initialization
|
130
|
+
self._ensure_initialized()
|
131
|
+
|
132
|
+
if not self.branch_id:
|
133
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
134
|
+
|
135
|
+
# Check if tenant already exists in metadata
|
136
|
+
existing_tenant = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
137
|
+
if existing_tenant:
|
87
138
|
raise ValueError(f"Tenant '{tenant_name}' already exists")
|
88
139
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
140
|
+
# Create tenant ID
|
141
|
+
tenant_id = str(uuid.uuid4())
|
142
|
+
|
143
|
+
# Calculate shard for tenant
|
144
|
+
shard = self._calculate_shard(tenant_name)
|
145
|
+
|
146
|
+
# Create tenant in metadata database
|
147
|
+
metadata = {
|
148
|
+
"description": description,
|
149
|
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
150
|
+
}
|
151
|
+
self.metadata_db.create_tenant(tenant_id, self.branch_id, tenant_name, shard, metadata)
|
152
|
+
|
153
|
+
if not lazy:
|
154
|
+
# Ensure __empty__ tenant exists with current schema
|
155
|
+
self._ensure_empty_tenant()
|
101
156
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
# Create actual database file (existing behavior)
|
106
|
-
main_db_path = get_tenant_db_path(
|
107
|
-
self.project_root, self.database, self.branch, "main"
|
108
|
-
)
|
157
|
+
# Create actual database file using sharded paths
|
158
|
+
new_db_path = self._get_sharded_tenant_db_path(tenant_name)
|
159
|
+
empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)
|
109
160
|
|
110
|
-
#
|
111
|
-
|
112
|
-
|
113
|
-
#
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
WHERE type='table'
|
119
|
-
AND name NOT LIKE 'sqlite_%'
|
120
|
-
""")
|
121
|
-
tables = [row["name"] for row in result.fetchall()]
|
122
|
-
|
123
|
-
# Clear data from each table
|
124
|
-
for table in tables:
|
125
|
-
conn.execute(f"DELETE FROM {table}")
|
126
|
-
|
127
|
-
conn.commit()
|
128
|
-
|
129
|
-
# Vacuum the database to reduce size
|
130
|
-
# Must use raw sqlite3 connection with autocommit mode for VACUUM
|
131
|
-
import sqlite3
|
132
|
-
vacuum_conn = sqlite3.connect(str(new_db_path))
|
133
|
-
vacuum_conn.isolation_level = None # Autocommit mode required for VACUUM
|
134
|
-
vacuum_conn.execute("VACUUM")
|
135
|
-
vacuum_conn.close()
|
161
|
+
# Directory creation is handled by _get_sharded_tenant_db_path
|
162
|
+
|
163
|
+
# Copy __empty__ tenant database to new tenant
|
164
|
+
# __empty__ already has 512-byte pages and no data
|
165
|
+
shutil.copy2(empty_db_path, new_db_path)
|
166
|
+
|
167
|
+
# Mark as materialized in metadata
|
168
|
+
self.metadata_db.mark_tenant_materialized(tenant_id)
|
136
169
|
|
137
170
|
return Tenant(
|
138
171
|
name=tenant_name,
|
@@ -142,6 +175,128 @@ class TenantManager:
|
|
142
175
|
is_main=False,
|
143
176
|
)
|
144
177
|
|
178
|
+
def _calculate_shard(self, tenant_name: str) -> str:
|
179
|
+
"""Calculate the shard directory for a tenant using SHA256 hash.
|
180
|
+
|
181
|
+
Args:
|
182
|
+
tenant_name: Name of the tenant
|
183
|
+
|
184
|
+
Returns:
|
185
|
+
Two-character hex string (e.g., "a0", "ff")
|
186
|
+
"""
|
187
|
+
hash_val = hashlib.sha256(tenant_name.encode('utf-8')).hexdigest()
|
188
|
+
return hash_val[:2]
|
189
|
+
|
190
|
+
def _get_sharded_tenant_db_path(self, tenant_name: str) -> Path:
|
191
|
+
"""Get the sharded database path for a tenant using metadata DB lookup.
|
192
|
+
|
193
|
+
Args:
|
194
|
+
tenant_name: Name of the tenant
|
195
|
+
|
196
|
+
Returns:
|
197
|
+
Path to the tenant database file in its shard directory
|
198
|
+
|
199
|
+
Raises:
|
200
|
+
ValueError: If tenant doesn't exist in metadata
|
201
|
+
"""
|
202
|
+
# For __empty__ tenant, calculate shard directly
|
203
|
+
if tenant_name == self._empty_tenant_name:
|
204
|
+
shard = self._calculate_shard(tenant_name)
|
205
|
+
else:
|
206
|
+
# Look up shard from metadata DB
|
207
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
208
|
+
if not tenant_info or not tenant_info.get('shard'):
|
209
|
+
raise ValueError(f"Tenant '{tenant_name}' not found in metadata or missing shard info")
|
210
|
+
shard = tenant_info['shard']
|
211
|
+
|
212
|
+
# Build sharded path
|
213
|
+
branch_path = get_branch_path(self.project_root, self.database, self.branch)
|
214
|
+
tenants_dir = branch_path / "tenants"
|
215
|
+
shard_dir = tenants_dir / shard
|
216
|
+
|
217
|
+
# Ensure shard directory exists
|
218
|
+
shard_dir.mkdir(parents=True, exist_ok=True)
|
219
|
+
|
220
|
+
return shard_dir / f"{tenant_name}.db"
|
221
|
+
|
222
|
+
def _ensure_empty_tenant(self) -> None:
|
223
|
+
"""Ensure the __empty__ tenant exists with current schema.
|
224
|
+
|
225
|
+
This tenant serves as a template for lazy tenants.
|
226
|
+
It's created on-demand when first lazy tenant is read.
|
227
|
+
"""
|
228
|
+
# Ensure initialization
|
229
|
+
self._ensure_initialized()
|
230
|
+
|
231
|
+
if not self.branch_id:
|
232
|
+
return
|
233
|
+
|
234
|
+
# Check if __empty__ exists in metadata
|
235
|
+
empty_tenant = self.metadata_db.get_tenant(self.branch_id, self._empty_tenant_name)
|
236
|
+
|
237
|
+
empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)
|
238
|
+
|
239
|
+
# Create in metadata if doesn't exist (should already be created during branch/database init)
|
240
|
+
if not empty_tenant:
|
241
|
+
tenant_id = str(uuid.uuid4())
|
242
|
+
shard = self._calculate_shard(self._empty_tenant_name)
|
243
|
+
self.metadata_db.create_tenant(
|
244
|
+
tenant_id, self.branch_id, self._empty_tenant_name, shard,
|
245
|
+
metadata={"system": True, "description": "Template for lazy tenants"}
|
246
|
+
)
|
247
|
+
# Don't mark as materialized yet - it will be when the file is created
|
248
|
+
empty_tenant = {"id": tenant_id}
|
249
|
+
|
250
|
+
# If __empty__ database doesn't exist, create it by copying from main tenant
|
251
|
+
if not empty_db_path.exists():
|
252
|
+
empty_db_path.parent.mkdir(parents=True, exist_ok=True)
|
253
|
+
|
254
|
+
# Get main tenant database path (may need to materialize it first)
|
255
|
+
main_db_path = self._get_sharded_tenant_db_path("main")
|
256
|
+
|
257
|
+
if main_db_path.exists():
|
258
|
+
# Copy main tenant database to __empty__
|
259
|
+
shutil.copy2(main_db_path, empty_db_path)
|
260
|
+
|
261
|
+
# Clear all data from tables (keep schema only)
|
262
|
+
with DatabaseConnection(empty_db_path) as conn:
|
263
|
+
# Get all tables
|
264
|
+
result = conn.execute("""
|
265
|
+
SELECT name FROM sqlite_master
|
266
|
+
WHERE type='table'
|
267
|
+
AND name NOT LIKE 'sqlite_%'
|
268
|
+
""")
|
269
|
+
tables = [row["name"] for row in result.fetchall()]
|
270
|
+
|
271
|
+
# Clear data from each table
|
272
|
+
for table in tables:
|
273
|
+
conn.execute(f"DELETE FROM {table}")
|
274
|
+
|
275
|
+
conn.commit()
|
276
|
+
else:
|
277
|
+
# If main doesn't exist either, create empty database
|
278
|
+
empty_db_path.touch()
|
279
|
+
with DatabaseConnection(empty_db_path):
|
280
|
+
pass # Just initialize with PRAGMAs
|
281
|
+
|
282
|
+
# Optimize with small page size for empty template
|
283
|
+
# We need to rebuild the database with new page size
|
284
|
+
temp_path = empty_db_path.with_suffix('.tmp')
|
285
|
+
|
286
|
+
# Create new database with 512-byte pages
|
287
|
+
vacuum_conn = sqlite3.connect(str(empty_db_path))
|
288
|
+
vacuum_conn.isolation_level = None
|
289
|
+
vacuum_conn.execute("PRAGMA page_size = 512")
|
290
|
+
vacuum_conn.execute(f"VACUUM INTO '{temp_path}'")
|
291
|
+
vacuum_conn.close()
|
292
|
+
|
293
|
+
# Replace original with optimized version
|
294
|
+
shutil.move(str(temp_path), str(empty_db_path))
|
295
|
+
|
296
|
+
# Mark as materialized now that the file exists
|
297
|
+
self.metadata_db.mark_tenant_materialized(empty_tenant['id'])
|
298
|
+
|
299
|
+
|
145
300
|
def delete_tenant(self, tenant_name: str) -> None:
|
146
301
|
"""Delete a tenant.
|
147
302
|
|
@@ -149,44 +304,198 @@ class TenantManager:
|
|
149
304
|
tenant_name: Name of tenant to delete
|
150
305
|
|
151
306
|
Raises:
|
152
|
-
ValueError: If tenant doesn't exist
|
307
|
+
ValueError: If tenant doesn't exist, is main tenant, or is reserved
|
153
308
|
MaintenanceError: If branch is in maintenance mode
|
154
309
|
"""
|
155
310
|
# Check maintenance mode
|
156
311
|
check_maintenance_mode(self.project_root, self.database, self.branch)
|
157
312
|
|
158
|
-
# Can't delete main
|
313
|
+
# Can't delete main or __empty__ tenants
|
159
314
|
if tenant_name == "main":
|
160
315
|
raise ValueError("Cannot delete the main tenant")
|
316
|
+
if tenant_name == self._empty_tenant_name:
|
317
|
+
raise ValueError(f"Cannot delete the reserved '{self._empty_tenant_name}' tenant")
|
161
318
|
|
162
|
-
#
|
163
|
-
|
164
|
-
|
165
|
-
|
319
|
+
# Ensure initialization
|
320
|
+
self._ensure_initialized()
|
321
|
+
|
322
|
+
if not self.branch_id:
|
323
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
324
|
+
|
325
|
+
# Get tenant info from metadata
|
326
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
327
|
+
if not tenant_info:
|
166
328
|
raise ValueError(f"Tenant '{tenant_name}' does not exist")
|
167
329
|
|
168
|
-
#
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
330
|
+
# Delete from metadata database (this handles cascade delete)
|
331
|
+
with self.metadata_db.conn:
|
332
|
+
self.metadata_db.conn.execute(
|
333
|
+
"DELETE FROM tenants WHERE id = ?",
|
334
|
+
(tenant_info['id'],)
|
335
|
+
)
|
173
336
|
|
174
|
-
# Delete tenant database file and related files (if they exist)
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
db_path.
|
337
|
+
# Delete tenant database file and related files (if they exist and it's materialized)
|
338
|
+
if tenant_info['materialized']:
|
339
|
+
db_path = get_tenant_db_path(
|
340
|
+
self.project_root, self.database, self.branch, tenant_name
|
341
|
+
)
|
342
|
+
if db_path.exists():
|
343
|
+
db_path.unlink()
|
180
344
|
|
181
|
-
|
182
|
-
|
183
|
-
|
345
|
+
# Also remove WAL and SHM files if they exist
|
346
|
+
wal_path = db_path.with_suffix(".db-wal")
|
347
|
+
shm_path = db_path.with_suffix(".db-shm")
|
184
348
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
349
|
+
if wal_path.exists():
|
350
|
+
wal_path.unlink()
|
351
|
+
if shm_path.exists():
|
352
|
+
shm_path.unlink()
|
189
353
|
|
354
|
+
def optimize_all_tenants(self, force: bool = False) -> dict:
|
355
|
+
"""Optimize storage for all materialized tenants in the branch.
|
356
|
+
|
357
|
+
This is designed to be called periodically (e.g., every minute) to:
|
358
|
+
- Reclaim unused space with VACUUM
|
359
|
+
- Adjust page sizes as databases grow
|
360
|
+
- Keep small databases compact
|
361
|
+
|
362
|
+
Args:
|
363
|
+
force: If True, optimize all tenants regardless of size
|
364
|
+
|
365
|
+
Returns:
|
366
|
+
Dictionary with optimization results:
|
367
|
+
- optimized: List of tenant names that were optimized
|
368
|
+
- skipped: List of tenant names that were skipped
|
369
|
+
- errors: List of tuples (tenant_name, error_message)
|
370
|
+
"""
|
371
|
+
results = {
|
372
|
+
"optimized": [],
|
373
|
+
"skipped": [],
|
374
|
+
"errors": []
|
375
|
+
}
|
376
|
+
|
377
|
+
# Ensure initialization
|
378
|
+
self._ensure_initialized()
|
379
|
+
|
380
|
+
if not self.branch_id:
|
381
|
+
return results
|
382
|
+
|
383
|
+
# Get all materialized tenants for this branch
|
384
|
+
tenants = self.metadata_db.list_tenants(self.branch_id, materialized_only=True)
|
385
|
+
|
386
|
+
for tenant in tenants:
|
387
|
+
tenant_name = tenant['name']
|
388
|
+
|
389
|
+
# Skip system tenants unless forced
|
390
|
+
if not force and tenant_name in ["main", self._empty_tenant_name]:
|
391
|
+
results["skipped"].append(tenant_name)
|
392
|
+
continue
|
393
|
+
|
394
|
+
try:
|
395
|
+
optimized = self.optimize_tenant_storage(tenant_name, force=force)
|
396
|
+
if optimized:
|
397
|
+
results["optimized"].append(tenant_name)
|
398
|
+
else:
|
399
|
+
results["skipped"].append(tenant_name)
|
400
|
+
except Exception as e:
|
401
|
+
results["errors"].append((tenant_name, str(e)))
|
402
|
+
|
403
|
+
return results
|
404
|
+
|
405
|
+
def optimize_tenant_storage(self, tenant_name: str, force: bool = False) -> bool:
|
406
|
+
"""Optimize tenant database storage with VACUUM and optional page size adjustment.
|
407
|
+
|
408
|
+
This performs:
|
409
|
+
1. Always: VACUUM to reclaim unused space and defragment
|
410
|
+
2. If needed: Rebuild with optimal page size based on database size
|
411
|
+
|
412
|
+
Args:
|
413
|
+
tenant_name: Name of tenant to optimize
|
414
|
+
force: If True, always perform VACUUM even if page size is optimal
|
415
|
+
|
416
|
+
Returns:
|
417
|
+
True if optimization was performed, False if tenant doesn't exist
|
418
|
+
"""
|
419
|
+
# Ensure initialization
|
420
|
+
self._ensure_initialized()
|
421
|
+
|
422
|
+
if not self.branch_id:
|
423
|
+
return False
|
424
|
+
|
425
|
+
# Skip system tenants
|
426
|
+
if tenant_name in ["main", self._empty_tenant_name]:
|
427
|
+
return False
|
428
|
+
|
429
|
+
# Get tenant info
|
430
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
431
|
+
if not tenant_info or not tenant_info['materialized']:
|
432
|
+
return False
|
433
|
+
|
434
|
+
db_path = get_tenant_db_path(
|
435
|
+
self.project_root, self.database, self.branch, tenant_name
|
436
|
+
)
|
437
|
+
|
438
|
+
if not db_path.exists():
|
439
|
+
return False
|
440
|
+
|
441
|
+
# Check current page size
|
442
|
+
conn = sqlite3.connect(str(db_path))
|
443
|
+
current_page_size = conn.execute("PRAGMA page_size").fetchone()[0]
|
444
|
+
conn.close()
|
445
|
+
|
446
|
+
# Determine optimal page size
|
447
|
+
optimal_page_size = self._get_optimal_page_size(db_path)
|
448
|
+
|
449
|
+
# Decide if we need to rebuild with new page size
|
450
|
+
needs_page_size_change = (current_page_size != optimal_page_size and
|
451
|
+
db_path.stat().st_size > 1024 * 1024) # Only if > 1MB
|
452
|
+
|
453
|
+
if needs_page_size_change:
|
454
|
+
# Rebuild with new page size using VACUUM INTO
|
455
|
+
temp_path = db_path.with_suffix('.tmp')
|
456
|
+
conn = sqlite3.connect(str(db_path))
|
457
|
+
conn.isolation_level = None
|
458
|
+
conn.execute(f"PRAGMA page_size = {optimal_page_size}")
|
459
|
+
conn.execute(f"VACUUM INTO '{temp_path}'")
|
460
|
+
conn.close()
|
461
|
+
|
462
|
+
# Replace original with optimized version
|
463
|
+
shutil.move(str(temp_path), str(db_path))
|
464
|
+
return True
|
465
|
+
elif force or current_page_size == 512:
|
466
|
+
# Just run regular VACUUM to defragment and reclaim space
|
467
|
+
# Always vacuum 512-byte page databases to keep them compact
|
468
|
+
conn = sqlite3.connect(str(db_path))
|
469
|
+
conn.isolation_level = None
|
470
|
+
conn.execute("VACUUM")
|
471
|
+
conn.close()
|
472
|
+
return True
|
473
|
+
|
474
|
+
return False
|
475
|
+
|
476
|
+
def _get_optimal_page_size(self, db_path: Path) -> int:
|
477
|
+
"""Determine optimal page size based on database file size.
|
478
|
+
|
479
|
+
Args:
|
480
|
+
db_path: Path to database file
|
481
|
+
|
482
|
+
Returns:
|
483
|
+
Optimal page size in bytes
|
484
|
+
"""
|
485
|
+
if not db_path.exists():
|
486
|
+
return 512 # Default for new/empty databases
|
487
|
+
|
488
|
+
size_mb = db_path.stat().st_size / (1024 * 1024)
|
489
|
+
|
490
|
+
if size_mb < 0.1: # < 100KB
|
491
|
+
return 512
|
492
|
+
elif size_mb < 10: # < 10MB
|
493
|
+
return 4096 # 4KB - good balance for small-medium DBs
|
494
|
+
elif size_mb < 100: # < 100MB
|
495
|
+
return 8192 # 8KB - better for larger rows
|
496
|
+
else: # >= 100MB
|
497
|
+
return 16384 # 16KB - optimal for bulk operations
|
498
|
+
|
190
499
|
def materialize_tenant(self, tenant_name: str) -> None:
|
191
500
|
"""Materialize a lazy tenant into an actual database file.
|
192
501
|
|
@@ -196,59 +505,44 @@ class TenantManager:
|
|
196
505
|
Raises:
|
197
506
|
ValueError: If tenant doesn't exist or is already materialized
|
198
507
|
"""
|
199
|
-
|
200
|
-
|
201
|
-
db_path = get_tenant_db_path(
|
202
|
-
self.project_root, self.database, self.branch, tenant_name
|
203
|
-
)
|
508
|
+
# Ensure initialization
|
509
|
+
self._ensure_initialized()
|
204
510
|
|
511
|
+
if not self.branch_id:
|
512
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
513
|
+
|
514
|
+
# Get tenant info from metadata
|
515
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
516
|
+
if not tenant_info:
|
517
|
+
raise ValueError(f"Tenant '{tenant_name}' does not exist")
|
518
|
+
|
205
519
|
# Check if already materialized
|
206
|
-
if
|
520
|
+
if tenant_info['materialized']:
|
207
521
|
return # Already materialized
|
208
522
|
|
209
|
-
|
210
|
-
|
211
|
-
|
523
|
+
db_path = get_tenant_db_path(
|
524
|
+
self.project_root, self.database, self.branch, tenant_name
|
525
|
+
)
|
526
|
+
|
527
|
+
# Ensure tenants directory exists
|
528
|
+
db_path.parent.mkdir(parents=True, exist_ok=True)
|
529
|
+
|
530
|
+
# Ensure __empty__ tenant exists with current schema
|
531
|
+
self._ensure_empty_tenant()
|
212
532
|
|
213
|
-
# Get
|
214
|
-
|
215
|
-
self.project_root, self.database, self.branch,
|
533
|
+
# Get __empty__ tenant path for schema copy
|
534
|
+
empty_db_path = get_tenant_db_path(
|
535
|
+
self.project_root, self.database, self.branch, self._empty_tenant_name
|
216
536
|
)
|
217
537
|
|
218
|
-
# Copy
|
219
|
-
shutil.copy2(
|
220
|
-
|
221
|
-
#
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
WHERE type='table'
|
227
|
-
AND name NOT LIKE 'sqlite_%'
|
228
|
-
""")
|
229
|
-
tables = [row["name"] for row in result.fetchall()]
|
230
|
-
|
231
|
-
# Clear data from each table
|
232
|
-
for table in tables:
|
233
|
-
conn.execute(f"DELETE FROM {table}")
|
234
|
-
|
235
|
-
conn.commit()
|
236
|
-
|
237
|
-
# Vacuum the database to reduce size
|
238
|
-
import sqlite3
|
239
|
-
vacuum_conn = sqlite3.connect(str(db_path))
|
240
|
-
vacuum_conn.isolation_level = None
|
241
|
-
vacuum_conn.execute("VACUUM")
|
242
|
-
vacuum_conn.close()
|
243
|
-
|
244
|
-
# Update metadata to indicate it's no longer lazy
|
245
|
-
import json
|
246
|
-
with open(tenant_meta_file, 'r') as f:
|
247
|
-
metadata = json.load(f)
|
248
|
-
metadata['lazy'] = False
|
249
|
-
metadata['materialized_at'] = Path(db_path).stat().st_mtime
|
250
|
-
with open(tenant_meta_file, 'w') as f:
|
251
|
-
json.dump(metadata, f)
|
538
|
+
# Copy __empty__ tenant database to new tenant
|
539
|
+
shutil.copy2(empty_db_path, db_path)
|
540
|
+
|
541
|
+
# No need to vacuum when copying from __empty__ since it's already optimized
|
542
|
+
# The __empty__ template already has 512-byte pages and is vacuumed
|
543
|
+
|
544
|
+
# Mark as materialized in metadata database
|
545
|
+
self.metadata_db.mark_tenant_materialized(tenant_info['id'])
|
252
546
|
|
253
547
|
def copy_tenant(self, source_tenant: str, target_tenant: str) -> Tenant:
|
254
548
|
"""Copy a tenant to a new tenant.
|
@@ -280,17 +574,33 @@ class TenantManager:
|
|
280
574
|
# Validate target doesn't exist
|
281
575
|
if target_tenant in list_tenants(self.project_root, self.database, self.branch):
|
282
576
|
raise ValueError(f"Tenant '{target_tenant}' already exists")
|
577
|
+
|
578
|
+
# Ensure initialization
|
579
|
+
self._ensure_initialized()
|
580
|
+
|
581
|
+
if not self.branch_id:
|
582
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
283
583
|
|
284
|
-
#
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
584
|
+
# Create tenant in metadata database first with shard
|
585
|
+
tenant_id = str(uuid.uuid4())
|
586
|
+
target_shard = self._calculate_shard(target_tenant)
|
587
|
+
metadata = {
|
588
|
+
"description": f"Copied from {source_tenant}",
|
589
|
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
590
|
+
}
|
591
|
+
self.metadata_db.create_tenant(tenant_id, self.branch_id, target_tenant, target_shard, metadata)
|
592
|
+
|
593
|
+
# Get paths using sharded approach
|
594
|
+
source_path = self._get_sharded_tenant_db_path(source_tenant)
|
595
|
+
target_path = self._get_sharded_tenant_db_path(target_tenant)
|
596
|
+
|
597
|
+
# Directory creation is handled by _get_sharded_tenant_db_path
|
291
598
|
|
292
599
|
# Copy database file
|
293
600
|
shutil.copy2(source_path, target_path)
|
601
|
+
|
602
|
+
# Mark as materialized since we copied a physical file
|
603
|
+
self.metadata_db.mark_tenant_materialized(tenant_id)
|
294
604
|
|
295
605
|
return Tenant(
|
296
606
|
name=target_tenant,
|
@@ -324,47 +634,268 @@ class TenantManager:
|
|
324
634
|
# Validate new doesn't exist
|
325
635
|
if new_name in list_tenants(self.project_root, self.database, self.branch):
|
326
636
|
raise ValueError(f"Tenant '{new_name}' already exists")
|
637
|
+
|
638
|
+
# Ensure initialization
|
639
|
+
self._ensure_initialized()
|
640
|
+
|
641
|
+
if not self.branch_id:
|
642
|
+
raise ValueError(f"Branch '{self.branch}' not found in metadata database")
|
643
|
+
|
644
|
+
# Get tenant info from metadata
|
645
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, old_name)
|
646
|
+
if not tenant_info:
|
647
|
+
raise ValueError(f"Tenant '{old_name}' does not exist in metadata")
|
648
|
+
|
649
|
+
# Get old path before updating metadata (if materialized)
|
650
|
+
old_path = None
|
651
|
+
new_path = None
|
652
|
+
if tenant_info['materialized']:
|
653
|
+
# Calculate paths before metadata update
|
654
|
+
old_shard = tenant_info['shard']
|
655
|
+
new_shard = self._calculate_shard(new_name)
|
656
|
+
|
657
|
+
branch_path = get_branch_path(self.project_root, self.database, self.branch)
|
658
|
+
tenants_dir = branch_path / "tenants"
|
659
|
+
|
660
|
+
old_path = tenants_dir / old_shard / f"{old_name}.db"
|
661
|
+
new_shard_dir = tenants_dir / new_shard
|
662
|
+
new_shard_dir.mkdir(parents=True, exist_ok=True)
|
663
|
+
new_path = new_shard_dir / f"{new_name}.db"
|
664
|
+
|
665
|
+
# Update metadata database
|
666
|
+
new_shard = self._calculate_shard(new_name)
|
667
|
+
with self.metadata_db.conn:
|
668
|
+
self.metadata_db.conn.execute(
|
669
|
+
"UPDATE tenants SET name = ?, shard = ? WHERE id = ?",
|
670
|
+
(new_name, new_shard, tenant_info['id'])
|
671
|
+
)
|
327
672
|
|
328
|
-
#
|
329
|
-
old_path
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
673
|
+
# Rename physical files if tenant is materialized
|
674
|
+
if tenant_info['materialized'] and old_path and new_path:
|
675
|
+
|
676
|
+
# Rename database file if it exists
|
677
|
+
if old_path.exists():
|
678
|
+
new_path.parent.mkdir(parents=True, exist_ok=True)
|
679
|
+
old_path.rename(new_path)
|
335
680
|
|
336
|
-
|
337
|
-
|
681
|
+
# Also rename WAL and SHM files if they exist
|
682
|
+
old_wal = old_path.with_suffix(".db-wal")
|
683
|
+
old_shm = old_path.with_suffix(".db-shm")
|
684
|
+
new_wal = new_path.with_suffix(".db-wal")
|
685
|
+
new_shm = new_path.with_suffix(".db-shm")
|
338
686
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
new_shm = new_path.with_suffix(".db-shm")
|
687
|
+
if old_wal.exists():
|
688
|
+
old_wal.rename(new_wal)
|
689
|
+
if old_shm.exists():
|
690
|
+
old_shm.rename(new_shm)
|
344
691
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
692
|
+
def get_tenant_size(self, tenant_name: str) -> dict:
|
693
|
+
"""Get storage size information for a tenant.
|
694
|
+
|
695
|
+
Args:
|
696
|
+
tenant_name: Name of the tenant
|
697
|
+
|
698
|
+
Returns:
|
699
|
+
Dictionary with size information:
|
700
|
+
- name: Tenant name
|
701
|
+
- materialized: Whether tenant is materialized
|
702
|
+
- size_bytes: Size in bytes (0 if lazy)
|
703
|
+
- size_kb: Size in KB
|
704
|
+
- size_mb: Size in MB
|
705
|
+
- page_size: SQLite page size (if materialized)
|
706
|
+
- page_count: Number of pages (if materialized)
|
707
|
+
|
708
|
+
Raises:
|
709
|
+
ValueError: If tenant doesn't exist
|
710
|
+
"""
|
711
|
+
# Ensure initialization
|
712
|
+
self._ensure_initialized()
|
713
|
+
|
714
|
+
if not self.branch_id:
|
715
|
+
raise ValueError(f"Branch '{self.branch}' not found")
|
716
|
+
|
717
|
+
# Get tenant info from metadata
|
718
|
+
tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
|
719
|
+
if not tenant_info:
|
720
|
+
raise ValueError(f"Tenant '{tenant_name}' does not exist")
|
721
|
+
|
722
|
+
result = {
|
723
|
+
"name": tenant_name,
|
724
|
+
"materialized": tenant_info['materialized'],
|
725
|
+
"size_bytes": 0,
|
726
|
+
"size_kb": 0.0,
|
727
|
+
"size_mb": 0.0,
|
728
|
+
"page_size": None,
|
729
|
+
"page_count": None
|
730
|
+
}
|
731
|
+
|
732
|
+
# If not materialized, return zeros
|
733
|
+
if not tenant_info['materialized']:
|
734
|
+
return result
|
735
|
+
|
736
|
+
# Get actual file size
|
737
|
+
db_path = get_tenant_db_path(
|
738
|
+
self.project_root, self.database, self.branch, tenant_name
|
739
|
+
)
|
740
|
+
|
741
|
+
if db_path.exists():
|
742
|
+
size_bytes = db_path.stat().st_size
|
743
|
+
result["size_bytes"] = size_bytes
|
744
|
+
result["size_kb"] = size_bytes / 1024
|
745
|
+
result["size_mb"] = size_bytes / (1024 * 1024)
|
746
|
+
|
747
|
+
# Get page information
|
748
|
+
try:
|
749
|
+
conn = sqlite3.connect(str(db_path))
|
750
|
+
result["page_size"] = conn.execute("PRAGMA page_size").fetchone()[0]
|
751
|
+
result["page_count"] = conn.execute("PRAGMA page_count").fetchone()[0]
|
752
|
+
conn.close()
|
753
|
+
except Exception:
|
754
|
+
pass # Ignore errors reading page info
|
755
|
+
|
756
|
+
return result
|
757
|
+
|
758
|
+
def get_all_tenant_sizes(self) -> dict:
    """Summarize storage usage for every tenant of the current branch.

    Returns:
        Dictionary with:
        - tenants: Per-tenant size dicts (as produced by get_tenant_size),
          ordered from largest to smallest by size_bytes
        - total_size_bytes: Combined size of all materialized tenants
        - total_size_mb: The same total expressed in MB
        - lazy_count: How many tenants are lazy (not materialized)
        - materialized_count: How many tenants are materialized

        When the branch has no id, every field is empty/zero.
    """
    self._ensure_initialized()

    summary = {
        "tenants": [],
        "total_size_bytes": 0,
        "total_size_mb": 0.0,
        "lazy_count": 0,
        "materialized_count": 0,
    }

    # Unknown branch: nothing to measure, hand back the zeroed summary.
    if not self.branch_id:
        return summary

    # One size record per tenant registered for this branch.
    size_records = [
        self.get_tenant_size(record['name'])
        for record in self.metadata_db.list_tenants(self.branch_id)
    ]
    on_disk = [entry for entry in size_records if entry["materialized"]]

    summary["materialized_count"] = len(on_disk)
    summary["lazy_count"] = len(size_records) - len(on_disk)
    # Only materialized tenants contribute to the byte total.
    summary["total_size_bytes"] = sum(entry["size_bytes"] for entry in on_disk)
    summary["total_size_mb"] = summary["total_size_bytes"] / (1024 * 1024)

    # Largest tenants first; ties keep their listing order.
    summary["tenants"] = sorted(
        size_records, key=lambda entry: entry["size_bytes"], reverse=True
    )
    return summary
|
809
|
+
|
810
|
+
def is_tenant_lazy(self, tenant_name: str) -> bool:
    """Report whether a tenant is lazy, i.e. not materialized.

    Args:
        tenant_name: Name of the tenant to check

    Returns:
        True only when the tenant has a metadata record that is not
        materialized. False for the __empty__ tenant, for a branch without
        an id, and for a tenant with no metadata record.
    """
    # The __empty__ tenant is treated as never lazy; no lookup is made.
    if tenant_name == self._empty_tenant_name:
        return False

    self._ensure_initialized()

    # Only consult the metadata database when the branch is known.
    record = (
        self.metadata_db.get_tenant(self.branch_id, tenant_name)
        if self.branch_id
        else None
    )

    # A missing record counts as "not lazy"; otherwise invert the flag.
    return bool(record) and not record['materialized']
|
836
|
+
|
837
|
+
def get_tenant_db_path_for_operation(self, tenant_name: str, is_write: bool = False) -> Path:
    """Get the appropriate database path for a tenant operation.

    For lazy tenants:
    - Read operations use the __empty__ tenant's database
    - Write operations trigger materialization first

    Materialized tenants and the __empty__ tenant resolve to their own
    database path.

    Args:
        tenant_name: Name of the tenant
        is_write: Whether this is for a write operation

    Returns:
        Path to the appropriate database file

    Raises:
        ValueError: If the branch has no id in the metadata database, or
            if the tenant doesn't exist
    """
    self._ensure_initialized()

    if not self.branch_id:
        raise ValueError(f"Branch '{self.branch}' not found in metadata database")

    # The __empty__ tenant is never reported lazy by is_tenant_lazy() and is
    # not looked up in metadata, so it resolves directly to its own path.
    if tenant_name == self._empty_tenant_name:
        return self._get_sharded_tenant_db_path(tenant_name)

    tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
    if not tenant_info:
        raise ValueError(f"Tenant '{tenant_name}' does not exist")

    # Decide laziness from the record fetched above. Going through
    # is_tenant_lazy() would repeat the initialization check and the same
    # metadata query, and the two lookups could disagree.
    if tenant_info['materialized']:
        return self._get_sharded_tenant_db_path(tenant_name)

    if is_write:
        # Writes need a real database file: materialize, then use it.
        self.materialize_tenant(tenant_name)
        return self._get_sharded_tenant_db_path(tenant_name)

    # Reads of a lazy tenant are served from the __empty__ tenant.
    self._ensure_empty_tenant()
    return self._get_sharded_tenant_db_path(self._empty_tenant_name)
|
349
879
|
|
350
|
-
def get_tenant_connection(self, tenant_name: str) -> DatabaseConnection:
|
880
|
+
def get_tenant_connection(self, tenant_name: str, is_write: bool = False) -> DatabaseConnection:
    """Open a database connection for a tenant.

    IMPORTANT: Use the returned connection in a 'with' statement so that
    it is cleaned up properly and file descriptors are not leaked.

    Args:
        tenant_name: Tenant name
        is_write: Whether this connection will be used for writes

    Returns:
        DatabaseConnection object (must be used with 'with' statement)

    Raises:
        ValueError: If tenant doesn't exist

    Example:
        with tenant_manager.get_tenant_connection("main") as conn:
            conn.execute("SELECT * FROM table")
    """
    # Path resolution (including any handling of lazy tenants) is delegated
    # to get_tenant_db_path_for_operation.
    return DatabaseConnection(
        self.get_tenant_db_path_for_operation(tenant_name, is_write)
    )
|