cinchdb 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,25 @@
1
1
  """Tenant management for CinchDB."""
2
2
 
3
+ import hashlib
3
4
  import shutil
5
+ import sqlite3
6
+ import uuid
4
7
  from pathlib import Path
5
8
  from typing import List, Optional
9
+ from datetime import datetime, timezone
6
10
 
7
11
  from cinchdb.models import Tenant
8
12
  from cinchdb.core.path_utils import (
9
13
  get_branch_path,
10
14
  get_tenant_db_path,
15
+ get_database_path,
11
16
  list_tenants,
12
17
  )
13
18
  from cinchdb.core.connection import DatabaseConnection
14
19
  from cinchdb.core.maintenance import check_maintenance_mode
15
20
  from cinchdb.utils.name_validator import validate_name
21
+ from cinchdb.infrastructure.metadata_db import MetadataDB
22
+ from cinchdb.infrastructure.metadata_connection_pool import get_metadata_db
16
23
 
17
24
 
18
25
  class TenantManager:
@@ -30,88 +37,135 @@ class TenantManager:
30
37
  self.database = database
31
38
  self.branch = branch
32
39
  self.branch_path = get_branch_path(self.project_root, database, branch)
33
-
34
- def list_tenants(self) -> List[Tenant]:
40
+ self._empty_tenant_name = "__empty__" # Reserved name for lazy tenant template
41
+
42
+ # Lazy-initialized pooled connection
43
+ self._metadata_db = None
44
+ self.database_id = None
45
+ self.branch_id = None
46
+
47
+ def _ensure_initialized(self) -> None:
48
+ """Ensure metadata connection and IDs are initialized."""
49
+ if self._metadata_db is None:
50
+ self._metadata_db = get_metadata_db(self.project_root)
51
+
52
+ # Initialize database and branch IDs on first access
53
+ if self.database_id is None:
54
+ db_info = self._metadata_db.get_database(self.database)
55
+ if db_info:
56
+ self.database_id = db_info['id']
57
+ branch_info = self._metadata_db.get_branch(self.database_id, self.branch)
58
+ if branch_info:
59
+ self.branch_id = branch_info['id']
60
+
61
+ @property
62
+ def metadata_db(self) -> MetadataDB:
63
+ """Get metadata database connection (lazy-initialized from pool)."""
64
+ self._ensure_initialized()
65
+ return self._metadata_db
66
+
67
+ def list_tenants(self, include_system: bool = False) -> List[Tenant]:
35
68
  """List all tenants in the branch.
36
69
 
70
+ Args:
71
+ include_system: If True, include system tenants like __empty__
72
+
37
73
  Returns:
38
74
  List of Tenant objects
39
75
  """
40
- tenant_names = list_tenants(self.project_root, self.database, self.branch)
76
+ # Ensure initialization
77
+ self._ensure_initialized()
78
+
79
+ if not self.branch_id:
80
+ return []
81
+
82
+ # Get tenants from metadata database
83
+ tenant_records = self.metadata_db.list_tenants(self.branch_id)
41
84
  tenants = []
42
85
 
43
- for name in tenant_names:
86
+ for record in tenant_records:
87
+ # Filter out the __empty__ tenant from user-facing listings unless requested
88
+ if not include_system and record['name'] == self._empty_tenant_name:
89
+ continue
90
+
44
91
  tenant = Tenant(
45
- name=name,
92
+ name=record['name'],
46
93
  database=self.database,
47
94
  branch=self.branch,
48
- is_main=(name == "main"),
95
+ is_main=(record['name'] == "main"),
49
96
  )
50
97
  tenants.append(tenant)
51
98
 
52
99
  return tenants
53
100
 
54
101
  def create_tenant(
55
- self, tenant_name: str, description: Optional[str] = None
102
+ self, tenant_name: str, description: Optional[str] = None, lazy: bool = True
56
103
  ) -> Tenant:
57
104
  """Create a new tenant by copying schema from main tenant.
58
105
 
59
106
  Args:
60
107
  tenant_name: Name for the new tenant
61
108
  description: Optional description
109
+ lazy: If True, don't create database file until first use
62
110
 
63
111
  Returns:
64
112
  Created Tenant object
65
113
 
66
114
  Raises:
67
- ValueError: If tenant already exists
115
+ ValueError: If tenant already exists or uses reserved name
68
116
  InvalidNameError: If tenant name is invalid
69
117
  MaintenanceError: If branch is in maintenance mode
70
118
  """
119
+ # Check for reserved name
120
+ if tenant_name == self._empty_tenant_name:
121
+ raise ValueError(f"'{self._empty_tenant_name}' is a reserved tenant name")
122
+
71
123
  # Validate tenant name
72
124
  validate_name(tenant_name, "tenant")
73
125
 
74
126
  # Check maintenance mode
75
127
  check_maintenance_mode(self.project_root, self.database, self.branch)
128
+
129
+ # Ensure initialization
130
+ self._ensure_initialized()
131
+
132
+ if not self.branch_id:
133
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
76
134
 
77
- # Validate tenant doesn't exist
78
- if tenant_name in list_tenants(self.project_root, self.database, self.branch):
135
+ # Check if tenant already exists in metadata
136
+ existing_tenant = self.metadata_db.get_tenant(self.branch_id, tenant_name)
137
+ if existing_tenant:
79
138
  raise ValueError(f"Tenant '{tenant_name}' already exists")
80
139
 
81
- # Get paths
82
- main_db_path = get_tenant_db_path(
83
- self.project_root, self.database, self.branch, "main"
84
- )
85
- new_db_path = get_tenant_db_path(
86
- self.project_root, self.database, self.branch, tenant_name
87
- )
88
-
89
- # Copy main tenant database to new tenant
90
- shutil.copy2(main_db_path, new_db_path)
91
-
92
- # Clear any data from the copied database (keep schema only)
93
- with DatabaseConnection(new_db_path) as conn:
94
- # Get all tables
95
- result = conn.execute("""
96
- SELECT name FROM sqlite_master
97
- WHERE type='table'
98
- AND name NOT LIKE 'sqlite_%'
99
- """)
100
- tables = [row["name"] for row in result.fetchall()]
101
-
102
- # Clear data from each table
103
- for table in tables:
104
- conn.execute(f"DELETE FROM {table}")
105
-
106
- conn.commit()
107
-
108
- # Vacuum the database to reduce size
109
- # Must use raw sqlite3 connection with autocommit mode for VACUUM
110
- import sqlite3
111
- vacuum_conn = sqlite3.connect(str(new_db_path))
112
- vacuum_conn.isolation_level = None # Autocommit mode required for VACUUM
113
- vacuum_conn.execute("VACUUM")
114
- vacuum_conn.close()
140
+ # Create tenant ID
141
+ tenant_id = str(uuid.uuid4())
142
+
143
+ # Calculate shard for tenant
144
+ shard = self._calculate_shard(tenant_name)
145
+
146
+ # Create tenant in metadata database
147
+ metadata = {
148
+ "description": description,
149
+ "created_at": datetime.now(timezone.utc).isoformat(),
150
+ }
151
+ self.metadata_db.create_tenant(tenant_id, self.branch_id, tenant_name, shard, metadata)
152
+
153
+ if not lazy:
154
+ # Ensure __empty__ tenant exists with current schema
155
+ self._ensure_empty_tenant()
156
+
157
+ # Create actual database file using sharded paths
158
+ new_db_path = self._get_sharded_tenant_db_path(tenant_name)
159
+ empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)
160
+
161
+ # Directory creation is handled by _get_sharded_tenant_db_path
162
+
163
+ # Copy __empty__ tenant database to new tenant
164
+ # __empty__ already has 512-byte pages and no data
165
+ shutil.copy2(empty_db_path, new_db_path)
166
+
167
+ # Mark as materialized in metadata
168
+ self.metadata_db.mark_tenant_materialized(tenant_id)
115
169
 
116
170
  return Tenant(
117
171
  name=tenant_name,
@@ -121,6 +175,128 @@ class TenantManager:
121
175
  is_main=False,
122
176
  )
123
177
 
178
+ def _calculate_shard(self, tenant_name: str) -> str:
179
+ """Calculate the shard directory for a tenant using SHA256 hash.
180
+
181
+ Args:
182
+ tenant_name: Name of the tenant
183
+
184
+ Returns:
185
+ Two-character hex string (e.g., "a0", "ff")
186
+ """
187
+ hash_val = hashlib.sha256(tenant_name.encode('utf-8')).hexdigest()
188
+ return hash_val[:2]
189
+
190
+ def _get_sharded_tenant_db_path(self, tenant_name: str) -> Path:
191
+ """Get the sharded database path for a tenant using metadata DB lookup.
192
+
193
+ Args:
194
+ tenant_name: Name of the tenant
195
+
196
+ Returns:
197
+ Path to the tenant database file in its shard directory
198
+
199
+ Raises:
200
+ ValueError: If tenant doesn't exist in metadata
201
+ """
202
+ # For __empty__ tenant, calculate shard directly
203
+ if tenant_name == self._empty_tenant_name:
204
+ shard = self._calculate_shard(tenant_name)
205
+ else:
206
+ # Look up shard from metadata DB
207
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
208
+ if not tenant_info or not tenant_info.get('shard'):
209
+ raise ValueError(f"Tenant '{tenant_name}' not found in metadata or missing shard info")
210
+ shard = tenant_info['shard']
211
+
212
+ # Build sharded path
213
+ branch_path = get_branch_path(self.project_root, self.database, self.branch)
214
+ tenants_dir = branch_path / "tenants"
215
+ shard_dir = tenants_dir / shard
216
+
217
+ # Ensure shard directory exists
218
+ shard_dir.mkdir(parents=True, exist_ok=True)
219
+
220
+ return shard_dir / f"{tenant_name}.db"
221
+
222
+ def _ensure_empty_tenant(self) -> None:
223
+ """Ensure the __empty__ tenant exists with current schema.
224
+
225
+ This tenant serves as a template for lazy tenants.
226
+ It's created on-demand when first lazy tenant is read.
227
+ """
228
+ # Ensure initialization
229
+ self._ensure_initialized()
230
+
231
+ if not self.branch_id:
232
+ return
233
+
234
+ # Check if __empty__ exists in metadata
235
+ empty_tenant = self.metadata_db.get_tenant(self.branch_id, self._empty_tenant_name)
236
+
237
+ empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)
238
+
239
+ # Create in metadata if doesn't exist (should already be created during branch/database init)
240
+ if not empty_tenant:
241
+ tenant_id = str(uuid.uuid4())
242
+ shard = self._calculate_shard(self._empty_tenant_name)
243
+ self.metadata_db.create_tenant(
244
+ tenant_id, self.branch_id, self._empty_tenant_name, shard,
245
+ metadata={"system": True, "description": "Template for lazy tenants"}
246
+ )
247
+ # Don't mark as materialized yet - it will be when the file is created
248
+ empty_tenant = {"id": tenant_id}
249
+
250
+ # If __empty__ database doesn't exist, create it by copying from main tenant
251
+ if not empty_db_path.exists():
252
+ empty_db_path.parent.mkdir(parents=True, exist_ok=True)
253
+
254
+ # Get main tenant database path (may need to materialize it first)
255
+ main_db_path = self._get_sharded_tenant_db_path("main")
256
+
257
+ if main_db_path.exists():
258
+ # Copy main tenant database to __empty__
259
+ shutil.copy2(main_db_path, empty_db_path)
260
+
261
+ # Clear all data from tables (keep schema only)
262
+ with DatabaseConnection(empty_db_path) as conn:
263
+ # Get all tables
264
+ result = conn.execute("""
265
+ SELECT name FROM sqlite_master
266
+ WHERE type='table'
267
+ AND name NOT LIKE 'sqlite_%'
268
+ """)
269
+ tables = [row["name"] for row in result.fetchall()]
270
+
271
+ # Clear data from each table
272
+ for table in tables:
273
+ conn.execute(f"DELETE FROM {table}")
274
+
275
+ conn.commit()
276
+ else:
277
+ # If main doesn't exist either, create empty database
278
+ empty_db_path.touch()
279
+ with DatabaseConnection(empty_db_path):
280
+ pass # Just initialize with PRAGMAs
281
+
282
+ # Optimize with small page size for empty template
283
+ # We need to rebuild the database with new page size
284
+ temp_path = empty_db_path.with_suffix('.tmp')
285
+
286
+ # Create new database with 512-byte pages
287
+ vacuum_conn = sqlite3.connect(str(empty_db_path))
288
+ vacuum_conn.isolation_level = None
289
+ vacuum_conn.execute("PRAGMA page_size = 512")
290
+ vacuum_conn.execute(f"VACUUM INTO '{temp_path}'")
291
+ vacuum_conn.close()
292
+
293
+ # Replace original with optimized version
294
+ shutil.move(str(temp_path), str(empty_db_path))
295
+
296
+ # Mark as materialized now that the file exists
297
+ self.metadata_db.mark_tenant_materialized(empty_tenant['id'])
298
+
299
+
124
300
  def delete_tenant(self, tenant_name: str) -> None:
125
301
  """Delete a tenant.
126
302
 
@@ -128,36 +304,245 @@ class TenantManager:
128
304
  tenant_name: Name of tenant to delete
129
305
 
130
306
  Raises:
131
- ValueError: If tenant doesn't exist or is main tenant
307
+ ValueError: If tenant doesn't exist, is main tenant, or is reserved
132
308
  MaintenanceError: If branch is in maintenance mode
133
309
  """
134
310
  # Check maintenance mode
135
311
  check_maintenance_mode(self.project_root, self.database, self.branch)
136
312
 
137
- # Can't delete main tenant
313
+ # Can't delete main or __empty__ tenants
138
314
  if tenant_name == "main":
139
315
  raise ValueError("Cannot delete the main tenant")
316
+ if tenant_name == self._empty_tenant_name:
317
+ raise ValueError(f"Cannot delete the reserved '{self._empty_tenant_name}' tenant")
140
318
 
141
- # Validate tenant exists
142
- if tenant_name not in list_tenants(
143
- self.project_root, self.database, self.branch
144
- ):
319
+ # Ensure initialization
320
+ self._ensure_initialized()
321
+
322
+ if not self.branch_id:
323
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
324
+
325
+ # Get tenant info from metadata
326
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
327
+ if not tenant_info:
145
328
  raise ValueError(f"Tenant '{tenant_name}' does not exist")
146
329
 
147
- # Delete tenant database file and related files
330
+ # Delete from metadata database (this handles cascade delete)
331
+ with self.metadata_db.conn:
332
+ self.metadata_db.conn.execute(
333
+ "DELETE FROM tenants WHERE id = ?",
334
+ (tenant_info['id'],)
335
+ )
336
+
337
+ # Delete tenant database file and related files (if they exist and it's materialized)
338
+ if tenant_info['materialized']:
339
+ db_path = get_tenant_db_path(
340
+ self.project_root, self.database, self.branch, tenant_name
341
+ )
342
+ if db_path.exists():
343
+ db_path.unlink()
344
+
345
+ # Also remove WAL and SHM files if they exist
346
+ wal_path = db_path.with_suffix(".db-wal")
347
+ shm_path = db_path.with_suffix(".db-shm")
348
+
349
+ if wal_path.exists():
350
+ wal_path.unlink()
351
+ if shm_path.exists():
352
+ shm_path.unlink()
353
+
354
+ def optimize_all_tenants(self, force: bool = False) -> dict:
355
+ """Optimize storage for all materialized tenants in the branch.
356
+
357
+ This is designed to be called periodically (e.g., every minute) to:
358
+ - Reclaim unused space with VACUUM
359
+ - Adjust page sizes as databases grow
360
+ - Keep small databases compact
361
+
362
+ Args:
363
+ force: If True, optimize all tenants regardless of size
364
+
365
+ Returns:
366
+ Dictionary with optimization results:
367
+ - optimized: List of tenant names that were optimized
368
+ - skipped: List of tenant names that were skipped
369
+ - errors: List of tuples (tenant_name, error_message)
370
+ """
371
+ results = {
372
+ "optimized": [],
373
+ "skipped": [],
374
+ "errors": []
375
+ }
376
+
377
+ # Ensure initialization
378
+ self._ensure_initialized()
379
+
380
+ if not self.branch_id:
381
+ return results
382
+
383
+ # Get all materialized tenants for this branch
384
+ tenants = self.metadata_db.list_tenants(self.branch_id, materialized_only=True)
385
+
386
+ for tenant in tenants:
387
+ tenant_name = tenant['name']
388
+
389
+ # Skip system tenants unless forced
390
+ if not force and tenant_name in ["main", self._empty_tenant_name]:
391
+ results["skipped"].append(tenant_name)
392
+ continue
393
+
394
+ try:
395
+ optimized = self.optimize_tenant_storage(tenant_name, force=force)
396
+ if optimized:
397
+ results["optimized"].append(tenant_name)
398
+ else:
399
+ results["skipped"].append(tenant_name)
400
+ except Exception as e:
401
+ results["errors"].append((tenant_name, str(e)))
402
+
403
+ return results
404
+
405
+ def optimize_tenant_storage(self, tenant_name: str, force: bool = False) -> bool:
406
+ """Optimize tenant database storage with VACUUM and optional page size adjustment.
407
+
408
+ This performs:
409
+ 1. Always: VACUUM to reclaim unused space and defragment
410
+ 2. If needed: Rebuild with optimal page size based on database size
411
+
412
+ Args:
413
+ tenant_name: Name of tenant to optimize
414
+ force: If True, always perform VACUUM even if page size is optimal
415
+
416
+ Returns:
417
+ True if optimization was performed, False if tenant doesn't exist
418
+ """
419
+ # Ensure initialization
420
+ self._ensure_initialized()
421
+
422
+ if not self.branch_id:
423
+ return False
424
+
425
+ # Skip system tenants
426
+ if tenant_name in ["main", self._empty_tenant_name]:
427
+ return False
428
+
429
+ # Get tenant info
430
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
431
+ if not tenant_info or not tenant_info['materialized']:
432
+ return False
433
+
148
434
  db_path = get_tenant_db_path(
149
435
  self.project_root, self.database, self.branch, tenant_name
150
436
  )
151
- db_path.unlink()
152
-
153
- # Also remove WAL and SHM files if they exist
154
- wal_path = db_path.with_suffix(".db-wal")
155
- shm_path = db_path.with_suffix(".db-shm")
156
-
157
- if wal_path.exists():
158
- wal_path.unlink()
159
- if shm_path.exists():
160
- shm_path.unlink()
437
+
438
+ if not db_path.exists():
439
+ return False
440
+
441
+ # Check current page size
442
+ conn = sqlite3.connect(str(db_path))
443
+ current_page_size = conn.execute("PRAGMA page_size").fetchone()[0]
444
+ conn.close()
445
+
446
+ # Determine optimal page size
447
+ optimal_page_size = self._get_optimal_page_size(db_path)
448
+
449
+ # Decide if we need to rebuild with new page size
450
+ needs_page_size_change = (current_page_size != optimal_page_size and
451
+ db_path.stat().st_size > 1024 * 1024) # Only if > 1MB
452
+
453
+ if needs_page_size_change:
454
+ # Rebuild with new page size using VACUUM INTO
455
+ temp_path = db_path.with_suffix('.tmp')
456
+ conn = sqlite3.connect(str(db_path))
457
+ conn.isolation_level = None
458
+ conn.execute(f"PRAGMA page_size = {optimal_page_size}")
459
+ conn.execute(f"VACUUM INTO '{temp_path}'")
460
+ conn.close()
461
+
462
+ # Replace original with optimized version
463
+ shutil.move(str(temp_path), str(db_path))
464
+ return True
465
+ elif force or current_page_size == 512:
466
+ # Just run regular VACUUM to defragment and reclaim space
467
+ # Always vacuum 512-byte page databases to keep them compact
468
+ conn = sqlite3.connect(str(db_path))
469
+ conn.isolation_level = None
470
+ conn.execute("VACUUM")
471
+ conn.close()
472
+ return True
473
+
474
+ return False
475
+
476
+ def _get_optimal_page_size(self, db_path: Path) -> int:
477
+ """Determine optimal page size based on database file size.
478
+
479
+ Args:
480
+ db_path: Path to database file
481
+
482
+ Returns:
483
+ Optimal page size in bytes
484
+ """
485
+ if not db_path.exists():
486
+ return 512 # Default for new/empty databases
487
+
488
+ size_mb = db_path.stat().st_size / (1024 * 1024)
489
+
490
+ if size_mb < 0.1: # < 100KB
491
+ return 512
492
+ elif size_mb < 10: # < 10MB
493
+ return 4096 # 4KB - good balance for small-medium DBs
494
+ elif size_mb < 100: # < 100MB
495
+ return 8192 # 8KB - better for larger rows
496
+ else: # >= 100MB
497
+ return 16384 # 16KB - optimal for bulk operations
498
+
499
+ def materialize_tenant(self, tenant_name: str) -> None:
500
+ """Materialize a lazy tenant into an actual database file.
501
+
502
+ Args:
503
+ tenant_name: Name of the tenant to materialize
504
+
505
+ Raises:
506
+ ValueError: If tenant doesn't exist or is already materialized
507
+ """
508
+ # Ensure initialization
509
+ self._ensure_initialized()
510
+
511
+ if not self.branch_id:
512
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
513
+
514
+ # Get tenant info from metadata
515
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
516
+ if not tenant_info:
517
+ raise ValueError(f"Tenant '{tenant_name}' does not exist")
518
+
519
+ # Check if already materialized
520
+ if tenant_info['materialized']:
521
+ return # Already materialized
522
+
523
+ db_path = get_tenant_db_path(
524
+ self.project_root, self.database, self.branch, tenant_name
525
+ )
526
+
527
+ # Ensure tenants directory exists
528
+ db_path.parent.mkdir(parents=True, exist_ok=True)
529
+
530
+ # Ensure __empty__ tenant exists with current schema
531
+ self._ensure_empty_tenant()
532
+
533
+ # Get __empty__ tenant path for schema copy
534
+ empty_db_path = get_tenant_db_path(
535
+ self.project_root, self.database, self.branch, self._empty_tenant_name
536
+ )
537
+
538
+ # Copy __empty__ tenant database to new tenant
539
+ shutil.copy2(empty_db_path, db_path)
540
+
541
+ # No need to vacuum when copying from __empty__ since it's already optimized
542
+ # The __empty__ template already has 512-byte pages and is vacuumed
543
+
544
+ # Mark as materialized in metadata database
545
+ self.metadata_db.mark_tenant_materialized(tenant_info['id'])
161
546
 
162
547
  def copy_tenant(self, source_tenant: str, target_tenant: str) -> Tenant:
163
548
  """Copy a tenant to a new tenant.
@@ -189,17 +574,33 @@ class TenantManager:
189
574
  # Validate target doesn't exist
190
575
  if target_tenant in list_tenants(self.project_root, self.database, self.branch):
191
576
  raise ValueError(f"Tenant '{target_tenant}' already exists")
577
+
578
+ # Ensure initialization
579
+ self._ensure_initialized()
580
+
581
+ if not self.branch_id:
582
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
192
583
 
193
- # Get paths
194
- source_path = get_tenant_db_path(
195
- self.project_root, self.database, self.branch, source_tenant
196
- )
197
- target_path = get_tenant_db_path(
198
- self.project_root, self.database, self.branch, target_tenant
199
- )
584
+ # Create tenant in metadata database first with shard
585
+ tenant_id = str(uuid.uuid4())
586
+ target_shard = self._calculate_shard(target_tenant)
587
+ metadata = {
588
+ "description": f"Copied from {source_tenant}",
589
+ "created_at": datetime.now(timezone.utc).isoformat(),
590
+ }
591
+ self.metadata_db.create_tenant(tenant_id, self.branch_id, target_tenant, target_shard, metadata)
592
+
593
+ # Get paths using sharded approach
594
+ source_path = self._get_sharded_tenant_db_path(source_tenant)
595
+ target_path = self._get_sharded_tenant_db_path(target_tenant)
596
+
597
+ # Directory creation is handled by _get_sharded_tenant_db_path
200
598
 
201
599
  # Copy database file
202
600
  shutil.copy2(source_path, target_path)
601
+
602
+ # Mark as materialized since we copied a physical file
603
+ self.metadata_db.mark_tenant_materialized(tenant_id)
203
604
 
204
605
  return Tenant(
205
606
  name=target_tenant,
@@ -233,47 +634,268 @@ class TenantManager:
233
634
  # Validate new doesn't exist
234
635
  if new_name in list_tenants(self.project_root, self.database, self.branch):
235
636
  raise ValueError(f"Tenant '{new_name}' already exists")
637
+
638
+ # Ensure initialization
639
+ self._ensure_initialized()
640
+
641
+ if not self.branch_id:
642
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
643
+
644
+ # Get tenant info from metadata
645
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, old_name)
646
+ if not tenant_info:
647
+ raise ValueError(f"Tenant '{old_name}' does not exist in metadata")
648
+
649
+ # Get old path before updating metadata (if materialized)
650
+ old_path = None
651
+ new_path = None
652
+ if tenant_info['materialized']:
653
+ # Calculate paths before metadata update
654
+ old_shard = tenant_info['shard']
655
+ new_shard = self._calculate_shard(new_name)
656
+
657
+ branch_path = get_branch_path(self.project_root, self.database, self.branch)
658
+ tenants_dir = branch_path / "tenants"
659
+
660
+ old_path = tenants_dir / old_shard / f"{old_name}.db"
661
+ new_shard_dir = tenants_dir / new_shard
662
+ new_shard_dir.mkdir(parents=True, exist_ok=True)
663
+ new_path = new_shard_dir / f"{new_name}.db"
664
+
665
+ # Update metadata database
666
+ new_shard = self._calculate_shard(new_name)
667
+ with self.metadata_db.conn:
668
+ self.metadata_db.conn.execute(
669
+ "UPDATE tenants SET name = ?, shard = ? WHERE id = ?",
670
+ (new_name, new_shard, tenant_info['id'])
671
+ )
236
672
 
237
- # Get paths
238
- old_path = get_tenant_db_path(
239
- self.project_root, self.database, self.branch, old_name
240
- )
241
- new_path = get_tenant_db_path(
242
- self.project_root, self.database, self.branch, new_name
243
- )
673
+ # Rename physical files if tenant is materialized
674
+ if tenant_info['materialized'] and old_path and new_path:
244
675
 
245
- # Rename database file
246
- old_path.rename(new_path)
676
+ # Rename database file if it exists
677
+ if old_path.exists():
678
+ new_path.parent.mkdir(parents=True, exist_ok=True)
679
+ old_path.rename(new_path)
247
680
 
248
- # Also rename WAL and SHM files if they exist
249
- old_wal = old_path.with_suffix(".db-wal")
250
- old_shm = old_path.with_suffix(".db-shm")
251
- new_wal = new_path.with_suffix(".db-wal")
252
- new_shm = new_path.with_suffix(".db-shm")
681
+ # Also rename WAL and SHM files if they exist
682
+ old_wal = old_path.with_suffix(".db-wal")
683
+ old_shm = old_path.with_suffix(".db-shm")
684
+ new_wal = new_path.with_suffix(".db-wal")
685
+ new_shm = new_path.with_suffix(".db-shm")
253
686
 
254
- if old_wal.exists():
255
- old_wal.rename(new_wal)
256
- if old_shm.exists():
257
- old_shm.rename(new_shm)
687
+ if old_wal.exists():
688
+ old_wal.rename(new_wal)
689
+ if old_shm.exists():
690
+ old_shm.rename(new_shm)
258
691
 
259
- def get_tenant_connection(self, tenant_name: str) -> DatabaseConnection:
692
+ def get_tenant_size(self, tenant_name: str) -> dict:
693
+ """Get storage size information for a tenant.
694
+
695
+ Args:
696
+ tenant_name: Name of the tenant
697
+
698
+ Returns:
699
+ Dictionary with size information:
700
+ - name: Tenant name
701
+ - materialized: Whether tenant is materialized
702
+ - size_bytes: Size in bytes (0 if lazy)
703
+ - size_kb: Size in KB
704
+ - size_mb: Size in MB
705
+ - page_size: SQLite page size (if materialized)
706
+ - page_count: Number of pages (if materialized)
707
+
708
+ Raises:
709
+ ValueError: If tenant doesn't exist
710
+ """
711
+ # Ensure initialization
712
+ self._ensure_initialized()
713
+
714
+ if not self.branch_id:
715
+ raise ValueError(f"Branch '{self.branch}' not found")
716
+
717
+ # Get tenant info from metadata
718
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
719
+ if not tenant_info:
720
+ raise ValueError(f"Tenant '{tenant_name}' does not exist")
721
+
722
+ result = {
723
+ "name": tenant_name,
724
+ "materialized": tenant_info['materialized'],
725
+ "size_bytes": 0,
726
+ "size_kb": 0.0,
727
+ "size_mb": 0.0,
728
+ "page_size": None,
729
+ "page_count": None
730
+ }
731
+
732
+ # If not materialized, return zeros
733
+ if not tenant_info['materialized']:
734
+ return result
735
+
736
+ # Get actual file size
737
+ db_path = get_tenant_db_path(
738
+ self.project_root, self.database, self.branch, tenant_name
739
+ )
740
+
741
+ if db_path.exists():
742
+ size_bytes = db_path.stat().st_size
743
+ result["size_bytes"] = size_bytes
744
+ result["size_kb"] = size_bytes / 1024
745
+ result["size_mb"] = size_bytes / (1024 * 1024)
746
+
747
+ # Get page information
748
+ try:
749
+ conn = sqlite3.connect(str(db_path))
750
+ result["page_size"] = conn.execute("PRAGMA page_size").fetchone()[0]
751
+ result["page_count"] = conn.execute("PRAGMA page_count").fetchone()[0]
752
+ conn.close()
753
+ except Exception:
754
+ pass # Ignore errors reading page info
755
+
756
+ return result
757
+
758
+ def get_all_tenant_sizes(self) -> dict:
759
+ """Get storage size information for all tenants in the branch.
760
+
761
+ Returns:
762
+ Dictionary with:
763
+ - tenants: List of individual tenant size info
764
+ - total_size_bytes: Total size of all materialized tenants
765
+ - total_size_mb: Total size in MB
766
+ - lazy_count: Number of lazy tenants
767
+ - materialized_count: Number of materialized tenants
768
+ """
769
+ # Ensure initialization
770
+ self._ensure_initialized()
771
+
772
+ if not self.branch_id:
773
+ return {
774
+ "tenants": [],
775
+ "total_size_bytes": 0,
776
+ "total_size_mb": 0.0,
777
+ "lazy_count": 0,
778
+ "materialized_count": 0
779
+ }
780
+
781
+ # Get all tenants for this branch
782
+ all_tenants = self.metadata_db.list_tenants(self.branch_id)
783
+
784
+ result = {
785
+ "tenants": [],
786
+ "total_size_bytes": 0,
787
+ "total_size_mb": 0.0,
788
+ "lazy_count": 0,
789
+ "materialized_count": 0
790
+ }
791
+
792
+ for tenant_info in all_tenants:
793
+ tenant_name = tenant_info['name']
794
+ size_info = self.get_tenant_size(tenant_name)
795
+ result["tenants"].append(size_info)
796
+
797
+ if size_info["materialized"]:
798
+ result["materialized_count"] += 1
799
+ result["total_size_bytes"] += size_info["size_bytes"]
800
+ else:
801
+ result["lazy_count"] += 1
802
+
803
+ result["total_size_mb"] = result["total_size_bytes"] / (1024 * 1024)
804
+
805
+ # Sort by size descending
806
+ result["tenants"].sort(key=lambda x: x["size_bytes"], reverse=True)
807
+
808
+ return result
809
+
810
+ def is_tenant_lazy(self, tenant_name: str) -> bool:
811
+ """Check if a tenant is lazy (not materialized).
812
+
813
+ Args:
814
+ tenant_name: Name of the tenant to check
815
+
816
+ Returns:
817
+ True if tenant is lazy, False if materialized
818
+ """
819
+ # Check if it's the __empty__ tenant (always materialized when exists)
820
+ if tenant_name == self._empty_tenant_name:
821
+ return False
822
+
823
+ # Ensure initialization
824
+ self._ensure_initialized()
825
+
826
+ if not self.branch_id:
827
+ return False
828
+
829
+ # Check metadata database
830
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
831
+ if not tenant_info:
832
+ return False
833
+
834
+ # Tenant is lazy if it's not materialized
835
+ return not tenant_info['materialized']
836
+
837
+ def get_tenant_db_path_for_operation(self, tenant_name: str, is_write: bool = False) -> Path:
838
+ """Get the appropriate database path for a tenant operation.
839
+
840
+ For lazy tenants:
841
+ - Read operations use __empty__ tenant
842
+ - Write operations trigger materialization
843
+
844
+ Args:
845
+ tenant_name: Name of the tenant
846
+ is_write: Whether this is for a write operation
847
+
848
+ Returns:
849
+ Path to the appropriate database file
850
+
851
+ Raises:
852
+ ValueError: If tenant doesn't exist
853
+ """
854
+ # Ensure initialization
855
+ self._ensure_initialized()
856
+
857
+ # Check if tenant exists in metadata
858
+ if not self.branch_id:
859
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
860
+
861
+ if tenant_name != self._empty_tenant_name:
862
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
863
+ if not tenant_info:
864
+ raise ValueError(f"Tenant '{tenant_name}' does not exist")
865
+
866
+ # For lazy tenants
867
+ if self.is_tenant_lazy(tenant_name):
868
+ if is_write:
869
+ # Materialize the tenant for writes
870
+ self.materialize_tenant(tenant_name)
871
+ return self._get_sharded_tenant_db_path(tenant_name)
872
+ else:
873
+ # Use __empty__ tenant for reads
874
+ self._ensure_empty_tenant()
875
+ return self._get_sharded_tenant_db_path(self._empty_tenant_name)
876
+ else:
877
+ # For materialized tenants, use their actual database
878
+ return self._get_sharded_tenant_db_path(tenant_name)
879
+
880
+ def get_tenant_connection(self, tenant_name: str, is_write: bool = False) -> DatabaseConnection:
260
881
  """Get a database connection for a tenant.
261
882
 
883
+ IMPORTANT: The returned connection must be used with a context manager (with statement)
884
+ to ensure proper resource cleanup and prevent file descriptor leaks.
885
+
262
886
  Args:
263
887
  tenant_name: Tenant name
888
+ is_write: Whether this connection will be used for writes
264
889
 
265
890
  Returns:
266
- DatabaseConnection object
891
+ DatabaseConnection object (must be used with 'with' statement)
267
892
 
268
893
  Raises:
269
894
  ValueError: If tenant doesn't exist
895
+
896
+ Example:
897
+ with tenant_manager.get_tenant_connection("main") as conn:
898
+ conn.execute("SELECT * FROM table")
270
899
  """
271
- if tenant_name not in list_tenants(
272
- self.project_root, self.database, self.branch
273
- ):
274
- raise ValueError(f"Tenant '{tenant_name}' does not exist")
275
-
276
- db_path = get_tenant_db_path(
277
- self.project_root, self.database, self.branch, tenant_name
278
- )
900
+ db_path = self.get_tenant_db_path_for_operation(tenant_name, is_write)
279
901
  return DatabaseConnection(db_path)