cinchdb 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,25 @@
1
1
  """Tenant management for CinchDB."""
2
2
 
3
+ import hashlib
3
4
  import shutil
5
+ import sqlite3
6
+ import uuid
4
7
  from pathlib import Path
5
8
  from typing import List, Optional
9
+ from datetime import datetime, timezone
6
10
 
7
11
  from cinchdb.models import Tenant
8
12
  from cinchdb.core.path_utils import (
9
13
  get_branch_path,
10
14
  get_tenant_db_path,
15
+ get_database_path,
11
16
  list_tenants,
12
17
  )
13
18
  from cinchdb.core.connection import DatabaseConnection
14
19
  from cinchdb.core.maintenance import check_maintenance_mode
15
20
  from cinchdb.utils.name_validator import validate_name
21
+ from cinchdb.infrastructure.metadata_db import MetadataDB
22
+ from cinchdb.infrastructure.metadata_connection_pool import get_metadata_db
16
23
 
17
24
 
18
25
  class TenantManager:
@@ -30,29 +37,69 @@ class TenantManager:
30
37
  self.database = database
31
38
  self.branch = branch
32
39
  self.branch_path = get_branch_path(self.project_root, database, branch)
33
-
34
- def list_tenants(self) -> List[Tenant]:
40
+ self._empty_tenant_name = "__empty__" # Reserved name for lazy tenant template
41
+
42
+ # Lazy-initialized pooled connection
43
+ self._metadata_db = None
44
+ self.database_id = None
45
+ self.branch_id = None
46
+
47
+ def _ensure_initialized(self) -> None:
48
+ """Ensure metadata connection and IDs are initialized."""
49
+ if self._metadata_db is None:
50
+ self._metadata_db = get_metadata_db(self.project_root)
51
+
52
+ # Initialize database and branch IDs on first access
53
+ if self.database_id is None:
54
+ db_info = self._metadata_db.get_database(self.database)
55
+ if db_info:
56
+ self.database_id = db_info['id']
57
+ branch_info = self._metadata_db.get_branch(self.database_id, self.branch)
58
+ if branch_info:
59
+ self.branch_id = branch_info['id']
60
+
61
+ @property
62
+ def metadata_db(self) -> MetadataDB:
63
+ """Get metadata database connection (lazy-initialized from pool)."""
64
+ self._ensure_initialized()
65
+ return self._metadata_db
66
+
67
+ def list_tenants(self, include_system: bool = False) -> List[Tenant]:
35
68
  """List all tenants in the branch.
36
69
 
70
+ Args:
71
+ include_system: If True, include system tenants like __empty__
72
+
37
73
  Returns:
38
74
  List of Tenant objects
39
75
  """
40
- tenant_names = list_tenants(self.project_root, self.database, self.branch)
76
+ # Ensure initialization
77
+ self._ensure_initialized()
78
+
79
+ if not self.branch_id:
80
+ return []
81
+
82
+ # Get tenants from metadata database
83
+ tenant_records = self.metadata_db.list_tenants(self.branch_id)
41
84
  tenants = []
42
85
 
43
- for name in tenant_names:
86
+ for record in tenant_records:
87
+ # Filter out the __empty__ tenant from user-facing listings unless requested
88
+ if not include_system and record['name'] == self._empty_tenant_name:
89
+ continue
90
+
44
91
  tenant = Tenant(
45
- name=name,
92
+ name=record['name'],
46
93
  database=self.database,
47
94
  branch=self.branch,
48
- is_main=(name == "main"),
95
+ is_main=(record['name'] == "main"),
49
96
  )
50
97
  tenants.append(tenant)
51
98
 
52
99
  return tenants
53
100
 
54
101
  def create_tenant(
55
- self, tenant_name: str, description: Optional[str] = None, lazy: bool = False
102
+ self, tenant_name: str, description: Optional[str] = None, lazy: bool = True
56
103
  ) -> Tenant:
57
104
  """Create a new tenant by copying schema from main tenant.
58
105
 
@@ -65,74 +112,60 @@ class TenantManager:
65
112
  Created Tenant object
66
113
 
67
114
  Raises:
68
- ValueError: If tenant already exists
115
+ ValueError: If tenant already exists or uses reserved name
69
116
  InvalidNameError: If tenant name is invalid
70
117
  MaintenanceError: If branch is in maintenance mode
71
118
  """
119
+ # Check for reserved name
120
+ if tenant_name == self._empty_tenant_name:
121
+ raise ValueError(f"'{self._empty_tenant_name}' is a reserved tenant name")
122
+
72
123
  # Validate tenant name
73
124
  validate_name(tenant_name, "tenant")
74
125
 
75
126
  # Check maintenance mode
76
127
  check_maintenance_mode(self.project_root, self.database, self.branch)
77
-
78
- # Check if tenant metadata already exists
79
- tenants_dir = self.branch_path / "tenants"
80
- tenant_meta_file = tenants_dir / f".{tenant_name}.meta"
81
- new_db_path = get_tenant_db_path(
82
- self.project_root, self.database, self.branch, tenant_name
83
- )
84
128
 
85
- # Validate tenant doesn't exist (either as file or metadata)
86
- if new_db_path.exists() or tenant_meta_file.exists():
129
+ # Ensure initialization
130
+ self._ensure_initialized()
131
+
132
+ if not self.branch_id:
133
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
134
+
135
+ # Check if tenant already exists in metadata
136
+ existing_tenant = self.metadata_db.get_tenant(self.branch_id, tenant_name)
137
+ if existing_tenant:
87
138
  raise ValueError(f"Tenant '{tenant_name}' already exists")
88
139
 
89
- if lazy:
90
- # Just create metadata file, don't create actual database
91
- tenants_dir.mkdir(parents=True, exist_ok=True)
92
- import json
93
- from datetime import datetime, timezone
94
-
95
- metadata = {
96
- "name": tenant_name,
97
- "description": description,
98
- "created_at": datetime.now(timezone.utc).isoformat(),
99
- "lazy": True
100
- }
140
+ # Create tenant ID
141
+ tenant_id = str(uuid.uuid4())
142
+
143
+ # Calculate shard for tenant
144
+ shard = self._calculate_shard(tenant_name)
145
+
146
+ # Create tenant in metadata database
147
+ metadata = {
148
+ "description": description,
149
+ "created_at": datetime.now(timezone.utc).isoformat(),
150
+ }
151
+ self.metadata_db.create_tenant(tenant_id, self.branch_id, tenant_name, shard, metadata)
152
+
153
+ if not lazy:
154
+ # Ensure __empty__ tenant exists with current schema
155
+ self._ensure_empty_tenant()
101
156
 
102
- with open(tenant_meta_file, 'w') as f:
103
- json.dump(metadata, f)
104
- else:
105
- # Create actual database file (existing behavior)
106
- main_db_path = get_tenant_db_path(
107
- self.project_root, self.database, self.branch, "main"
108
- )
157
+ # Create actual database file using sharded paths
158
+ new_db_path = self._get_sharded_tenant_db_path(tenant_name)
159
+ empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)
109
160
 
110
- # Copy main tenant database to new tenant
111
- shutil.copy2(main_db_path, new_db_path)
112
-
113
- # Clear any data from the copied database (keep schema only)
114
- with DatabaseConnection(new_db_path) as conn:
115
- # Get all tables
116
- result = conn.execute("""
117
- SELECT name FROM sqlite_master
118
- WHERE type='table'
119
- AND name NOT LIKE 'sqlite_%'
120
- """)
121
- tables = [row["name"] for row in result.fetchall()]
122
-
123
- # Clear data from each table
124
- for table in tables:
125
- conn.execute(f"DELETE FROM {table}")
126
-
127
- conn.commit()
128
-
129
- # Vacuum the database to reduce size
130
- # Must use raw sqlite3 connection with autocommit mode for VACUUM
131
- import sqlite3
132
- vacuum_conn = sqlite3.connect(str(new_db_path))
133
- vacuum_conn.isolation_level = None # Autocommit mode required for VACUUM
134
- vacuum_conn.execute("VACUUM")
135
- vacuum_conn.close()
161
+ # Directory creation is handled by _get_sharded_tenant_db_path
162
+
163
+ # Copy __empty__ tenant database to new tenant
164
+ # __empty__ already has 512-byte pages and no data
165
+ shutil.copy2(empty_db_path, new_db_path)
166
+
167
+ # Mark as materialized in metadata
168
+ self.metadata_db.mark_tenant_materialized(tenant_id)
136
169
 
137
170
  return Tenant(
138
171
  name=tenant_name,
@@ -142,6 +175,128 @@ class TenantManager:
142
175
  is_main=False,
143
176
  )
144
177
 
178
+ def _calculate_shard(self, tenant_name: str) -> str:
179
+ """Calculate the shard directory for a tenant using SHA256 hash.
180
+
181
+ Args:
182
+ tenant_name: Name of the tenant
183
+
184
+ Returns:
185
+ Two-character hex string (e.g., "a0", "ff")
186
+ """
187
+ hash_val = hashlib.sha256(tenant_name.encode('utf-8')).hexdigest()
188
+ return hash_val[:2]
189
+
190
+ def _get_sharded_tenant_db_path(self, tenant_name: str) -> Path:
191
+ """Get the sharded database path for a tenant using metadata DB lookup.
192
+
193
+ Args:
194
+ tenant_name: Name of the tenant
195
+
196
+ Returns:
197
+ Path to the tenant database file in its shard directory
198
+
199
+ Raises:
200
+ ValueError: If tenant doesn't exist in metadata
201
+ """
202
+ # For __empty__ tenant, calculate shard directly
203
+ if tenant_name == self._empty_tenant_name:
204
+ shard = self._calculate_shard(tenant_name)
205
+ else:
206
+ # Look up shard from metadata DB
207
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
208
+ if not tenant_info or not tenant_info.get('shard'):
209
+ raise ValueError(f"Tenant '{tenant_name}' not found in metadata or missing shard info")
210
+ shard = tenant_info['shard']
211
+
212
+ # Build sharded path
213
+ branch_path = get_branch_path(self.project_root, self.database, self.branch)
214
+ tenants_dir = branch_path / "tenants"
215
+ shard_dir = tenants_dir / shard
216
+
217
+ # Ensure shard directory exists
218
+ shard_dir.mkdir(parents=True, exist_ok=True)
219
+
220
+ return shard_dir / f"{tenant_name}.db"
221
+
222
+ def _ensure_empty_tenant(self) -> None:
223
+ """Ensure the __empty__ tenant exists with current schema.
224
+
225
+ This tenant serves as a template for lazy tenants.
226
+ It's created on-demand when first lazy tenant is read.
227
+ """
228
+ # Ensure initialization
229
+ self._ensure_initialized()
230
+
231
+ if not self.branch_id:
232
+ return
233
+
234
+ # Check if __empty__ exists in metadata
235
+ empty_tenant = self.metadata_db.get_tenant(self.branch_id, self._empty_tenant_name)
236
+
237
+ empty_db_path = self._get_sharded_tenant_db_path(self._empty_tenant_name)
238
+
239
+ # Create in metadata if doesn't exist (should already be created during branch/database init)
240
+ if not empty_tenant:
241
+ tenant_id = str(uuid.uuid4())
242
+ shard = self._calculate_shard(self._empty_tenant_name)
243
+ self.metadata_db.create_tenant(
244
+ tenant_id, self.branch_id, self._empty_tenant_name, shard,
245
+ metadata={"system": True, "description": "Template for lazy tenants"}
246
+ )
247
+ # Don't mark as materialized yet - it will be when the file is created
248
+ empty_tenant = {"id": tenant_id}
249
+
250
+ # If __empty__ database doesn't exist, create it by copying from main tenant
251
+ if not empty_db_path.exists():
252
+ empty_db_path.parent.mkdir(parents=True, exist_ok=True)
253
+
254
+ # Get main tenant database path (may need to materialize it first)
255
+ main_db_path = self._get_sharded_tenant_db_path("main")
256
+
257
+ if main_db_path.exists():
258
+ # Copy main tenant database to __empty__
259
+ shutil.copy2(main_db_path, empty_db_path)
260
+
261
+ # Clear all data from tables (keep schema only)
262
+ with DatabaseConnection(empty_db_path) as conn:
263
+ # Get all tables
264
+ result = conn.execute("""
265
+ SELECT name FROM sqlite_master
266
+ WHERE type='table'
267
+ AND name NOT LIKE 'sqlite_%'
268
+ """)
269
+ tables = [row["name"] for row in result.fetchall()]
270
+
271
+ # Clear data from each table
272
+ for table in tables:
273
+ conn.execute(f"DELETE FROM {table}")
274
+
275
+ conn.commit()
276
+ else:
277
+ # If main doesn't exist either, create empty database
278
+ empty_db_path.touch()
279
+ with DatabaseConnection(empty_db_path):
280
+ pass # Just initialize with PRAGMAs
281
+
282
+ # Optimize with small page size for empty template
283
+ # We need to rebuild the database with new page size
284
+ temp_path = empty_db_path.with_suffix('.tmp')
285
+
286
+ # Create new database with 512-byte pages
287
+ vacuum_conn = sqlite3.connect(str(empty_db_path))
288
+ vacuum_conn.isolation_level = None
289
+ vacuum_conn.execute("PRAGMA page_size = 512")
290
+ vacuum_conn.execute(f"VACUUM INTO '{temp_path}'")
291
+ vacuum_conn.close()
292
+
293
+ # Replace original with optimized version
294
+ shutil.move(str(temp_path), str(empty_db_path))
295
+
296
+ # Mark as materialized now that the file exists
297
+ self.metadata_db.mark_tenant_materialized(empty_tenant['id'])
298
+
299
+
145
300
  def delete_tenant(self, tenant_name: str) -> None:
146
301
  """Delete a tenant.
147
302
 
@@ -149,44 +304,198 @@ class TenantManager:
149
304
  tenant_name: Name of tenant to delete
150
305
 
151
306
  Raises:
152
- ValueError: If tenant doesn't exist or is main tenant
307
+ ValueError: If tenant doesn't exist, is main tenant, or is reserved
153
308
  MaintenanceError: If branch is in maintenance mode
154
309
  """
155
310
  # Check maintenance mode
156
311
  check_maintenance_mode(self.project_root, self.database, self.branch)
157
312
 
158
- # Can't delete main tenant
313
+ # Can't delete main or __empty__ tenants
159
314
  if tenant_name == "main":
160
315
  raise ValueError("Cannot delete the main tenant")
316
+ if tenant_name == self._empty_tenant_name:
317
+ raise ValueError(f"Cannot delete the reserved '{self._empty_tenant_name}' tenant")
161
318
 
162
- # Validate tenant exists
163
- if tenant_name not in list_tenants(
164
- self.project_root, self.database, self.branch
165
- ):
319
+ # Ensure initialization
320
+ self._ensure_initialized()
321
+
322
+ if not self.branch_id:
323
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
324
+
325
+ # Get tenant info from metadata
326
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
327
+ if not tenant_info:
166
328
  raise ValueError(f"Tenant '{tenant_name}' does not exist")
167
329
 
168
- # Check for and delete metadata file (for lazy tenants)
169
- tenants_dir = self.branch_path / "tenants"
170
- meta_file = tenants_dir / f".{tenant_name}.meta"
171
- if meta_file.exists():
172
- meta_file.unlink()
330
+ # Delete from metadata database (this handles cascade delete)
331
+ with self.metadata_db.conn:
332
+ self.metadata_db.conn.execute(
333
+ "DELETE FROM tenants WHERE id = ?",
334
+ (tenant_info['id'],)
335
+ )
173
336
 
174
- # Delete tenant database file and related files (if they exist)
175
- db_path = get_tenant_db_path(
176
- self.project_root, self.database, self.branch, tenant_name
177
- )
178
- if db_path.exists():
179
- db_path.unlink()
337
+ # Delete tenant database file and related files (if they exist and it's materialized)
338
+ if tenant_info['materialized']:
339
+ db_path = get_tenant_db_path(
340
+ self.project_root, self.database, self.branch, tenant_name
341
+ )
342
+ if db_path.exists():
343
+ db_path.unlink()
180
344
 
181
- # Also remove WAL and SHM files if they exist
182
- wal_path = db_path.with_suffix(".db-wal")
183
- shm_path = db_path.with_suffix(".db-shm")
345
+ # Also remove WAL and SHM files if they exist
346
+ wal_path = db_path.with_suffix(".db-wal")
347
+ shm_path = db_path.with_suffix(".db-shm")
184
348
 
185
- if wal_path.exists():
186
- wal_path.unlink()
187
- if shm_path.exists():
188
- shm_path.unlink()
349
+ if wal_path.exists():
350
+ wal_path.unlink()
351
+ if shm_path.exists():
352
+ shm_path.unlink()
189
353
 
354
+ def optimize_all_tenants(self, force: bool = False) -> dict:
355
+ """Optimize storage for all materialized tenants in the branch.
356
+
357
+ This is designed to be called periodically (e.g., every minute) to:
358
+ - Reclaim unused space with VACUUM
359
+ - Adjust page sizes as databases grow
360
+ - Keep small databases compact
361
+
362
+ Args:
363
+ force: If True, optimize all tenants regardless of size
364
+
365
+ Returns:
366
+ Dictionary with optimization results:
367
+ - optimized: List of tenant names that were optimized
368
+ - skipped: List of tenant names that were skipped
369
+ - errors: List of tuples (tenant_name, error_message)
370
+ """
371
+ results = {
372
+ "optimized": [],
373
+ "skipped": [],
374
+ "errors": []
375
+ }
376
+
377
+ # Ensure initialization
378
+ self._ensure_initialized()
379
+
380
+ if not self.branch_id:
381
+ return results
382
+
383
+ # Get all materialized tenants for this branch
384
+ tenants = self.metadata_db.list_tenants(self.branch_id, materialized_only=True)
385
+
386
+ for tenant in tenants:
387
+ tenant_name = tenant['name']
388
+
389
+ # Skip system tenants unless forced
390
+ if not force and tenant_name in ["main", self._empty_tenant_name]:
391
+ results["skipped"].append(tenant_name)
392
+ continue
393
+
394
+ try:
395
+ optimized = self.optimize_tenant_storage(tenant_name, force=force)
396
+ if optimized:
397
+ results["optimized"].append(tenant_name)
398
+ else:
399
+ results["skipped"].append(tenant_name)
400
+ except Exception as e:
401
+ results["errors"].append((tenant_name, str(e)))
402
+
403
+ return results
404
+
405
+ def optimize_tenant_storage(self, tenant_name: str, force: bool = False) -> bool:
406
+ """Optimize tenant database storage with VACUUM and optional page size adjustment.
407
+
408
+ This performs:
409
+ 1. Always: VACUUM to reclaim unused space and defragment
410
+ 2. If needed: Rebuild with optimal page size based on database size
411
+
412
+ Args:
413
+ tenant_name: Name of tenant to optimize
414
+ force: If True, always perform VACUUM even if page size is optimal
415
+
416
+ Returns:
417
+ True if optimization was performed, False if tenant doesn't exist
418
+ """
419
+ # Ensure initialization
420
+ self._ensure_initialized()
421
+
422
+ if not self.branch_id:
423
+ return False
424
+
425
+ # Skip system tenants
426
+ if tenant_name in ["main", self._empty_tenant_name]:
427
+ return False
428
+
429
+ # Get tenant info
430
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
431
+ if not tenant_info or not tenant_info['materialized']:
432
+ return False
433
+
434
+ db_path = get_tenant_db_path(
435
+ self.project_root, self.database, self.branch, tenant_name
436
+ )
437
+
438
+ if not db_path.exists():
439
+ return False
440
+
441
+ # Check current page size
442
+ conn = sqlite3.connect(str(db_path))
443
+ current_page_size = conn.execute("PRAGMA page_size").fetchone()[0]
444
+ conn.close()
445
+
446
+ # Determine optimal page size
447
+ optimal_page_size = self._get_optimal_page_size(db_path)
448
+
449
+ # Decide if we need to rebuild with new page size
450
+ needs_page_size_change = (current_page_size != optimal_page_size and
451
+ db_path.stat().st_size > 1024 * 1024) # Only if > 1MB
452
+
453
+ if needs_page_size_change:
454
+ # Rebuild with new page size using VACUUM INTO
455
+ temp_path = db_path.with_suffix('.tmp')
456
+ conn = sqlite3.connect(str(db_path))
457
+ conn.isolation_level = None
458
+ conn.execute(f"PRAGMA page_size = {optimal_page_size}")
459
+ conn.execute(f"VACUUM INTO '{temp_path}'")
460
+ conn.close()
461
+
462
+ # Replace original with optimized version
463
+ shutil.move(str(temp_path), str(db_path))
464
+ return True
465
+ elif force or current_page_size == 512:
466
+ # Just run regular VACUUM to defragment and reclaim space
467
+ # Always vacuum 512-byte page databases to keep them compact
468
+ conn = sqlite3.connect(str(db_path))
469
+ conn.isolation_level = None
470
+ conn.execute("VACUUM")
471
+ conn.close()
472
+ return True
473
+
474
+ return False
475
+
476
+ def _get_optimal_page_size(self, db_path: Path) -> int:
477
+ """Determine optimal page size based on database file size.
478
+
479
+ Args:
480
+ db_path: Path to database file
481
+
482
+ Returns:
483
+ Optimal page size in bytes
484
+ """
485
+ if not db_path.exists():
486
+ return 512 # Default for new/empty databases
487
+
488
+ size_mb = db_path.stat().st_size / (1024 * 1024)
489
+
490
+ if size_mb < 0.1: # < 100KB
491
+ return 512
492
+ elif size_mb < 10: # < 10MB
493
+ return 4096 # 4KB - good balance for small-medium DBs
494
+ elif size_mb < 100: # < 100MB
495
+ return 8192 # 8KB - better for larger rows
496
+ else: # >= 100MB
497
+ return 16384 # 16KB - optimal for bulk operations
498
+
190
499
  def materialize_tenant(self, tenant_name: str) -> None:
191
500
  """Materialize a lazy tenant into an actual database file.
192
501
 
@@ -196,59 +505,44 @@ class TenantManager:
196
505
  Raises:
197
506
  ValueError: If tenant doesn't exist or is already materialized
198
507
  """
199
- tenants_dir = self.branch_path / "tenants"
200
- tenant_meta_file = tenants_dir / f".{tenant_name}.meta"
201
- db_path = get_tenant_db_path(
202
- self.project_root, self.database, self.branch, tenant_name
203
- )
508
+ # Ensure initialization
509
+ self._ensure_initialized()
204
510
 
511
+ if not self.branch_id:
512
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
513
+
514
+ # Get tenant info from metadata
515
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
516
+ if not tenant_info:
517
+ raise ValueError(f"Tenant '{tenant_name}' does not exist")
518
+
205
519
  # Check if already materialized
206
- if db_path.exists():
520
+ if tenant_info['materialized']:
207
521
  return # Already materialized
208
522
 
209
- # Check if metadata exists
210
- if not tenant_meta_file.exists():
211
- raise ValueError(f"Tenant '{tenant_name}' does not exist")
523
+ db_path = get_tenant_db_path(
524
+ self.project_root, self.database, self.branch, tenant_name
525
+ )
526
+
527
+ # Ensure tenants directory exists
528
+ db_path.parent.mkdir(parents=True, exist_ok=True)
529
+
530
+ # Ensure __empty__ tenant exists with current schema
531
+ self._ensure_empty_tenant()
212
532
 
213
- # Get main tenant path for schema copy
214
- main_db_path = get_tenant_db_path(
215
- self.project_root, self.database, self.branch, "main"
533
+ # Get __empty__ tenant path for schema copy
534
+ empty_db_path = get_tenant_db_path(
535
+ self.project_root, self.database, self.branch, self._empty_tenant_name
216
536
  )
217
537
 
218
- # Copy main tenant database to new tenant
219
- shutil.copy2(main_db_path, db_path)
220
-
221
- # Clear any data from the copied database (keep schema only)
222
- with DatabaseConnection(db_path) as conn:
223
- # Get all tables
224
- result = conn.execute("""
225
- SELECT name FROM sqlite_master
226
- WHERE type='table'
227
- AND name NOT LIKE 'sqlite_%'
228
- """)
229
- tables = [row["name"] for row in result.fetchall()]
230
-
231
- # Clear data from each table
232
- for table in tables:
233
- conn.execute(f"DELETE FROM {table}")
234
-
235
- conn.commit()
236
-
237
- # Vacuum the database to reduce size
238
- import sqlite3
239
- vacuum_conn = sqlite3.connect(str(db_path))
240
- vacuum_conn.isolation_level = None
241
- vacuum_conn.execute("VACUUM")
242
- vacuum_conn.close()
243
-
244
- # Update metadata to indicate it's no longer lazy
245
- import json
246
- with open(tenant_meta_file, 'r') as f:
247
- metadata = json.load(f)
248
- metadata['lazy'] = False
249
- metadata['materialized_at'] = Path(db_path).stat().st_mtime
250
- with open(tenant_meta_file, 'w') as f:
251
- json.dump(metadata, f)
538
+ # Copy __empty__ tenant database to new tenant
539
+ shutil.copy2(empty_db_path, db_path)
540
+
541
+ # No need to vacuum when copying from __empty__ since it's already optimized
542
+ # The __empty__ template already has 512-byte pages and is vacuumed
543
+
544
+ # Mark as materialized in metadata database
545
+ self.metadata_db.mark_tenant_materialized(tenant_info['id'])
252
546
 
253
547
  def copy_tenant(self, source_tenant: str, target_tenant: str) -> Tenant:
254
548
  """Copy a tenant to a new tenant.
@@ -280,17 +574,33 @@ class TenantManager:
280
574
  # Validate target doesn't exist
281
575
  if target_tenant in list_tenants(self.project_root, self.database, self.branch):
282
576
  raise ValueError(f"Tenant '{target_tenant}' already exists")
577
+
578
+ # Ensure initialization
579
+ self._ensure_initialized()
580
+
581
+ if not self.branch_id:
582
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
283
583
 
284
- # Get paths
285
- source_path = get_tenant_db_path(
286
- self.project_root, self.database, self.branch, source_tenant
287
- )
288
- target_path = get_tenant_db_path(
289
- self.project_root, self.database, self.branch, target_tenant
290
- )
584
+ # Create tenant in metadata database first with shard
585
+ tenant_id = str(uuid.uuid4())
586
+ target_shard = self._calculate_shard(target_tenant)
587
+ metadata = {
588
+ "description": f"Copied from {source_tenant}",
589
+ "created_at": datetime.now(timezone.utc).isoformat(),
590
+ }
591
+ self.metadata_db.create_tenant(tenant_id, self.branch_id, target_tenant, target_shard, metadata)
592
+
593
+ # Get paths using sharded approach
594
+ source_path = self._get_sharded_tenant_db_path(source_tenant)
595
+ target_path = self._get_sharded_tenant_db_path(target_tenant)
596
+
597
+ # Directory creation is handled by _get_sharded_tenant_db_path
291
598
 
292
599
  # Copy database file
293
600
  shutil.copy2(source_path, target_path)
601
+
602
+ # Mark as materialized since we copied a physical file
603
+ self.metadata_db.mark_tenant_materialized(tenant_id)
294
604
 
295
605
  return Tenant(
296
606
  name=target_tenant,
@@ -324,47 +634,268 @@ class TenantManager:
324
634
  # Validate new doesn't exist
325
635
  if new_name in list_tenants(self.project_root, self.database, self.branch):
326
636
  raise ValueError(f"Tenant '{new_name}' already exists")
637
+
638
+ # Ensure initialization
639
+ self._ensure_initialized()
640
+
641
+ if not self.branch_id:
642
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
643
+
644
+ # Get tenant info from metadata
645
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, old_name)
646
+ if not tenant_info:
647
+ raise ValueError(f"Tenant '{old_name}' does not exist in metadata")
648
+
649
+ # Get old path before updating metadata (if materialized)
650
+ old_path = None
651
+ new_path = None
652
+ if tenant_info['materialized']:
653
+ # Calculate paths before metadata update
654
+ old_shard = tenant_info['shard']
655
+ new_shard = self._calculate_shard(new_name)
656
+
657
+ branch_path = get_branch_path(self.project_root, self.database, self.branch)
658
+ tenants_dir = branch_path / "tenants"
659
+
660
+ old_path = tenants_dir / old_shard / f"{old_name}.db"
661
+ new_shard_dir = tenants_dir / new_shard
662
+ new_shard_dir.mkdir(parents=True, exist_ok=True)
663
+ new_path = new_shard_dir / f"{new_name}.db"
664
+
665
+ # Update metadata database
666
+ new_shard = self._calculate_shard(new_name)
667
+ with self.metadata_db.conn:
668
+ self.metadata_db.conn.execute(
669
+ "UPDATE tenants SET name = ?, shard = ? WHERE id = ?",
670
+ (new_name, new_shard, tenant_info['id'])
671
+ )
327
672
 
328
- # Get paths
329
- old_path = get_tenant_db_path(
330
- self.project_root, self.database, self.branch, old_name
331
- )
332
- new_path = get_tenant_db_path(
333
- self.project_root, self.database, self.branch, new_name
334
- )
673
+ # Rename physical files if tenant is materialized
674
+ if tenant_info['materialized'] and old_path and new_path:
675
+
676
+ # Rename database file if it exists
677
+ if old_path.exists():
678
+ new_path.parent.mkdir(parents=True, exist_ok=True)
679
+ old_path.rename(new_path)
335
680
 
336
- # Rename database file
337
- old_path.rename(new_path)
681
+ # Also rename WAL and SHM files if they exist
682
+ old_wal = old_path.with_suffix(".db-wal")
683
+ old_shm = old_path.with_suffix(".db-shm")
684
+ new_wal = new_path.with_suffix(".db-wal")
685
+ new_shm = new_path.with_suffix(".db-shm")
338
686
 
339
- # Also rename WAL and SHM files if they exist
340
- old_wal = old_path.with_suffix(".db-wal")
341
- old_shm = old_path.with_suffix(".db-shm")
342
- new_wal = new_path.with_suffix(".db-wal")
343
- new_shm = new_path.with_suffix(".db-shm")
687
+ if old_wal.exists():
688
+ old_wal.rename(new_wal)
689
+ if old_shm.exists():
690
+ old_shm.rename(new_shm)
344
691
 
345
- if old_wal.exists():
346
- old_wal.rename(new_wal)
347
- if old_shm.exists():
348
- old_shm.rename(new_shm)
692
+ def get_tenant_size(self, tenant_name: str) -> dict:
693
+ """Get storage size information for a tenant.
694
+
695
+ Args:
696
+ tenant_name: Name of the tenant
697
+
698
+ Returns:
699
+ Dictionary with size information:
700
+ - name: Tenant name
701
+ - materialized: Whether tenant is materialized
702
+ - size_bytes: Size in bytes (0 if lazy)
703
+ - size_kb: Size in KB
704
+ - size_mb: Size in MB
705
+ - page_size: SQLite page size (if materialized)
706
+ - page_count: Number of pages (if materialized)
707
+
708
+ Raises:
709
+ ValueError: If tenant doesn't exist
710
+ """
711
+ # Ensure initialization
712
+ self._ensure_initialized()
713
+
714
+ if not self.branch_id:
715
+ raise ValueError(f"Branch '{self.branch}' not found")
716
+
717
+ # Get tenant info from metadata
718
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
719
+ if not tenant_info:
720
+ raise ValueError(f"Tenant '{tenant_name}' does not exist")
721
+
722
+ result = {
723
+ "name": tenant_name,
724
+ "materialized": tenant_info['materialized'],
725
+ "size_bytes": 0,
726
+ "size_kb": 0.0,
727
+ "size_mb": 0.0,
728
+ "page_size": None,
729
+ "page_count": None
730
+ }
731
+
732
+ # If not materialized, return zeros
733
+ if not tenant_info['materialized']:
734
+ return result
735
+
736
+ # Get actual file size
737
+ db_path = get_tenant_db_path(
738
+ self.project_root, self.database, self.branch, tenant_name
739
+ )
740
+
741
+ if db_path.exists():
742
+ size_bytes = db_path.stat().st_size
743
+ result["size_bytes"] = size_bytes
744
+ result["size_kb"] = size_bytes / 1024
745
+ result["size_mb"] = size_bytes / (1024 * 1024)
746
+
747
+ # Get page information
748
+ try:
749
+ conn = sqlite3.connect(str(db_path))
750
+ result["page_size"] = conn.execute("PRAGMA page_size").fetchone()[0]
751
+ result["page_count"] = conn.execute("PRAGMA page_count").fetchone()[0]
752
+ conn.close()
753
+ except Exception:
754
+ pass # Ignore errors reading page info
755
+
756
+ return result
757
+
758
+ def get_all_tenant_sizes(self) -> dict:
759
+ """Get storage size information for all tenants in the branch.
760
+
761
+ Returns:
762
+ Dictionary with:
763
+ - tenants: List of individual tenant size info
764
+ - total_size_bytes: Total size of all materialized tenants
765
+ - total_size_mb: Total size in MB
766
+ - lazy_count: Number of lazy tenants
767
+ - materialized_count: Number of materialized tenants
768
+ """
769
+ # Ensure initialization
770
+ self._ensure_initialized()
771
+
772
+ if not self.branch_id:
773
+ return {
774
+ "tenants": [],
775
+ "total_size_bytes": 0,
776
+ "total_size_mb": 0.0,
777
+ "lazy_count": 0,
778
+ "materialized_count": 0
779
+ }
780
+
781
+ # Get all tenants for this branch
782
+ all_tenants = self.metadata_db.list_tenants(self.branch_id)
783
+
784
+ result = {
785
+ "tenants": [],
786
+ "total_size_bytes": 0,
787
+ "total_size_mb": 0.0,
788
+ "lazy_count": 0,
789
+ "materialized_count": 0
790
+ }
791
+
792
+ for tenant_info in all_tenants:
793
+ tenant_name = tenant_info['name']
794
+ size_info = self.get_tenant_size(tenant_name)
795
+ result["tenants"].append(size_info)
796
+
797
+ if size_info["materialized"]:
798
+ result["materialized_count"] += 1
799
+ result["total_size_bytes"] += size_info["size_bytes"]
800
+ else:
801
+ result["lazy_count"] += 1
802
+
803
+ result["total_size_mb"] = result["total_size_bytes"] / (1024 * 1024)
804
+
805
+ # Sort by size descending
806
+ result["tenants"].sort(key=lambda x: x["size_bytes"], reverse=True)
807
+
808
+ return result
809
+
810
+ def is_tenant_lazy(self, tenant_name: str) -> bool:
811
+ """Check if a tenant is lazy (not materialized).
812
+
813
+ Args:
814
+ tenant_name: Name of the tenant to check
815
+
816
+ Returns:
817
+ True if tenant is lazy, False if materialized
818
+ """
819
+ # Check if it's the __empty__ tenant (always materialized when exists)
820
+ if tenant_name == self._empty_tenant_name:
821
+ return False
822
+
823
+ # Ensure initialization
824
+ self._ensure_initialized()
825
+
826
+ if not self.branch_id:
827
+ return False
828
+
829
+ # Check metadata database
830
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
831
+ if not tenant_info:
832
+ return False
833
+
834
+ # Tenant is lazy if it's not materialized
835
+ return not tenant_info['materialized']
836
+
837
+ def get_tenant_db_path_for_operation(self, tenant_name: str, is_write: bool = False) -> Path:
838
+ """Get the appropriate database path for a tenant operation.
839
+
840
+ For lazy tenants:
841
+ - Read operations use __empty__ tenant
842
+ - Write operations trigger materialization
843
+
844
+ Args:
845
+ tenant_name: Name of the tenant
846
+ is_write: Whether this is for a write operation
847
+
848
+ Returns:
849
+ Path to the appropriate database file
850
+
851
+ Raises:
852
+ ValueError: If tenant doesn't exist
853
+ """
854
+ # Ensure initialization
855
+ self._ensure_initialized()
856
+
857
+ # Check if tenant exists in metadata
858
+ if not self.branch_id:
859
+ raise ValueError(f"Branch '{self.branch}' not found in metadata database")
860
+
861
+ if tenant_name != self._empty_tenant_name:
862
+ tenant_info = self.metadata_db.get_tenant(self.branch_id, tenant_name)
863
+ if not tenant_info:
864
+ raise ValueError(f"Tenant '{tenant_name}' does not exist")
865
+
866
+ # For lazy tenants
867
+ if self.is_tenant_lazy(tenant_name):
868
+ if is_write:
869
+ # Materialize the tenant for writes
870
+ self.materialize_tenant(tenant_name)
871
+ return self._get_sharded_tenant_db_path(tenant_name)
872
+ else:
873
+ # Use __empty__ tenant for reads
874
+ self._ensure_empty_tenant()
875
+ return self._get_sharded_tenant_db_path(self._empty_tenant_name)
876
+ else:
877
+ # For materialized tenants, use their actual database
878
+ return self._get_sharded_tenant_db_path(tenant_name)
349
879
 
350
- def get_tenant_connection(self, tenant_name: str) -> DatabaseConnection:
880
+ def get_tenant_connection(self, tenant_name: str, is_write: bool = False) -> DatabaseConnection:
351
881
  """Get a database connection for a tenant.
352
882
 
883
+ IMPORTANT: The returned connection must be used with a context manager (with statement)
884
+ to ensure proper resource cleanup and prevent file descriptor leaks.
885
+
353
886
  Args:
354
887
  tenant_name: Tenant name
888
+ is_write: Whether this connection will be used for writes
355
889
 
356
890
  Returns:
357
- DatabaseConnection object
891
+ DatabaseConnection object (must be used with 'with' statement)
358
892
 
359
893
  Raises:
360
894
  ValueError: If tenant doesn't exist
895
+
896
+ Example:
897
+ with tenant_manager.get_tenant_connection("main") as conn:
898
+ conn.execute("SELECT * FROM table")
361
899
  """
362
- if tenant_name not in list_tenants(
363
- self.project_root, self.database, self.branch
364
- ):
365
- raise ValueError(f"Tenant '{tenant_name}' does not exist")
366
-
367
- db_path = get_tenant_db_path(
368
- self.project_root, self.database, self.branch, tenant_name
369
- )
900
+ db_path = self.get_tenant_db_path_for_operation(tenant_name, is_write)
370
901
  return DatabaseConnection(db_path)