cinchdb 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,29 @@
1
1
  """Project initialization for CinchDB."""
2
2
 
3
+ import hashlib
3
4
  import json
5
+ import uuid
4
6
  from datetime import datetime, timezone
5
7
  from pathlib import Path
6
8
  from typing import Optional
7
9
 
8
10
  from cinchdb.core.connection import DatabaseConnection
9
11
  from cinchdb.config import ProjectConfig
12
+ from cinchdb.infrastructure.metadata_db import MetadataDB
13
+ from cinchdb.infrastructure.metadata_connection_pool import get_metadata_db
14
+
15
+
16
+ def _calculate_shard(tenant_name: str) -> str:
17
+ """Calculate the shard directory for a tenant using SHA256 hash.
18
+
19
+ Args:
20
+ tenant_name: Name of the tenant
21
+
22
+ Returns:
23
+ Two-character hex string (e.g., "a0", "ff")
24
+ """
25
+ hash_val = hashlib.sha256(tenant_name.encode('utf-8')).hexdigest()
26
+ return hash_val[:2]
10
27
 
11
28
 
12
29
  class ProjectInitializer:
@@ -21,6 +38,14 @@ class ProjectInitializer:
21
38
  self.project_dir = Path(project_dir) if project_dir else Path.cwd()
22
39
  self.config_dir = self.project_dir / ".cinchdb"
23
40
  self.config_path = self.config_dir / "config.toml"
41
+ self._metadata_db = None
42
+
43
@property
def metadata_db(self) -> MetadataDB:
    """Return the metadata database connection, fetching it on first access.

    The connection is obtained through ``get_metadata_db`` for this
    initializer's ``project_dir`` and then cached on the instance.
    """
    cached = self._metadata_db
    if cached is not None:
        return cached

    # First access: ask the pool helper and remember the result.
    self._metadata_db = get_metadata_db(self.project_dir)
    return self._metadata_db
24
49
 
25
50
  def init_project(
26
51
  self, database_name: str = "main", branch_name: str = "main"
@@ -36,7 +61,13 @@ class ProjectInitializer:
36
61
 
37
62
  Raises:
38
63
  FileExistsError: If project already exists at the location
64
+ InvalidNameError: If database name is invalid
39
65
  """
66
+ from cinchdb.utils.name_validator import validate_name
67
+
68
+ # Validate database name
69
+ validate_name(database_name, "database")
70
+
40
71
  if self.config_path.exists():
41
72
  raise FileExistsError(f"Project already exists at {self.config_dir}")
42
73
 
@@ -48,9 +79,57 @@ class ProjectInitializer:
48
79
 
49
80
  # Save config
50
81
  self._save_config(config)
82
+
83
+ # Add initial database to metadata (metadata_db property will auto-initialize)
84
+ database_id = str(uuid.uuid4())
85
+ self.metadata_db.create_database(
86
+ database_id, database_name,
87
+ description="Initial database",
88
+ metadata={"initial_branch": branch_name}
89
+ )
90
+
91
+ # Add initial branch to metadata
92
+ branch_id = str(uuid.uuid4())
93
+ self.metadata_db.create_branch(
94
+ branch_id, database_id, branch_name,
95
+ parent_branch=None,
96
+ schema_version="v1.0.0",
97
+ metadata={"created_at": datetime.now(timezone.utc).isoformat()}
98
+ )
51
99
 
52
- # Create default database structure
53
- self._create_database_structure(database_name, branch_name)
100
+ # Create default database structure (materialized by default for initial database)
101
+ self._create_database_structure(database_name, branch_name, create_tenant_files=True)
102
+
103
+ # Mark as materialized since we created the structure
104
+ self.metadata_db.mark_database_materialized(database_id)
105
+ self.metadata_db.mark_branch_materialized(branch_id)
106
+
107
+ # Also create main tenant in metadata
108
+ tenant_id = str(uuid.uuid4())
109
+ main_shard = _calculate_shard("main")
110
+ self.metadata_db.create_tenant(
111
+ tenant_id, branch_id, "main", main_shard,
112
+ metadata={"created_at": datetime.now(timezone.utc).isoformat()}
113
+ )
114
+ self.metadata_db.mark_tenant_materialized(tenant_id)
115
+
116
+ # Create __empty__ tenant in metadata (for lazy tenant reads)
117
+ empty_tenant_id = str(uuid.uuid4())
118
+ empty_shard = _calculate_shard("__empty__")
119
+ self.metadata_db.create_tenant(
120
+ empty_tenant_id, branch_id, "__empty__", empty_shard,
121
+ metadata={
122
+ "system": True,
123
+ "description": "Template for lazy tenants",
124
+ "created_at": datetime.now(timezone.utc).isoformat()
125
+ }
126
+ )
127
+ self.metadata_db.mark_tenant_materialized(empty_tenant_id)
128
+
129
+ # Create physical __empty__ tenant with schema from main
130
+ from cinchdb.managers.tenant import TenantManager
131
+ tenant_mgr = TenantManager(self.project_dir, database_name, branch_name)
132
+ tenant_mgr._ensure_empty_tenant()
54
133
 
55
134
  return config
56
135
 
@@ -59,6 +138,7 @@ class ProjectInitializer:
59
138
  database_name: str,
60
139
  branch_name: str = "main",
61
140
  description: Optional[str] = None,
141
+ lazy: bool = True,
62
142
  ) -> None:
63
143
  """Initialize a new database within an existing project.
64
144
 
@@ -66,26 +146,78 @@ class ProjectInitializer:
66
146
  database_name: Name for the database
67
147
  branch_name: Initial branch name (default: "main")
68
148
  description: Optional description for the database
149
+ lazy: If True, don't create actual database files until first use
69
150
 
70
151
  Raises:
71
152
  FileNotFoundError: If project doesn't exist
72
153
  FileExistsError: If database already exists
154
+ InvalidNameError: If database name is invalid
73
155
  """
156
+ from cinchdb.utils.name_validator import validate_name
157
+
158
+ # Validate database name
159
+ validate_name(database_name, "database")
160
+
74
161
  if not self.config_path.exists():
75
162
  raise FileNotFoundError(f"No CinchDB project found at {self.config_dir}")
76
-
77
- db_path = self.config_dir / "databases" / database_name
78
- if db_path.exists():
163
+
164
+ # Check if database already exists in metadata
165
+ existing_db = self.metadata_db.get_database(database_name)
166
+ if existing_db:
79
167
  raise FileExistsError(f"Database '{database_name}' already exists")
80
168
 
81
- # Create database structure
82
- self._create_database_structure(database_name, branch_name, description)
169
+ # Create database ID
170
+ database_id = str(uuid.uuid4())
171
+
172
+ # Create database in metadata
173
+ metadata = {
174
+ "description": description,
175
+ "initial_branch": branch_name,
176
+ "created_at": datetime.now(timezone.utc).isoformat(),
177
+ }
178
+ self.metadata_db.create_database(database_id, database_name, description, metadata)
179
+
180
+ # Create initial branch in metadata
181
+ branch_id = str(uuid.uuid4())
182
+ self.metadata_db.create_branch(
183
+ branch_id, database_id, branch_name,
184
+ parent_branch=None,
185
+ schema_version="v1.0.0",
186
+ metadata={"created_at": datetime.now(timezone.utc).isoformat()}
187
+ )
188
+
189
+ # Create main tenant entry in metadata (will be materialized if database is not lazy)
190
+ main_tenant_id = str(uuid.uuid4())
191
+ main_shard = _calculate_shard("main")
192
+ self.metadata_db.create_tenant(
193
+ main_tenant_id, branch_id, "main", main_shard,
194
+ metadata={"description": "Default tenant", "created_at": datetime.now(timezone.utc).isoformat()}
195
+ )
196
+
197
+ # Create __empty__ tenant entry in metadata (lazy)
198
+ # This serves as a template for all lazy tenants in this branch
199
+ empty_tenant_id = str(uuid.uuid4())
200
+ empty_shard = _calculate_shard("__empty__")
201
+ self.metadata_db.create_tenant(
202
+ empty_tenant_id, branch_id, "__empty__", empty_shard,
203
+ metadata={"system": True, "description": "Template for lazy tenants"}
204
+ )
205
+
206
+ if not lazy:
207
+ # Create actual database structure
208
+ self._create_database_structure(database_name, branch_name, description)
209
+
210
+ # Mark as materialized
211
+ self.metadata_db.mark_database_materialized(database_id)
212
+ self.metadata_db.mark_branch_materialized(branch_id)
213
+ self.metadata_db.mark_tenant_materialized(main_tenant_id)
83
214
 
84
215
  def _create_database_structure(
85
216
  self,
86
217
  database_name: str,
87
218
  branch_name: str = "main",
88
219
  description: Optional[str] = None,
220
+ create_tenant_files: bool = False,
89
221
  ) -> None:
90
222
  """Create the directory structure for a database.
91
223
 
@@ -119,8 +251,12 @@ class ProjectInitializer:
119
251
  tenant_dir = branch_path / "tenants"
120
252
  tenant_dir.mkdir(exist_ok=True)
121
253
 
122
- # Create and initialize main tenant database
123
- self._init_tenant_database(tenant_dir / "main.db")
254
+ # Create main tenant database in sharded directory (only if requested)
255
+ if create_tenant_files:
256
+ main_shard = _calculate_shard("main")
257
+ main_shard_dir = tenant_dir / main_shard
258
+ main_shard_dir.mkdir(parents=True, exist_ok=True)
259
+ self._init_tenant_database(main_shard_dir / "main.db")
124
260
 
125
261
  def _init_tenant_database(self, db_path: Path) -> None:
126
262
  """Initialize a tenant database with proper PRAGMAs.
@@ -140,6 +276,46 @@ class ProjectInitializer:
140
276
  # - foreign_keys = ON
141
277
  pass
142
278
 
279
def materialize_database(self, database_name: str) -> None:
    """Create the on-disk structure for a database that exists only in metadata.

    If the database is already flagged as materialized this is a no-op.
    If its directory already exists on disk, only the database's
    materialized flag is set. Otherwise the directory structure is created
    (without tenant files) and both the database and its initial branch
    are flagged as materialized.

    Args:
        database_name: Name of the database to materialize

    Raises:
        ValueError: If no database with this name is recorded in metadata
    """
    record = self.metadata_db.get_database(database_name)
    if not record:
        raise ValueError(f"Database '{database_name}' does not exist")

    # Nothing to do when metadata already says the database is on disk.
    if record['materialized']:
        return

    target_dir = self.config_dir / "databases" / database_name
    if not target_dir.exists():
        # The initial branch name is stored in the JSON metadata blob.
        raw_metadata = record['metadata']
        details = json.loads(raw_metadata) if raw_metadata else {}
        initial_branch = details.get("initial_branch", "main")

        # Tenant files are not created here (create_tenant_files=False).
        self._create_database_structure(
            database_name,
            initial_branch,
            record.get('description'),
            create_tenant_files=False,
        )
        self.metadata_db.mark_database_materialized(record['id'])

        # Flag the initial branch too, when metadata knows about it.
        branch_record = self.metadata_db.get_branch(record['id'], initial_branch)
        if branch_record:
            self.metadata_db.mark_branch_materialized(branch_record['id'])
        return

    # Directory is already present: just bring the metadata flag in sync.
    self.metadata_db.mark_database_materialized(record['id'])
318
+
143
319
  def _save_config(self, config: ProjectConfig) -> None:
144
320
  """Save configuration to disk.
145
321
 
@@ -194,6 +370,7 @@ def init_database(
194
370
  database_name: str = "main",
195
371
  branch_name: str = "main",
196
372
  description: Optional[str] = None,
373
+ lazy: bool = True,
197
374
  ) -> None:
198
375
  """Initialize a new database within an existing project.
199
376
 
@@ -205,10 +382,11 @@ def init_database(
205
382
  database_name: Name for the database
206
383
  branch_name: Initial branch name (default: "main")
207
384
  description: Optional description
385
+ lazy: If True, don't create actual database files until first use
208
386
 
209
387
  Raises:
210
388
  FileNotFoundError: If project doesn't exist
211
389
  FileExistsError: If database already exists
212
390
  """
213
391
  initializer = ProjectInitializer(project_dir)
214
- initializer.init_database(database_name, branch_name, description)
392
+ initializer.init_database(database_name, branch_name, description, lazy)
@@ -1,7 +1,7 @@
1
1
  """Path utilities for CinchDB."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import List
4
+ from typing import List, Optional
5
5
 
6
6
 
7
7
  def get_project_root(start_path: Path) -> Path:
@@ -73,7 +73,7 @@ def get_tenant_path(
73
73
  def get_tenant_db_path(
74
74
  project_root: Path, database: str, branch: str, tenant: str
75
75
  ) -> Path:
76
- """Get path to tenant database file.
76
+ """Get path to tenant database file using hash-based sharding.
77
77
 
78
78
  Args:
79
79
  project_root: Project root directory
@@ -82,9 +82,19 @@ def get_tenant_db_path(
82
82
  tenant: Tenant name
83
83
 
84
84
  Returns:
85
- Path to tenant database file
85
+ Path to tenant database file in sharded directory structure
86
86
  """
87
- return get_tenant_path(project_root, database, branch, tenant) / f"{tenant}.db"
87
+ import hashlib
88
+
89
+ # Calculate shard using SHA256 hash (same as TenantManager)
90
+ hash_val = hashlib.sha256(tenant.encode('utf-8')).hexdigest()
91
+ shard = hash_val[:2]
92
+
93
+ # Build sharded path: /tenants/{shard}/{tenant}.db
94
+ tenants_dir = get_tenant_path(project_root, database, branch, tenant)
95
+ shard_dir = tenants_dir / shard
96
+
97
+ return shard_dir / f"{tenant}.db"
88
98
 
89
99
 
90
100
  def ensure_directory(path: Path) -> None:
@@ -105,11 +115,14 @@ def list_databases(project_root: Path) -> List[str]:
105
115
  Returns:
106
116
  List of database names
107
117
  """
108
- db_dir = project_root / ".cinchdb" / "databases"
109
- if not db_dir.exists():
118
+ metadata_db_path = project_root / ".cinchdb" / "metadata.db"
119
+ if not metadata_db_path.exists():
110
120
  return []
111
-
112
- return sorted([d.name for d in db_dir.iterdir() if d.is_dir()])
121
+
122
+ from cinchdb.infrastructure.metadata_db import MetadataDB
123
+ with MetadataDB(project_root) as metadata_db:
124
+ db_records = metadata_db.list_databases()
125
+ return sorted(record['name'] for record in db_records)
113
126
 
114
127
 
115
128
  def list_branches(project_root: Path, database: str) -> List[str]:
@@ -122,11 +135,17 @@ def list_branches(project_root: Path, database: str) -> List[str]:
122
135
  Returns:
123
136
  List of branch names
124
137
  """
125
- branches_dir = get_database_path(project_root, database) / "branches"
126
- if not branches_dir.exists():
138
+ metadata_db_path = project_root / ".cinchdb" / "metadata.db"
139
+ if not metadata_db_path.exists():
127
140
  return []
128
-
129
- return sorted([b.name for b in branches_dir.iterdir() if b.is_dir()])
141
+
142
+ from cinchdb.infrastructure.metadata_db import MetadataDB
143
+ with MetadataDB(project_root) as metadata_db:
144
+ db_info = metadata_db.get_database(database)
145
+ if not db_info:
146
+ return []
147
+ branch_records = metadata_db.list_branches(db_info['id'])
148
+ return sorted(record['name'] for record in branch_records)
130
149
 
131
150
 
132
151
  def list_tenants(project_root: Path, database: str, branch: str) -> List[str]:
@@ -140,19 +159,17 @@ def list_tenants(project_root: Path, database: str, branch: str) -> List[str]:
140
159
  Returns:
141
160
  List of tenant names
142
161
  """
143
- tenants_dir = get_branch_path(project_root, database, branch) / "tenants"
144
- if not tenants_dir.exists():
162
+ metadata_db_path = project_root / ".cinchdb" / "metadata.db"
163
+ if not metadata_db_path.exists():
145
164
  return []
146
-
147
- # List both .db files and .meta files for lazy tenants
148
- tenants = set()
149
- for f in tenants_dir.iterdir():
150
- if f.is_file():
151
- if f.suffix == ".db":
152
- tenants.add(f.stem)
153
- elif f.suffix == ".meta" and f.name.startswith("."):
154
- # Lazy tenant metadata files are named .{tenant_name}.meta
155
- tenant_name = f.stem[1:] # Remove leading dot
156
- tenants.add(tenant_name)
157
-
158
- return sorted(list(tenants))
165
+
166
+ from cinchdb.infrastructure.metadata_db import MetadataDB
167
+ with MetadataDB(project_root) as metadata_db:
168
+ db_info = metadata_db.get_database(database)
169
+ if not db_info:
170
+ return []
171
+ branch_info = metadata_db.get_branch(db_info['id'], branch)
172
+ if not branch_info:
173
+ return []
174
+ tenant_records = metadata_db.list_tenants(branch_info['id'])
175
+ return sorted(record['name'] for record in tenant_records)
@@ -0,0 +1,145 @@
1
+ """Connection pool for MetadataDB to ensure efficient connection reuse."""
2
+
3
+ import threading
4
+ from pathlib import Path
5
+ from typing import Optional, Dict
6
+ from weakref import WeakValueDictionary
7
+
8
+ from cinchdb.infrastructure.metadata_db import MetadataDB
9
+
10
+
11
class MetadataConnectionPool:
    """Thread-safe, lazy-initialized connection pool for MetadataDB.

    One pool object exists per resolved project directory (kept in a
    class-level registry), and each pool holds at most one MetadataDB
    connection that is created on first use and shared by all callers.
    """

    # Registry of pools, keyed by the string form of the resolved project path.
    _instances: Dict[str, 'MetadataConnectionPool'] = {}
    # Guards insertion into and clearing of the registry.
    _lock = threading.Lock()

    def __init__(self, project_path: Path):
        """Initialize the connection pool (but don't create connection yet).

        Args:
            project_path: Path to the project directory
        """
        self.project_path = Path(project_path)
        self._connection: Optional[MetadataDB] = None
        # Plain (non-reentrant) lock: never call a method that takes it
        # while already holding it.
        self._connection_lock = threading.Lock()
        self._ref_count = 0

    @classmethod
    def get_instance(cls, project_path: Path) -> 'MetadataConnectionPool':
        """Get or create a connection pool for the given project.

        Args:
            project_path: Path to the project directory. A plain string is
                accepted as well, matching what ``__init__`` tolerates.

        Returns:
            MetadataConnectionPool instance for this project
        """
        path_str = str(Path(project_path).resolve())

        # Fast path - a single .get() avoids a KeyError if close_all()
        # clears the registry between a membership test and the lookup.
        pool = cls._instances.get(path_str)
        if pool is not None:
            return pool

        # Slow path - create new instance with lock
        with cls._lock:
            # Re-check under the lock: another thread may have created it.
            pool = cls._instances.get(path_str)
            if pool is None:
                pool = cls(project_path)
                cls._instances[path_str] = pool
            return pool

    def get_connection(self) -> 'MetadataDB':
        """Get or create a MetadataDB connection (lazy initialization).

        Returns:
            MetadataDB instance (shared across all callers for this project)
        """
        # Fast path - read the attribute once so a concurrent close() cannot
        # turn the value into None between the check and the return.
        connection = self._connection
        if connection is not None:
            return connection

        # Slow path - create connection with lock
        with self._connection_lock:
            # Re-check under the lock: another thread may have created it.
            if self._connection is None:
                self._connection = MetadataDB(self.project_path)
            return self._connection

    def acquire(self) -> 'MetadataDB':
        """Acquire a reference to the connection.

        Returns:
            MetadataDB instance
        """
        with self._connection_lock:
            self._ref_count += 1
        # Called after releasing the lock: get_connection() takes the same
        # non-reentrant lock on its slow path and would otherwise deadlock.
        return self.get_connection()

    def release(self) -> None:
        """Release a reference to the connection.

        Only the reference count is decremented (never below zero); the
        connection itself stays open so later callers can reuse it.
        """
        with self._connection_lock:
            self._ref_count = max(0, self._ref_count - 1)

    def close(self) -> None:
        """Explicitly close the connection (called on shutdown)."""
        with self._connection_lock:
            if self._connection is not None:
                self._connection.close()
                self._connection = None
            self._ref_count = 0

    @classmethod
    def close_all(cls) -> None:
        """Close all connection pools (useful for cleanup in tests)."""
        with cls._lock:
            for pool in cls._instances.values():
                pool.close()
            cls._instances.clear()
107
+
108
+
109
class MetadataDBHandle:
    """Context manager pairing a pool ``acquire()`` with a ``release()``."""

    def __init__(self, project_path: Path):
        """Look up the pool for a project; no connection is acquired yet.

        Args:
            project_path: Path to the project directory
        """
        self.connection: Optional[MetadataDB] = None
        self.pool = MetadataConnectionPool.get_instance(project_path)

    def __enter__(self) -> MetadataDB:
        """Acquire the connection from the pool and return it."""
        acquired = self.pool.acquire()
        self.connection = acquired
        return acquired

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Release the pool reference and drop the local handle.

        Returns None, so an exception raised inside the ``with`` body
        is not suppressed.
        """
        self.pool.release()
        self.connection = None
130
+
131
+
132
def get_metadata_db(project_path: Path) -> MetadataDB:
    """Return the pooled metadata database connection for a project.

    Convenience wrapper for callers that do not use a context manager.
    The returned connection is the pool's shared one, so the caller
    should NOT close it.

    Args:
        project_path: Path to the project directory

    Returns:
        MetadataDB instance (shared, do not close)
    """
    return MetadataConnectionPool.get_instance(project_path).get_connection()