rdf-starbase 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,407 @@
1
+ """
2
+ Repository Manager for RDF-StarBase.
3
+
4
+ Manages multiple named TripleStore instances (repositories/projects).
5
+ Similar to how GraphDB or Neo4j manage multiple databases.
6
+
7
+ Features:
8
+ - Create/delete named repositories
9
+ - Persist repositories to disk
10
+ - Switch between repositories
11
+ - List all repositories with metadata
12
+ """
13
+
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+ from typing import Optional, Dict, Any
18
+ import json
19
+ import shutil
20
+
21
+ from rdf_starbase.store import TripleStore
22
+
23
+
24
@dataclass
class RepositoryInfo:
    """Metadata about a repository.

    Instances are serialised to and from plain dictionaries via
    :meth:`to_dict` and :meth:`from_dict` (``created_at`` is stored as an
    ISO-8601 string).
    """

    name: str
    created_at: datetime
    description: str = ""
    tags: list[str] = field(default_factory=list)

    # Stats (populated on demand)
    triple_count: int = 0
    subject_count: int = 0
    predicate_count: int = 0

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dictionary describing this repository.

        The ``tags`` list is copied so that callers mutating the returned
        dictionary cannot accidentally change this object.
        """
        return {
            "name": self.name,
            "created_at": self.created_at.isoformat(),
            "description": self.description,
            "tags": list(self.tags),
            "triple_count": self.triple_count,
            "subject_count": self.subject_count,
            "predicate_count": self.predicate_count,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "RepositoryInfo":
        """Build a ``RepositoryInfo`` from a dictionary produced by ``to_dict``.

        Args:
            data: Mapping with at least ``name`` and ``created_at`` (ISO-8601
                string). All other keys are optional and fall back to the
                dataclass defaults.

        Raises:
            KeyError: If ``name`` or ``created_at`` is missing.
            ValueError: If ``created_at`` is not a valid ISO-8601 string.
        """
        return cls(
            name=data["name"],
            created_at=datetime.fromisoformat(data["created_at"]),
            description=data.get("description", ""),
            # Copy the list (and tolerate an explicit null) so the new object
            # never shares mutable state with the input mapping.
            tags=list(data.get("tags") or []),
            triple_count=data.get("triple_count", 0),
            subject_count=data.get("subject_count", 0),
            predicate_count=data.get("predicate_count", 0),
        )
59
+
60
+
61
+ class RepositoryManager:
62
+ """
63
+ Manages multiple named TripleStore repositories.
64
+
65
+ Provides:
66
+ - CRUD operations for repositories
67
+ - Persistence to a workspace directory
68
+ - In-memory caching of active repositories
69
+
70
+ Usage:
71
+ manager = RepositoryManager("./data/repositories")
72
+
73
+ # Create a new repository
74
+ manager.create("my-project", description="Test project")
75
+
76
+ # Get the store for a repository
77
+ store = manager.get_store("my-project")
78
+ store.add_triple(...)
79
+
80
+ # List all repositories
81
+ repos = manager.list_repositories()
82
+
83
+ # Persist changes
84
+ manager.save("my-project")
85
+ """
86
+
87
+ def __init__(self, workspace_path: str | Path):
88
+ """
89
+ Initialize the repository manager.
90
+
91
+ Args:
92
+ workspace_path: Directory to store all repositories
93
+ """
94
+ self.workspace_path = Path(workspace_path)
95
+ self.workspace_path.mkdir(parents=True, exist_ok=True)
96
+
97
+ # In-memory cache of loaded repositories
98
+ self._stores: Dict[str, TripleStore] = {}
99
+ self._info: Dict[str, RepositoryInfo] = {}
100
+
101
+ # Load existing repository metadata
102
+ self._load_metadata()
103
+
104
+ def _load_metadata(self) -> None:
105
+ """Load metadata for all repositories in the workspace."""
106
+ for repo_dir in self.workspace_path.iterdir():
107
+ if repo_dir.is_dir():
108
+ meta_file = repo_dir / "repository.json"
109
+ if meta_file.exists():
110
+ try:
111
+ with open(meta_file) as f:
112
+ data = json.load(f)
113
+ self._info[repo_dir.name] = RepositoryInfo.from_dict(data)
114
+ except Exception as e:
115
+ print(f"Warning: Failed to load metadata for {repo_dir.name}: {e}")
116
+
117
+ def _save_metadata(self, name: str) -> None:
118
+ """Save metadata for a repository."""
119
+ repo_dir = self.workspace_path / name
120
+ repo_dir.mkdir(parents=True, exist_ok=True)
121
+
122
+ info = self._info.get(name)
123
+ if info:
124
+ # Update stats if store is loaded
125
+ if name in self._stores:
126
+ stats = self._stores[name].stats()
127
+ info.triple_count = stats.get("total_assertions", 0)
128
+ info.subject_count = stats.get("unique_subjects", 0)
129
+ info.predicate_count = stats.get("unique_predicates", 0)
130
+
131
+ with open(repo_dir / "repository.json", "w") as f:
132
+ json.dump(info.to_dict(), f, indent=2)
133
+
134
+ def create(
135
+ self,
136
+ name: str,
137
+ description: str = "",
138
+ tags: Optional[list[str]] = None,
139
+ ) -> RepositoryInfo:
140
+ """
141
+ Create a new repository.
142
+
143
+ Args:
144
+ name: Unique repository name (alphanumeric + hyphens)
145
+ description: Human-readable description
146
+ tags: Optional tags for categorization
147
+
148
+ Returns:
149
+ RepositoryInfo for the new repository
150
+
151
+ Raises:
152
+ ValueError: If name is invalid or already exists
153
+ """
154
+ # Validate name
155
+ if not name:
156
+ raise ValueError("Repository name cannot be empty")
157
+ if not all(c.isalnum() or c in '-_' for c in name):
158
+ raise ValueError("Repository name can only contain alphanumeric characters, hyphens, and underscores")
159
+ if name in self._info:
160
+ raise ValueError(f"Repository '{name}' already exists")
161
+
162
+ # Create repository
163
+ info = RepositoryInfo(
164
+ name=name,
165
+ created_at=datetime.now(timezone.utc),
166
+ description=description,
167
+ tags=tags or [],
168
+ )
169
+
170
+ self._info[name] = info
171
+ self._stores[name] = TripleStore()
172
+
173
+ # Persist metadata
174
+ self._save_metadata(name)
175
+
176
+ return info
177
+
178
+ def delete(self, name: str, force: bool = False) -> bool:
179
+ """
180
+ Delete a repository.
181
+
182
+ Args:
183
+ name: Repository name
184
+ force: If True, delete even if repository has data
185
+
186
+ Returns:
187
+ True if deleted
188
+
189
+ Raises:
190
+ ValueError: If repository doesn't exist
191
+ ValueError: If repository has data and force=False
192
+ """
193
+ if name not in self._info:
194
+ raise ValueError(f"Repository '{name}' does not exist")
195
+
196
+ # Check if repository has data
197
+ store = self.get_store(name)
198
+ stats = store.stats()
199
+ if stats.get("total_assertions", 0) > 0 and not force:
200
+ raise ValueError(
201
+ f"Repository '{name}' contains {stats['total_assertions']} assertions. "
202
+ "Use force=True to delete anyway."
203
+ )
204
+
205
+ # Remove from memory
206
+ self._stores.pop(name, None)
207
+ self._info.pop(name, None)
208
+
209
+ # Remove from disk
210
+ repo_dir = self.workspace_path / name
211
+ if repo_dir.exists():
212
+ shutil.rmtree(repo_dir)
213
+
214
+ return True
215
+
216
+ def get_store(self, name: str) -> TripleStore:
217
+ """
218
+ Get the TripleStore for a repository.
219
+
220
+ Loads from disk if not already in memory.
221
+
222
+ Args:
223
+ name: Repository name
224
+
225
+ Returns:
226
+ TripleStore instance
227
+
228
+ Raises:
229
+ ValueError: If repository doesn't exist
230
+ """
231
+ if name not in self._info:
232
+ raise ValueError(f"Repository '{name}' does not exist")
233
+
234
+ if name not in self._stores:
235
+ # Load from disk
236
+ repo_dir = self.workspace_path / name
237
+ store_file = repo_dir / "store.parquet"
238
+
239
+ if store_file.exists():
240
+ self._stores[name] = TripleStore.load(store_file)
241
+ else:
242
+ self._stores[name] = TripleStore()
243
+
244
+ return self._stores[name]
245
+
246
+ def get_info(self, name: str) -> RepositoryInfo:
247
+ """
248
+ Get metadata for a repository.
249
+
250
+ Args:
251
+ name: Repository name
252
+
253
+ Returns:
254
+ RepositoryInfo
255
+
256
+ Raises:
257
+ ValueError: If repository doesn't exist
258
+ """
259
+ if name not in self._info:
260
+ raise ValueError(f"Repository '{name}' does not exist")
261
+
262
+ info = self._info[name]
263
+
264
+ # Update stats if store is loaded
265
+ if name in self._stores:
266
+ stats = self._stores[name].stats()
267
+ info.triple_count = stats.get("total_assertions", 0)
268
+ info.subject_count = stats.get("unique_subjects", 0)
269
+ info.predicate_count = stats.get("unique_predicates", 0)
270
+
271
+ return info
272
+
273
+ def list_repositories(self) -> list[RepositoryInfo]:
274
+ """
275
+ List all repositories with their metadata.
276
+
277
+ Returns:
278
+ List of RepositoryInfo objects
279
+ """
280
+ result = []
281
+ for name, info in sorted(self._info.items()):
282
+ # Update stats if store is loaded
283
+ if name in self._stores:
284
+ stats = self._stores[name].stats()
285
+ info.triple_count = stats.get("total_assertions", 0)
286
+ info.subject_count = stats.get("unique_subjects", 0)
287
+ info.predicate_count = stats.get("unique_predicates", 0)
288
+ result.append(info)
289
+ return result
290
+
291
+ def save(self, name: str) -> None:
292
+ """
293
+ Persist a repository to disk.
294
+
295
+ Args:
296
+ name: Repository name
297
+
298
+ Raises:
299
+ ValueError: If repository doesn't exist or isn't loaded
300
+ """
301
+ if name not in self._info:
302
+ raise ValueError(f"Repository '{name}' does not exist")
303
+ if name not in self._stores:
304
+ # Nothing to save - not loaded
305
+ return
306
+
307
+ repo_dir = self.workspace_path / name
308
+ repo_dir.mkdir(parents=True, exist_ok=True)
309
+
310
+ # Save store
311
+ store_file = repo_dir / "store.parquet"
312
+ self._stores[name].save(store_file)
313
+
314
+ # Update metadata
315
+ self._save_metadata(name)
316
+
317
+ def save_all(self) -> None:
318
+ """Persist all loaded repositories to disk."""
319
+ for name in self._stores:
320
+ self.save(name)
321
+
322
+ def exists(self, name: str) -> bool:
323
+ """Check if a repository exists."""
324
+ return name in self._info
325
+
326
+ def unload(self, name: str) -> None:
327
+ """
328
+ Unload a repository from memory (after saving).
329
+
330
+ Useful for freeing memory when many repositories exist.
331
+
332
+ Args:
333
+ name: Repository name
334
+ """
335
+ if name in self._stores:
336
+ self.save(name)
337
+ del self._stores[name]
338
+
339
+ def rename(self, old_name: str, new_name: str) -> RepositoryInfo:
340
+ """
341
+ Rename a repository.
342
+
343
+ Args:
344
+ old_name: Current repository name
345
+ new_name: New repository name
346
+
347
+ Returns:
348
+ Updated RepositoryInfo
349
+ """
350
+ if old_name not in self._info:
351
+ raise ValueError(f"Repository '{old_name}' does not exist")
352
+ if new_name in self._info:
353
+ raise ValueError(f"Repository '{new_name}' already exists")
354
+ if not all(c.isalnum() or c in '-_' for c in new_name):
355
+ raise ValueError("Repository name can only contain alphanumeric characters, hyphens, and underscores")
356
+
357
+ # Save first
358
+ if old_name in self._stores:
359
+ self.save(old_name)
360
+
361
+ # Move directory
362
+ old_dir = self.workspace_path / old_name
363
+ new_dir = self.workspace_path / new_name
364
+ if old_dir.exists():
365
+ old_dir.rename(new_dir)
366
+
367
+ # Update in-memory state
368
+ info = self._info.pop(old_name)
369
+ info.name = new_name
370
+ self._info[new_name] = info
371
+
372
+ if old_name in self._stores:
373
+ self._stores[new_name] = self._stores.pop(old_name)
374
+
375
+ # Update metadata file
376
+ self._save_metadata(new_name)
377
+
378
+ return info
379
+
380
+ def update_info(
381
+ self,
382
+ name: str,
383
+ description: Optional[str] = None,
384
+ tags: Optional[list[str]] = None,
385
+ ) -> RepositoryInfo:
386
+ """
387
+ Update repository metadata.
388
+
389
+ Args:
390
+ name: Repository name
391
+ description: New description (if provided)
392
+ tags: New tags (if provided)
393
+
394
+ Returns:
395
+ Updated RepositoryInfo
396
+ """
397
+ if name not in self._info:
398
+ raise ValueError(f"Repository '{name}' does not exist")
399
+
400
+ info = self._info[name]
401
+ if description is not None:
402
+ info.description = description
403
+ if tags is not None:
404
+ info.tags = tags
405
+
406
+ self._save_metadata(name)
407
+ return info