rdf-starbase 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_starbase/__init__.py +57 -0
- rdf_starbase/ai_grounding.py +728 -0
- rdf_starbase/compat/__init__.py +26 -0
- rdf_starbase/compat/rdflib.py +1104 -0
- rdf_starbase/formats/__init__.py +29 -0
- rdf_starbase/formats/jsonld.py +488 -0
- rdf_starbase/formats/ntriples.py +419 -0
- rdf_starbase/formats/rdfxml.py +434 -0
- rdf_starbase/formats/turtle.py +882 -0
- rdf_starbase/models.py +92 -0
- rdf_starbase/registry.py +540 -0
- rdf_starbase/repositories.py +407 -0
- rdf_starbase/repository_api.py +739 -0
- rdf_starbase/sparql/__init__.py +35 -0
- rdf_starbase/sparql/ast.py +910 -0
- rdf_starbase/sparql/executor.py +1925 -0
- rdf_starbase/sparql/parser.py +1716 -0
- rdf_starbase/storage/__init__.py +44 -0
- rdf_starbase/storage/executor.py +1914 -0
- rdf_starbase/storage/facts.py +850 -0
- rdf_starbase/storage/lsm.py +531 -0
- rdf_starbase/storage/persistence.py +338 -0
- rdf_starbase/storage/quoted_triples.py +292 -0
- rdf_starbase/storage/reasoner.py +1035 -0
- rdf_starbase/storage/terms.py +628 -0
- rdf_starbase/store.py +1049 -0
- rdf_starbase/store_legacy.py +748 -0
- rdf_starbase/web.py +568 -0
- rdf_starbase-0.1.0.dist-info/METADATA +706 -0
- rdf_starbase-0.1.0.dist-info/RECORD +31 -0
- rdf_starbase-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Repository Manager for RDF-StarBase.
|
|
3
|
+
|
|
4
|
+
Manages multiple named TripleStore instances (repositories/projects).
|
|
5
|
+
Similar to how GraphDB or Neo4j manage multiple databases.
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Create/delete named repositories
|
|
9
|
+
- Persist repositories to disk
|
|
10
|
+
- Switch between repositories
|
|
11
|
+
- List all repositories with metadata
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional, Dict, Any
|
|
18
|
+
import json
|
|
19
|
+
import shutil
|
|
20
|
+
|
|
21
|
+
from rdf_starbase.store import TripleStore
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class RepositoryInfo:
    """Metadata about a repository.

    Instances round-trip through plain dicts via :meth:`to_dict` and
    :meth:`from_dict`, which is how they are written to and read from JSON.

    Attributes:
        name: Repository name.
        created_at: Creation timestamp.
        description: Human-readable description.
        tags: Free-form tags for categorization.
        triple_count: Number of assertions (populated on demand).
        subject_count: Number of unique subjects (populated on demand).
        predicate_count: Number of unique predicates (populated on demand).
    """

    name: str
    created_at: datetime
    description: str = ""
    tags: list[str] = field(default_factory=list)

    # Stats (populated on demand)
    triple_count: int = 0
    subject_count: int = 0
    predicate_count: int = 0

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict describing this repository.

        ``created_at`` is emitted as an ISO-8601 string. ``tags`` is a copy,
        so mutating the returned dict never changes this instance.
        """
        return {
            "name": self.name,
            "created_at": self.created_at.isoformat(),
            "description": self.description,
            "tags": list(self.tags),
            "triple_count": self.triple_count,
            "subject_count": self.subject_count,
            "predicate_count": self.predicate_count,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "RepositoryInfo":
        """Build a RepositoryInfo from a dict produced by :meth:`to_dict`.

        Args:
            data: Mapping with required keys ``name`` and ``created_at``
                (ISO-8601 string); every other key is optional.

        Returns:
            A new RepositoryInfo that shares no mutable state with ``data``.

        Raises:
            KeyError: If ``name`` or ``created_at`` is missing.
            ValueError: If ``created_at`` is not a valid ISO-8601 string.
        """
        return cls(
            name=data["name"],
            created_at=datetime.fromisoformat(data["created_at"]),
            description=data.get("description", ""),
            # Copy so the instance does not alias the caller's list; a
            # missing or null "tags" entry becomes an empty list.
            tags=list(data.get("tags") or []),
            triple_count=data.get("triple_count", 0),
            subject_count=data.get("subject_count", 0),
            predicate_count=data.get("predicate_count", 0),
        )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class RepositoryManager:
|
|
62
|
+
"""
|
|
63
|
+
Manages multiple named TripleStore repositories.
|
|
64
|
+
|
|
65
|
+
Provides:
|
|
66
|
+
- CRUD operations for repositories
|
|
67
|
+
- Persistence to a workspace directory
|
|
68
|
+
- In-memory caching of active repositories
|
|
69
|
+
|
|
70
|
+
Usage:
|
|
71
|
+
manager = RepositoryManager("./data/repositories")
|
|
72
|
+
|
|
73
|
+
# Create a new repository
|
|
74
|
+
manager.create("my-project", description="Test project")
|
|
75
|
+
|
|
76
|
+
# Get the store for a repository
|
|
77
|
+
store = manager.get_store("my-project")
|
|
78
|
+
store.add_triple(...)
|
|
79
|
+
|
|
80
|
+
# List all repositories
|
|
81
|
+
repos = manager.list_repositories()
|
|
82
|
+
|
|
83
|
+
# Persist changes
|
|
84
|
+
manager.save("my-project")
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
def __init__(self, workspace_path: str | Path):
|
|
88
|
+
"""
|
|
89
|
+
Initialize the repository manager.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
workspace_path: Directory to store all repositories
|
|
93
|
+
"""
|
|
94
|
+
self.workspace_path = Path(workspace_path)
|
|
95
|
+
self.workspace_path.mkdir(parents=True, exist_ok=True)
|
|
96
|
+
|
|
97
|
+
# In-memory cache of loaded repositories
|
|
98
|
+
self._stores: Dict[str, TripleStore] = {}
|
|
99
|
+
self._info: Dict[str, RepositoryInfo] = {}
|
|
100
|
+
|
|
101
|
+
# Load existing repository metadata
|
|
102
|
+
self._load_metadata()
|
|
103
|
+
|
|
104
|
+
def _load_metadata(self) -> None:
|
|
105
|
+
"""Load metadata for all repositories in the workspace."""
|
|
106
|
+
for repo_dir in self.workspace_path.iterdir():
|
|
107
|
+
if repo_dir.is_dir():
|
|
108
|
+
meta_file = repo_dir / "repository.json"
|
|
109
|
+
if meta_file.exists():
|
|
110
|
+
try:
|
|
111
|
+
with open(meta_file) as f:
|
|
112
|
+
data = json.load(f)
|
|
113
|
+
self._info[repo_dir.name] = RepositoryInfo.from_dict(data)
|
|
114
|
+
except Exception as e:
|
|
115
|
+
print(f"Warning: Failed to load metadata for {repo_dir.name}: {e}")
|
|
116
|
+
|
|
117
|
+
def _save_metadata(self, name: str) -> None:
|
|
118
|
+
"""Save metadata for a repository."""
|
|
119
|
+
repo_dir = self.workspace_path / name
|
|
120
|
+
repo_dir.mkdir(parents=True, exist_ok=True)
|
|
121
|
+
|
|
122
|
+
info = self._info.get(name)
|
|
123
|
+
if info:
|
|
124
|
+
# Update stats if store is loaded
|
|
125
|
+
if name in self._stores:
|
|
126
|
+
stats = self._stores[name].stats()
|
|
127
|
+
info.triple_count = stats.get("total_assertions", 0)
|
|
128
|
+
info.subject_count = stats.get("unique_subjects", 0)
|
|
129
|
+
info.predicate_count = stats.get("unique_predicates", 0)
|
|
130
|
+
|
|
131
|
+
with open(repo_dir / "repository.json", "w") as f:
|
|
132
|
+
json.dump(info.to_dict(), f, indent=2)
|
|
133
|
+
|
|
134
|
+
def create(
|
|
135
|
+
self,
|
|
136
|
+
name: str,
|
|
137
|
+
description: str = "",
|
|
138
|
+
tags: Optional[list[str]] = None,
|
|
139
|
+
) -> RepositoryInfo:
|
|
140
|
+
"""
|
|
141
|
+
Create a new repository.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
name: Unique repository name (alphanumeric + hyphens)
|
|
145
|
+
description: Human-readable description
|
|
146
|
+
tags: Optional tags for categorization
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
RepositoryInfo for the new repository
|
|
150
|
+
|
|
151
|
+
Raises:
|
|
152
|
+
ValueError: If name is invalid or already exists
|
|
153
|
+
"""
|
|
154
|
+
# Validate name
|
|
155
|
+
if not name:
|
|
156
|
+
raise ValueError("Repository name cannot be empty")
|
|
157
|
+
if not all(c.isalnum() or c in '-_' for c in name):
|
|
158
|
+
raise ValueError("Repository name can only contain alphanumeric characters, hyphens, and underscores")
|
|
159
|
+
if name in self._info:
|
|
160
|
+
raise ValueError(f"Repository '{name}' already exists")
|
|
161
|
+
|
|
162
|
+
# Create repository
|
|
163
|
+
info = RepositoryInfo(
|
|
164
|
+
name=name,
|
|
165
|
+
created_at=datetime.now(timezone.utc),
|
|
166
|
+
description=description,
|
|
167
|
+
tags=tags or [],
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
self._info[name] = info
|
|
171
|
+
self._stores[name] = TripleStore()
|
|
172
|
+
|
|
173
|
+
# Persist metadata
|
|
174
|
+
self._save_metadata(name)
|
|
175
|
+
|
|
176
|
+
return info
|
|
177
|
+
|
|
178
|
+
def delete(self, name: str, force: bool = False) -> bool:
|
|
179
|
+
"""
|
|
180
|
+
Delete a repository.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
name: Repository name
|
|
184
|
+
force: If True, delete even if repository has data
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
True if deleted
|
|
188
|
+
|
|
189
|
+
Raises:
|
|
190
|
+
ValueError: If repository doesn't exist
|
|
191
|
+
ValueError: If repository has data and force=False
|
|
192
|
+
"""
|
|
193
|
+
if name not in self._info:
|
|
194
|
+
raise ValueError(f"Repository '{name}' does not exist")
|
|
195
|
+
|
|
196
|
+
# Check if repository has data
|
|
197
|
+
store = self.get_store(name)
|
|
198
|
+
stats = store.stats()
|
|
199
|
+
if stats.get("total_assertions", 0) > 0 and not force:
|
|
200
|
+
raise ValueError(
|
|
201
|
+
f"Repository '{name}' contains {stats['total_assertions']} assertions. "
|
|
202
|
+
"Use force=True to delete anyway."
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
# Remove from memory
|
|
206
|
+
self._stores.pop(name, None)
|
|
207
|
+
self._info.pop(name, None)
|
|
208
|
+
|
|
209
|
+
# Remove from disk
|
|
210
|
+
repo_dir = self.workspace_path / name
|
|
211
|
+
if repo_dir.exists():
|
|
212
|
+
shutil.rmtree(repo_dir)
|
|
213
|
+
|
|
214
|
+
return True
|
|
215
|
+
|
|
216
|
+
def get_store(self, name: str) -> TripleStore:
|
|
217
|
+
"""
|
|
218
|
+
Get the TripleStore for a repository.
|
|
219
|
+
|
|
220
|
+
Loads from disk if not already in memory.
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
name: Repository name
|
|
224
|
+
|
|
225
|
+
Returns:
|
|
226
|
+
TripleStore instance
|
|
227
|
+
|
|
228
|
+
Raises:
|
|
229
|
+
ValueError: If repository doesn't exist
|
|
230
|
+
"""
|
|
231
|
+
if name not in self._info:
|
|
232
|
+
raise ValueError(f"Repository '{name}' does not exist")
|
|
233
|
+
|
|
234
|
+
if name not in self._stores:
|
|
235
|
+
# Load from disk
|
|
236
|
+
repo_dir = self.workspace_path / name
|
|
237
|
+
store_file = repo_dir / "store.parquet"
|
|
238
|
+
|
|
239
|
+
if store_file.exists():
|
|
240
|
+
self._stores[name] = TripleStore.load(store_file)
|
|
241
|
+
else:
|
|
242
|
+
self._stores[name] = TripleStore()
|
|
243
|
+
|
|
244
|
+
return self._stores[name]
|
|
245
|
+
|
|
246
|
+
def get_info(self, name: str) -> RepositoryInfo:
|
|
247
|
+
"""
|
|
248
|
+
Get metadata for a repository.
|
|
249
|
+
|
|
250
|
+
Args:
|
|
251
|
+
name: Repository name
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
RepositoryInfo
|
|
255
|
+
|
|
256
|
+
Raises:
|
|
257
|
+
ValueError: If repository doesn't exist
|
|
258
|
+
"""
|
|
259
|
+
if name not in self._info:
|
|
260
|
+
raise ValueError(f"Repository '{name}' does not exist")
|
|
261
|
+
|
|
262
|
+
info = self._info[name]
|
|
263
|
+
|
|
264
|
+
# Update stats if store is loaded
|
|
265
|
+
if name in self._stores:
|
|
266
|
+
stats = self._stores[name].stats()
|
|
267
|
+
info.triple_count = stats.get("total_assertions", 0)
|
|
268
|
+
info.subject_count = stats.get("unique_subjects", 0)
|
|
269
|
+
info.predicate_count = stats.get("unique_predicates", 0)
|
|
270
|
+
|
|
271
|
+
return info
|
|
272
|
+
|
|
273
|
+
def list_repositories(self) -> list[RepositoryInfo]:
|
|
274
|
+
"""
|
|
275
|
+
List all repositories with their metadata.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
List of RepositoryInfo objects
|
|
279
|
+
"""
|
|
280
|
+
result = []
|
|
281
|
+
for name, info in sorted(self._info.items()):
|
|
282
|
+
# Update stats if store is loaded
|
|
283
|
+
if name in self._stores:
|
|
284
|
+
stats = self._stores[name].stats()
|
|
285
|
+
info.triple_count = stats.get("total_assertions", 0)
|
|
286
|
+
info.subject_count = stats.get("unique_subjects", 0)
|
|
287
|
+
info.predicate_count = stats.get("unique_predicates", 0)
|
|
288
|
+
result.append(info)
|
|
289
|
+
return result
|
|
290
|
+
|
|
291
|
+
def save(self, name: str) -> None:
|
|
292
|
+
"""
|
|
293
|
+
Persist a repository to disk.
|
|
294
|
+
|
|
295
|
+
Args:
|
|
296
|
+
name: Repository name
|
|
297
|
+
|
|
298
|
+
Raises:
|
|
299
|
+
ValueError: If repository doesn't exist or isn't loaded
|
|
300
|
+
"""
|
|
301
|
+
if name not in self._info:
|
|
302
|
+
raise ValueError(f"Repository '{name}' does not exist")
|
|
303
|
+
if name not in self._stores:
|
|
304
|
+
# Nothing to save - not loaded
|
|
305
|
+
return
|
|
306
|
+
|
|
307
|
+
repo_dir = self.workspace_path / name
|
|
308
|
+
repo_dir.mkdir(parents=True, exist_ok=True)
|
|
309
|
+
|
|
310
|
+
# Save store
|
|
311
|
+
store_file = repo_dir / "store.parquet"
|
|
312
|
+
self._stores[name].save(store_file)
|
|
313
|
+
|
|
314
|
+
# Update metadata
|
|
315
|
+
self._save_metadata(name)
|
|
316
|
+
|
|
317
|
+
def save_all(self) -> None:
|
|
318
|
+
"""Persist all loaded repositories to disk."""
|
|
319
|
+
for name in self._stores:
|
|
320
|
+
self.save(name)
|
|
321
|
+
|
|
322
|
+
def exists(self, name: str) -> bool:
|
|
323
|
+
"""Check if a repository exists."""
|
|
324
|
+
return name in self._info
|
|
325
|
+
|
|
326
|
+
def unload(self, name: str) -> None:
|
|
327
|
+
"""
|
|
328
|
+
Unload a repository from memory (after saving).
|
|
329
|
+
|
|
330
|
+
Useful for freeing memory when many repositories exist.
|
|
331
|
+
|
|
332
|
+
Args:
|
|
333
|
+
name: Repository name
|
|
334
|
+
"""
|
|
335
|
+
if name in self._stores:
|
|
336
|
+
self.save(name)
|
|
337
|
+
del self._stores[name]
|
|
338
|
+
|
|
339
|
+
def rename(self, old_name: str, new_name: str) -> RepositoryInfo:
|
|
340
|
+
"""
|
|
341
|
+
Rename a repository.
|
|
342
|
+
|
|
343
|
+
Args:
|
|
344
|
+
old_name: Current repository name
|
|
345
|
+
new_name: New repository name
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
Updated RepositoryInfo
|
|
349
|
+
"""
|
|
350
|
+
if old_name not in self._info:
|
|
351
|
+
raise ValueError(f"Repository '{old_name}' does not exist")
|
|
352
|
+
if new_name in self._info:
|
|
353
|
+
raise ValueError(f"Repository '{new_name}' already exists")
|
|
354
|
+
if not all(c.isalnum() or c in '-_' for c in new_name):
|
|
355
|
+
raise ValueError("Repository name can only contain alphanumeric characters, hyphens, and underscores")
|
|
356
|
+
|
|
357
|
+
# Save first
|
|
358
|
+
if old_name in self._stores:
|
|
359
|
+
self.save(old_name)
|
|
360
|
+
|
|
361
|
+
# Move directory
|
|
362
|
+
old_dir = self.workspace_path / old_name
|
|
363
|
+
new_dir = self.workspace_path / new_name
|
|
364
|
+
if old_dir.exists():
|
|
365
|
+
old_dir.rename(new_dir)
|
|
366
|
+
|
|
367
|
+
# Update in-memory state
|
|
368
|
+
info = self._info.pop(old_name)
|
|
369
|
+
info.name = new_name
|
|
370
|
+
self._info[new_name] = info
|
|
371
|
+
|
|
372
|
+
if old_name in self._stores:
|
|
373
|
+
self._stores[new_name] = self._stores.pop(old_name)
|
|
374
|
+
|
|
375
|
+
# Update metadata file
|
|
376
|
+
self._save_metadata(new_name)
|
|
377
|
+
|
|
378
|
+
return info
|
|
379
|
+
|
|
380
|
+
def update_info(
|
|
381
|
+
self,
|
|
382
|
+
name: str,
|
|
383
|
+
description: Optional[str] = None,
|
|
384
|
+
tags: Optional[list[str]] = None,
|
|
385
|
+
) -> RepositoryInfo:
|
|
386
|
+
"""
|
|
387
|
+
Update repository metadata.
|
|
388
|
+
|
|
389
|
+
Args:
|
|
390
|
+
name: Repository name
|
|
391
|
+
description: New description (if provided)
|
|
392
|
+
tags: New tags (if provided)
|
|
393
|
+
|
|
394
|
+
Returns:
|
|
395
|
+
Updated RepositoryInfo
|
|
396
|
+
"""
|
|
397
|
+
if name not in self._info:
|
|
398
|
+
raise ValueError(f"Repository '{name}' does not exist")
|
|
399
|
+
|
|
400
|
+
info = self._info[name]
|
|
401
|
+
if description is not None:
|
|
402
|
+
info.description = description
|
|
403
|
+
if tags is not None:
|
|
404
|
+
info.tags = tags
|
|
405
|
+
|
|
406
|
+
self._save_metadata(name)
|
|
407
|
+
return info
|