rdf-starbase 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdf_starbase/web.py ADDED
@@ -0,0 +1,568 @@
1
+ """
2
+ RDF-StarBase Web API
3
+
4
+ FastAPI-based REST API for querying and managing the knowledge graph.
5
+ Provides endpoints for:
6
+ - SPARQL queries
7
+ - Triple management
8
+ - Provenance inspection
9
+ - Competing claims analysis
10
+ - Source registry
11
+ """
12
+
13
+ from datetime import datetime
14
+ from typing import Any, Optional, Union
15
+ from uuid import UUID
16
+ import json
17
+
18
+ from fastapi import FastAPI, HTTPException, Query
19
+ from fastapi.middleware.cors import CORSMiddleware
20
+ from fastapi.responses import JSONResponse
21
+ from pydantic import BaseModel, Field
22
+ import polars as pl
23
+
24
+ from rdf_starbase import (
25
+ TripleStore,
26
+ ProvenanceContext,
27
+ AssertionRegistry,
28
+ SourceType,
29
+ SourceStatus,
30
+ execute_sparql,
31
+ parse_query,
32
+ )
33
+ from rdf_starbase.ai_grounding import create_ai_router
34
+ from rdf_starbase.repository_api import create_repository_router
35
+
36
+
37
+ # Pydantic models for API
38
class ProvenanceInput(BaseModel):
    """Provenance context for adding triples."""
    # Required: who or what asserted the triple.
    source: str = Field(..., description="Source system or person")
    # Validated to the closed interval [0.0, 1.0]; omitted means full confidence.
    confidence: float = Field(default=1.0, ge=0.0, le=1.0)
    # Optional label for the process that produced the assertion.
    process: Optional[str] = None

    def to_context(self) -> ProvenanceContext:
        """Build the ProvenanceContext carrying this model's three fields."""
        context_kwargs = {
            "source": self.source,
            "confidence": self.confidence,
            "process": self.process,
        }
        return ProvenanceContext(**context_kwargs)
50
+
51
+
52
class TripleInput(BaseModel):
    """Input for adding a triple."""
    # Request body of POST /triples; the fields are forwarded to the store's
    # add_triple call by the handler in create_app.
    subject: str
    predicate: str
    # The object may be a string or a JSON number/boolean.
    object: Union[str, int, float, bool]
    # Nested provenance block; converted with ProvenanceInput.to_context().
    provenance: ProvenanceInput
    # Optional graph identifier, passed through as ``graph=``.
    graph: Optional[str] = None
59
+
60
+
61
class BatchTripleInput(BaseModel):
    """Input for batch adding triples."""
    # Request body of POST /triples/batch. The dicts are not validated field by
    # field here; the list is handed as-is to the store's add_triples_batch.
    triples: list[dict] = Field(..., description="List of triple dicts with subject, predicate, object, source, confidence, process")
64
+
65
+
66
class SPARQLQuery(BaseModel):
    """SPARQL query request."""
    # Shared request body of the /sparql, /sparql/update and /sparql/parse
    # endpoints; the string is passed unmodified to execute_sparql / parse_query.
    query: str = Field(..., description="SPARQL-Star query string")
69
+
70
+
71
class SourceInput(BaseModel):
    """Input for registering a source."""
    # Request body of POST /sources.
    name: str
    # Kept as a plain string here; the register_source handler converts it with
    # SourceType(...) and answers HTTP 400 when the value is not a member.
    source_type: str = Field(..., description="One of: dataset, api, mapping, process, manual")
    # The remaining fields are optional and forwarded unchanged to the registry.
    uri: Optional[str] = None
    description: Optional[str] = None
    owner: Optional[str] = None
    sync_frequency: Optional[str] = None
    # default_factory gives every instance its own empty list.
    tags: list[str] = Field(default_factory=list)
80
+
81
+
82
def dataframe_to_records(df: pl.DataFrame) -> list[dict[str, Any]]:
    """Convert Polars DataFrame to list of dicts for JSON serialization.

    Each row becomes one dict keyed by column name. ``datetime`` cells are
    rendered with ``isoformat()``; every other cell (including ``None``) is
    copied through unchanged.
    """
    def _jsonable(cell: Any) -> Any:
        # Only datetimes need converting; everything else passes straight through.
        return cell.isoformat() if isinstance(cell, datetime) else cell

    return [
        {column: _jsonable(cell) for column, cell in row.items()}
        for row in df.iter_rows(named=True)
    ]
96
+
97
+
98
def create_app(store: Optional[TripleStore] = None, registry: Optional[AssertionRegistry] = None) -> FastAPI:
    """
    Create the FastAPI application.

    Builds the app, installs CORS middleware, attaches the triple store and
    the source registry to ``app.state``, mounts the repository-management and
    AI-grounding routers, and then registers the route handlers defined below.

    Args:
        store: Optional TripleStore instance (creates new if not provided)
        registry: Optional AssertionRegistry instance (creates new if not provided)

    Returns:
        Configured FastAPI application
    """
    # Function-scope import (the handlers previously imported urllib.parse one
    # by one); done once here so every handler shares the same name.
    from urllib.parse import unquote

    app = FastAPI(
        title="RDF-StarBase API",
        description="A blazingly fast RDF★ database with native provenance tracking",
        version="0.1.0",
        docs_url="/docs",
        redoc_url="/redoc",
    )

    # Add CORS middleware
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],  # Configure appropriately for production
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # State
    # Compare against None explicitly: ``store or TripleStore()`` would silently
    # replace a caller-supplied instance that happens to evaluate as falsy
    # (e.g. if the class defines __len__/__bool__ and the instance is empty).
    app.state.store = store if store is not None else TripleStore()
    app.state.registry = registry if registry is not None else AssertionRegistry()

    # Add Repository Management router
    repo_router, repo_manager = create_repository_router()
    app.include_router(repo_router)
    app.state.repo_manager = repo_manager

    # Add AI Grounding API router
    ai_router = create_ai_router(app.state.store)
    app.include_router(ai_router)

    # ==========================================================================
    # Private helpers shared by the handlers
    # ==========================================================================

    def _local_name(iri: str) -> str:
        """Return the text after the last "/" of *iri* (all of it if there is none)."""
        return iri.rsplit("/", 1)[-1]

    def _parse_source_id(source_id: str) -> UUID:
        """Parse a path parameter as a UUID, answering HTTP 400 when malformed."""
        try:
            return UUID(source_id)
        except ValueError as e:
            raise HTTPException(status_code=400, detail="Invalid UUID format") from e

    # NOTE: the handler docstrings below are kept short on purpose; FastAPI
    # publishes them as the endpoint descriptions in the generated API docs.

    # ==========================================================================
    # Health & Info
    # ==========================================================================

    @app.get("/", tags=["Info"])
    async def root():
        """API root with basic info."""
        return {
            "name": "RDF-StarBase",
            "version": "0.1.0",
            "description": "A blazingly fast RDF★ database with native provenance tracking",
            "docs": "/docs",
        }

    @app.get("/health", tags=["Info"])
    async def health():
        """Health check endpoint."""
        return {"status": "healthy"}

    @app.get("/stats", tags=["Info"])
    async def stats():
        """Get store and registry statistics."""
        return {
            "store": app.state.store.stats(),
            "registry": app.state.registry.get_stats(),
        }

    # ==========================================================================
    # Triples
    # ==========================================================================

    @app.post("/triples", tags=["Triples"])
    async def add_triple(triple: TripleInput):
        """Add a triple with provenance to the store."""
        try:
            assertion_id = app.state.store.add_triple(
                subject=triple.subject,
                predicate=triple.predicate,
                obj=triple.object,
                provenance=triple.provenance.to_context(),
                graph=triple.graph,
            )
            return {"assertion_id": str(assertion_id)}
        except Exception as e:
            # Any store failure is reported to the client as a bad request.
            raise HTTPException(status_code=400, detail=str(e)) from e

    @app.post("/triples/batch", tags=["Triples"])
    async def add_triples_batch(batch: BatchTripleInput):
        """
        Add multiple triples in a single batch operation.

        This is MUCH faster than calling POST /triples repeatedly.
        Each triple dict should have: subject, predicate, object, source, confidence (optional), process (optional).
        """
        try:
            count = app.state.store.add_triples_batch(batch.triples)
            return {
                "success": True,
                "count": count,
                "message": f"Added {count} triples",
            }
        except Exception as e:
            raise HTTPException(status_code=400, detail=str(e)) from e

    @app.get("/triples", tags=["Triples"])
    async def get_triples(
        subject: Optional[str] = Query(None, description="Filter by subject"),
        predicate: Optional[str] = Query(None, description="Filter by predicate"),
        object: Optional[str] = Query(None, description="Filter by object"),
        source: Optional[str] = Query(None, description="Filter by source"),
        min_confidence: Optional[float] = Query(None, ge=0, le=1, description="Minimum confidence"),
        limit: int = Query(100, ge=1, le=10000, description="Maximum results"),
    ):
        """Query triples with optional filters."""
        # ``object`` shadows the builtin, but it is the public query-parameter
        # name and therefore cannot be renamed without breaking clients.
        df = app.state.store.get_triples(
            subject=subject,
            predicate=predicate,
            obj=object,
            source=source,
            min_confidence=min_confidence,
        )

        # The limit is applied after the store has evaluated the filters.
        df = df.head(limit)

        return {
            "count": len(df),
            "triples": dataframe_to_records(df),
        }

    @app.get("/triples/{subject_encoded:path}/claims", tags=["Triples"])
    async def get_competing_claims(
        subject_encoded: str,
        predicate: str = Query(..., description="Predicate to check for conflicts"),
    ):
        """Get competing claims for a subject-predicate pair."""
        # URL decode the subject
        subject = unquote(subject_encoded)

        df = app.state.store.get_competing_claims(subject, predicate)

        # Explicit length test: the empty answer has no "unique_values" key.
        if len(df) == 0:
            return {"count": 0, "has_conflicts": False, "claims": []}

        unique_values = df["object"].n_unique()

        return {
            "count": len(df),
            # More than one distinct object value means the claims disagree.
            "has_conflicts": unique_values > 1,
            "unique_values": unique_values,
            "claims": dataframe_to_records(df),
        }

    @app.get("/triples/{subject_encoded:path}/timeline", tags=["Triples"])
    async def get_provenance_timeline(
        subject_encoded: str,
        predicate: str = Query(..., description="Predicate for timeline"),
    ):
        """Get provenance timeline for a subject-predicate pair."""
        subject = unquote(subject_encoded)

        df = app.state.store.get_provenance_timeline(subject, predicate)

        return {
            "count": len(df),
            "timeline": dataframe_to_records(df),
        }

    # ==========================================================================
    # SPARQL
    # ==========================================================================

    @app.post("/sparql", tags=["SPARQL"])
    async def execute_sparql_query(request: SPARQLQuery):
        """Execute a SPARQL-Star query (SELECT, ASK, INSERT DATA, DELETE DATA)."""
        try:
            result = execute_sparql(app.state.store, request.query)

            # The response shape is chosen from the Python type of the result.
            if isinstance(result, bool):
                # ASK query
                return {"type": "ask", "result": result}
            elif isinstance(result, dict):
                # UPDATE operation (INSERT DATA, DELETE DATA)
                return {
                    "type": "update",
                    "operation": result.get("operation", "unknown"),
                    "count": result.get("count", 0),
                    "success": True,
                }
            elif isinstance(result, pl.DataFrame):
                # SELECT query
                return {
                    "type": "select",
                    "count": len(result),
                    "columns": result.columns,
                    "results": dataframe_to_records(result),
                }
            else:
                return {"type": "unknown", "result": str(result)}

        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Query error: {str(e)}") from e

    @app.post("/sparql/update", tags=["SPARQL"])
    async def execute_sparql_update(request: SPARQLQuery):
        """Execute a SPARQL UPDATE operation (INSERT DATA, DELETE DATA).

        Provenance headers are not read yet: every update is recorded with
        source "SPARQL_UPDATE" and confidence 1.0. Planned headers:
        - X-Provenance-Source: Source identifier
        - X-Provenance-Confidence: Confidence score 0.0-1.0
        - X-Provenance-Process: Process identifier (optional)
        """
        try:
            # For now, use default provenance
            # TODO: Extract from headers
            from rdf_starbase.models import ProvenanceContext
            provenance = ProvenanceContext(source="SPARQL_UPDATE", confidence=1.0)

            result = execute_sparql(app.state.store, request.query, provenance)

            if isinstance(result, dict):
                return {
                    "type": "update",
                    "operation": result.get("operation", "unknown"),
                    "count": result.get("count", 0),
                    "success": result.get("status") != "not_implemented",
                    "message": f"Processed {result.get('count', 0)} triples",
                }
            else:
                # A non-dict result means the statement was a query, not an update.
                raise HTTPException(
                    status_code=400,
                    detail="Expected an UPDATE operation (INSERT DATA, DELETE DATA)"
                )

        except HTTPException:
            # Let the deliberate 400 above through without re-wrapping it.
            raise
        except Exception as e:
            # (A second, identical ``except Exception`` clause used to follow
            # this one; it could never be reached and has been removed.)
            raise HTTPException(status_code=400, detail=f"Update error: {str(e)}") from e

    @app.post("/sparql/parse", tags=["SPARQL"])
    async def parse_sparql(request: SPARQLQuery):
        """Parse a SPARQL query and return the AST structure."""
        try:
            ast = parse_query(request.query)

            return {
                "type": type(ast).__name__,
                "prefixes": ast.prefixes,
                "pattern_count": len(ast.where.patterns) if ast.where else 0,
                "filter_count": len(ast.where.filters) if ast.where else 0,
            }
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Parse error: {str(e)}") from e

    # ==========================================================================
    # Registry
    # ==========================================================================

    @app.post("/sources", tags=["Registry"])
    async def register_source(source: SourceInput):
        """Register a new data source."""
        try:
            src_type = SourceType(source.source_type)
        except ValueError as e:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid source_type. Must be one of: {[t.value for t in SourceType]}"
            ) from e

        registered = app.state.registry.register_source(
            name=source.name,
            source_type=src_type,
            uri=source.uri,
            description=source.description,
            owner=source.owner,
            sync_frequency=source.sync_frequency,
            tags=source.tags,
        )

        return {
            "id": str(registered.id),
            "name": registered.name,
            "source_type": registered.source_type.value,
        }

    @app.get("/sources", tags=["Registry"])
    async def get_sources(
        source_type: Optional[str] = Query(None, description="Filter by type"),
        owner: Optional[str] = Query(None, description="Filter by owner"),
        tag: Optional[str] = Query(None, description="Filter by tag"),
    ):
        """List registered sources with optional filters."""
        # Only filters the client actually supplied are forwarded to the registry.
        kwargs: dict[str, Any] = {}

        if source_type:
            try:
                kwargs["source_type"] = SourceType(source_type)
            except ValueError as e:
                raise HTTPException(status_code=400, detail=f"Invalid source_type: {source_type}") from e

        if owner:
            kwargs["owner"] = owner
        if tag:
            kwargs["tag"] = tag

        sources = app.state.registry.get_sources(**kwargs)

        return {
            "count": len(sources),
            "sources": [
                {
                    "id": str(s.id),
                    "name": s.name,
                    "source_type": s.source_type.value,
                    "uri": s.uri,
                    "status": s.status.value,
                    "owner": s.owner,
                    "last_sync": s.last_sync.isoformat() if s.last_sync else None,
                    "tags": s.tags,
                }
                for s in sources
            ],
        }

    @app.get("/sources/{source_id}", tags=["Registry"])
    async def get_source(source_id: str):
        """Get details of a specific source."""
        uid = _parse_source_id(source_id)

        source = app.state.registry.get_source(uid)
        if source is None:
            raise HTTPException(status_code=404, detail="Source not found")

        return {
            "id": str(source.id),
            "name": source.name,
            "source_type": source.source_type.value,
            "uri": source.uri,
            "description": source.description,
            "status": source.status.value,
            "created_at": source.created_at.isoformat(),
            "last_sync": source.last_sync.isoformat() if source.last_sync else None,
            "owner": source.owner,
            "sync_frequency": source.sync_frequency,
            "tags": source.tags,
        }

    @app.get("/sources/{source_id}/syncs", tags=["Registry"])
    async def get_sync_history(
        source_id: str,
        limit: int = Query(20, ge=1, le=100),
    ):
        """Get sync history for a source."""
        uid = _parse_source_id(source_id)

        history = app.state.registry.get_sync_history(uid, limit=limit)

        return {
            "count": len(history),
            "syncs": dataframe_to_records(history),
        }

    # ==========================================================================
    # Graph Visualization Data
    # ==========================================================================

    @app.get("/graph/nodes", tags=["Visualization"])
    async def get_graph_nodes(
        limit: int = Query(100, ge=1, le=1000),
    ):
        """Get unique nodes (subjects and objects) for graph visualization."""
        # Reads the store's private frame directly.
        df = app.state.store._df

        subjects = df["subject"].unique().to_list()[:limit]
        objects = df.filter(
            pl.col("object_type") == "uri"
        )["object"].unique().to_list()[:limit]

        # Merge both lists without duplicates, then cap the combined result.
        all_nodes = list(set(subjects + objects))[:limit]

        return {
            "count": len(all_nodes),
            "nodes": [{"id": n, "label": _local_name(n)} for n in all_nodes],
        }

    @app.get("/graph/edges", tags=["Visualization"])
    async def get_graph_edges(
        limit: int = Query(500, ge=1, le=5000),
    ):
        """Get edges (triples) for graph visualization."""
        # The limit is taken first, so fewer than ``limit`` edges may be
        # returned once literal-valued rows have been filtered out.
        df = app.state.store._df.head(limit)

        # Only include edges where target is also a URI node (not literals)
        # This makes the graph visualizable
        df_uri_objects = df.filter(pl.col("object_type") == "uri")

        edges = [
            {
                "source": row["subject"],
                "target": row["object"],
                "predicate": row["predicate"],
                "label": _local_name(row["predicate"]),
                "confidence": row["confidence"],
                "provenance_source": row["source"],
            }
            for row in df_uri_objects.iter_rows(named=True)
        ]

        return {
            "count": len(edges),
            "edges": edges,
        }

    @app.get("/graph/subgraph/{node_encoded:path}", tags=["Visualization"])
    async def get_subgraph(
        node_encoded: str,
        depth: int = Query(1, ge=1, le=3, description="Traversal depth"),
    ):
        """Get subgraph around a specific node."""
        node = unquote(node_encoded)

        # Get triples where node is subject or object
        # NOTE(review): ``depth`` is validated but not used below; only the
        # node's direct neighbourhood is returned whatever value is sent.
        df = app.state.store._df

        outgoing = df.filter(pl.col("subject") == node)
        incoming = df.filter(pl.col("object") == node)

        related = pl.concat([outgoing, incoming]).unique()

        nodes = set()
        edges = []

        for row in related.iter_rows(named=True):
            nodes.add(row["subject"])
            # Only add object as a node if it's a URI, not a literal
            # NOTE(review): the edge is emitted under the same condition so that
            # every edge target is also a node (as in /graph/edges) — confirm
            # against the upstream source, whose indentation was lost here.
            if row["object_type"] == "uri":
                nodes.add(row["object"])
                edges.append({
                    "source": row["subject"],
                    "target": row["object"],
                    "predicate": row["predicate"],
                    "confidence": row["confidence"],
                })

        return {
            "center": node,
            "nodes": [{"id": n, "label": _local_name(n)} for n in nodes],
            "edges": edges,
        }

    return app
560
+
561
+
562
# Module-level application object; "rdf_starbase.web:app" below resolves to it
app = create_app()


if __name__ == "__main__":
    # Executed as a script: start a uvicorn server with auto-reload enabled
    import uvicorn

    uvicorn.run(
        "rdf_starbase.web:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )