rdf-starbase 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_starbase/__init__.py +57 -0
- rdf_starbase/ai_grounding.py +728 -0
- rdf_starbase/compat/__init__.py +26 -0
- rdf_starbase/compat/rdflib.py +1104 -0
- rdf_starbase/formats/__init__.py +29 -0
- rdf_starbase/formats/jsonld.py +488 -0
- rdf_starbase/formats/ntriples.py +419 -0
- rdf_starbase/formats/rdfxml.py +434 -0
- rdf_starbase/formats/turtle.py +882 -0
- rdf_starbase/models.py +92 -0
- rdf_starbase/registry.py +540 -0
- rdf_starbase/repositories.py +407 -0
- rdf_starbase/repository_api.py +739 -0
- rdf_starbase/sparql/__init__.py +35 -0
- rdf_starbase/sparql/ast.py +910 -0
- rdf_starbase/sparql/executor.py +1925 -0
- rdf_starbase/sparql/parser.py +1716 -0
- rdf_starbase/storage/__init__.py +44 -0
- rdf_starbase/storage/executor.py +1914 -0
- rdf_starbase/storage/facts.py +850 -0
- rdf_starbase/storage/lsm.py +531 -0
- rdf_starbase/storage/persistence.py +338 -0
- rdf_starbase/storage/quoted_triples.py +292 -0
- rdf_starbase/storage/reasoner.py +1035 -0
- rdf_starbase/storage/terms.py +628 -0
- rdf_starbase/store.py +1049 -0
- rdf_starbase/store_legacy.py +748 -0
- rdf_starbase/web.py +568 -0
- rdf_starbase-0.1.0.dist-info/METADATA +706 -0
- rdf_starbase-0.1.0.dist-info/RECORD +31 -0
- rdf_starbase-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,728 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AI Grounding API
|
|
3
|
+
|
|
4
|
+
A specialized API layer designed for AI/LLM consumption, providing:
|
|
5
|
+
- Structured fact retrieval with provenance for RAG (Retrieval-Augmented Generation)
|
|
6
|
+
- Claim verification against the knowledge base
|
|
7
|
+
- Entity context with full provenance chain
|
|
8
|
+
- Inference materialization with attribution
|
|
9
|
+
|
|
10
|
+
This API is separate from the visualization API (/graph/*) because:
|
|
11
|
+
1. Different response formats (facts+citations vs nodes+edges)
|
|
12
|
+
2. Different filtering needs (confidence thresholds, freshness)
|
|
13
|
+
3. Different latency requirements (sub-100ms for tool calls)
|
|
14
|
+
4. Different auth model (API keys for agents vs sessions for users)
|
|
15
|
+
|
|
16
|
+
Endpoints:
|
|
17
|
+
- POST /ai/query - Structured fact retrieval for grounding
|
|
18
|
+
- POST /ai/verify - Verify if a claim is supported
|
|
19
|
+
- GET /ai/context/{iri} - All facts about an entity
|
|
20
|
+
- POST /ai/materialize - Trigger reasoning and persist inferences
|
|
21
|
+
- GET /ai/inferences - List materialized inferences
- GET /ai/health - AI API health check
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from datetime import datetime, timedelta
|
|
25
|
+
from typing import Any, Optional, Union, List
|
|
26
|
+
from enum import Enum
|
|
27
|
+
|
|
28
|
+
from fastapi import APIRouter, HTTPException, Query, Depends
|
|
29
|
+
from pydantic import BaseModel, Field
|
|
30
|
+
import polars as pl
|
|
31
|
+
|
|
32
|
+
from rdf_starbase import TripleStore, execute_sparql
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# =============================================================================
|
|
36
|
+
# Pydantic Models for AI Grounding API
|
|
37
|
+
# =============================================================================
|
|
38
|
+
|
|
39
|
+
class ConfidenceLevel(str, Enum):
    """Named confidence tiers a client can request instead of a raw number."""

    HIGH = "high"      # maps to 0.9
    MEDIUM = "medium"  # maps to 0.7
    LOW = "low"        # maps to 0.5
    ANY = "any"        # maps to 0.0, i.e. no confidence filtering

    def to_threshold(self) -> float:
        """Return the minimum numeric confidence this tier stands for."""
        if self is ConfidenceLevel.HIGH:
            return 0.9
        if self is ConfidenceLevel.MEDIUM:
            return 0.7
        if self is ConfidenceLevel.LOW:
            return 0.5
        # Only ANY remains: the enum is closed, so no other member can reach here.
        return 0.0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class FactWithProvenance(BaseModel):
    """A single fact with full provenance chain."""

    # Pydantic v2 configuration. The class-based ``Config`` form is deprecated
    # in v2 and emits a deprecation warning; ``model_config`` accepts a plain
    # dict, so no extra import is needed.
    model_config = {
        "json_schema_extra": {
            "example": {
                "subject": "http://example.org/customer/123",
                "predicate": "http://xmlns.com/foaf/0.1/name",
                "object": "Alice Johnson",
                "source": "CRM_System",
                "confidence": 0.95,
                "timestamp": "2026-01-15T10:30:00Z",
                "process": "api_sync",
                "is_inferred": False,
            }
        }
    }

    subject: str
    predicate: str
    object: Union[str, int, float, bool]
    source: str
    confidence: float
    timestamp: str
    process: Optional[str] = None
    is_inferred: bool = False
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class Citation(BaseModel):
    """Attribution record attached to a fact returned to an AI client."""

    # All five fields are required; only their descriptions are configured.
    fact_hash: str = Field(description="Unique identifier for the fact")
    source: str = Field(description="Originating system or person")
    confidence: float = Field(description="Confidence score 0.0-1.0")
    timestamp: str = Field(description="When the assertion was made")
    retrieval_time: str = Field(description="When this was retrieved")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class GroundedFact(BaseModel):
    """A subject/predicate/object statement paired with its citation."""

    subject: str
    predicate: str
    object: Union[str, int, float, bool]
    citation: Citation

    def to_natural_language(self) -> str:
        """Render the fact as "<subject> <predicate label> <object>".

        The label is the text after the last "/" of the predicate, further
        reduced to the text after the last "#" if one is present.
        """
        after_slash = self.predicate.rpartition("/")[2]
        label = after_slash.rpartition("#")[2]
        return f"{self.subject} {label} {self.object}"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# Request/Response Models
|
|
104
|
+
|
|
105
|
+
class AIQueryRequest(BaseModel):
    """Filters accepted by the structured fact-retrieval endpoint."""

    # Triple-pattern filters; leaving one as None leaves that position unfiltered.
    subject: Optional[str] = Field(default=None, description="Filter by subject IRI")
    predicate: Optional[str] = Field(default=None, description="Filter by predicate IRI")
    object: Optional[str] = Field(default=None, description="Filter by object value")

    # Provenance filters.
    sources: Optional[List[str]] = Field(
        default=None, description="Filter to specific sources"
    )
    min_confidence: Optional[ConfidenceLevel] = Field(
        default=ConfidenceLevel.MEDIUM, description="Minimum confidence threshold"
    )
    max_age_days: Optional[int] = Field(
        default=None, description="Maximum age of facts in days (freshness filter)"
    )
    include_inferred: bool = Field(
        default=True, description="Include inferred triples from reasoning"
    )

    # Result size cap, validated to the range 1..1000.
    limit: int = Field(default=100, ge=1, le=1000, description="Maximum results")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class AIQueryResponse(BaseModel):
    """Grounded facts plus retrieval metadata returned by the query endpoint."""

    # The facts that survived every filter and the result limit.
    facts: List[GroundedFact]
    # Rows matched by the triple pattern and confidence threshold, counted
    # before the source/freshness/inferred filters and the limit are applied.
    total_count: int
    # Number of entries in ``facts``.
    filtered_count: int
    # Numeric threshold derived from the requested confidence level.
    confidence_threshold: float
    # ISO-8601 time at which the request was served.
    retrieval_timestamp: str
    # Distinct sources appearing in the returned rows.
    sources_used: List[str]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class ClaimVerificationRequest(BaseModel):
    """A claim (subject + predicate, optionally a value) to check against the store."""

    subject: str = Field(description="Subject of the claim")
    predicate: str = Field(description="Predicate of the claim")
    expected_object: Optional[str] = Field(
        default=None,
        description="Expected object value (if checking specific value)",
    )
    min_confidence: ConfidenceLevel = Field(
        default=ConfidenceLevel.MEDIUM,
        description="Minimum confidence for supporting evidence",
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class ClaimVerificationResponse(BaseModel):
    """Verdict on a claim together with the evidence behind it."""

    claim_supported: bool = Field(description="Whether the claim is supported")
    confidence: Optional[float] = Field(
        default=None,
        description="Confidence of the best supporting fact",
    )
    # Evidence lists default to fresh empty lists per instance.
    supporting_facts: List[GroundedFact] = Field(
        default_factory=list,
        description="Facts that support the claim",
    )
    contradicting_facts: List[GroundedFact] = Field(
        default_factory=list,
        description="Facts that contradict the claim",
    )
    has_conflicts: bool = Field(
        default=False,
        description="Whether there are conflicting assertions",
    )
    recommendation: str = Field(
        description="Recommendation for the AI on how to use this information",
    )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class EntityContextResponse(BaseModel):
    """Everything the context endpoint reports about one entity."""

    # The entity IRI the context was requested for.
    entity: str
    # Facts in which the entity takes part.
    facts: List[GroundedFact]
    # Other "http..." terms appearing in those facts.
    related_entities: List[str]
    # Distinct citation sources across ``facts``.
    sources: List[str]
    # Aggregate statistics over the facts' confidence scores.
    confidence_summary: dict
    # ISO-8601 time at which the request was served.
    retrieval_timestamp: str
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class MaterializeRequest(BaseModel):
    """Options for a reasoning/materialization run."""

    enable_rdfs: bool = Field(default=True, description="Apply RDFS entailment rules")
    enable_owl: bool = Field(default=True, description="Apply OWL 2 RL entailment rules")
    # Validated to the range 1..1000.
    max_iterations: int = Field(
        default=100, ge=1, le=1000, description="Max reasoning iterations"
    )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class MaterializeResponse(BaseModel):
    """Outcome and statistics of a materialization run."""

    success: bool
    # Number of iterations reported by the reasoner.
    iterations: int
    # Total number of triples the run inferred.
    triples_inferred: int
    # Inference counts summed over the RDFS rules and the OWL rules respectively.
    rdfs_inferences: int
    owl_inferences: int
    # Per-rule inference counts keyed by rule name.
    breakdown: dict
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# =============================================================================
|
|
203
|
+
# Helper Functions
|
|
204
|
+
# =============================================================================
|
|
205
|
+
|
|
206
|
+
def dataframe_to_grounded_facts(
    df: pl.DataFrame,
    retrieval_time: datetime,
) -> List[GroundedFact]:
    """Convert DataFrame rows to GroundedFact objects.

    Args:
        df: Frame providing ``subject``, ``predicate`` and ``object`` columns.
            ``source``, ``confidence`` and ``timestamp`` are optional; a
            missing column or a null cell falls back to ``"unknown"``, ``1.0``
            and ``retrieval_time`` respectively.
        retrieval_time: Moment of retrieval; stamped on every citation.

    Returns:
        One GroundedFact per row, in row order.
    """
    import hashlib  # imported once per call instead of once per row

    retrieved_at = retrieval_time.isoformat()
    facts = []

    for row in df.iter_rows(named=True):
        # ``dict.get(key, default)`` only covers a missing column, not a null
        # cell, so both cases are resolved explicitly here.
        source = row.get("source")
        if source is None:
            source = "unknown"
        confidence = row.get("confidence")
        if confidence is None:
            confidence = 1.0

        # Short, stable identifier derived from the triple and its source.
        fact_key = f"{row['subject']}|{row['predicate']}|{row['object']}|{source}"
        hash_id = hashlib.sha256(fact_key.encode()).hexdigest()[:12]

        timestamp = row.get("timestamp")
        if isinstance(timestamp, datetime):
            timestamp_str = timestamp.isoformat()
        elif timestamp:
            timestamp_str = str(timestamp)
        else:
            # No assertion time recorded: fall back to the retrieval time.
            timestamp_str = retrieved_at

        citation = Citation(
            fact_hash=hash_id,
            source=source,
            confidence=confidence,
            timestamp=timestamp_str,
            retrieval_time=retrieved_at,
        )

        facts.append(GroundedFact(
            subject=row["subject"],
            predicate=row["predicate"],
            object=row["object"],
            citation=citation,
        ))

    return facts
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# =============================================================================
|
|
244
|
+
# AI Grounding Router
|
|
245
|
+
# =============================================================================
|
|
246
|
+
|
|
247
|
+
def create_ai_router(store: TripleStore) -> APIRouter:
    """
    Create the AI Grounding API router.

    Registers the /ai/query, /ai/verify, /ai/context/{iri}, /ai/materialize,
    /ai/inferences and /ai/health endpoints, all bound to ``store``.

    Args:
        store: TripleStore instance to query

    Returns:
        FastAPI APIRouter with AI grounding endpoints
    """
    router = APIRouter(prefix="/ai", tags=["AI Grounding"])

    def _inferred_mask(columns):
        """Build a boolean expression flagging reasoner-produced rows.

        A row counts as inferred when its ``process`` is "reasoner" or
        "inference_engine", or its ``source`` is "reasoner". Returns None when
        the frame has neither column. Shared by /query and /inferences so both
        endpoints agree on what "inferred" means.
        """
        conditions = []
        if "process" in columns:
            conditions.append(pl.col("process") == "reasoner")
            conditions.append(pl.col("process") == "inference_engine")
        if "source" in columns:
            conditions.append(pl.col("source") == "reasoner")
        if not conditions:
            return None
        mask = conditions[0]
        for condition in conditions[1:]:
            mask = mask | condition
        # Comparisons against null cells yield null; treat those as "not
        # inferred" so that negating the mask keeps such rows.
        return mask.fill_null(False)

    # =========================================================================
    # POST /ai/query - Structured Fact Retrieval
    # =========================================================================

    @router.post(
        "/query",
        response_model=AIQueryResponse,
        summary="Query facts for AI grounding",
        description="""
        Retrieve facts from the knowledge base with provenance for AI grounding.

        Use this endpoint when your AI needs to:
        - Ground responses in verified facts
        - Retrieve information with confidence scores
        - Get citations for attribution

        The response includes full provenance chains and citation information
        suitable for RAG (Retrieval-Augmented Generation) pipelines.
        """,
    )
    async def ai_query(request: AIQueryRequest) -> AIQueryResponse:
        retrieval_time = datetime.utcnow()

        # min_confidence is Optional on the request model. An explicit null
        # means "no threshold", like the other optional filters, instead of
        # crashing on None.to_threshold().
        level = request.min_confidence
        if level is None:
            level = ConfidenceLevel.ANY
        confidence_threshold = level.to_threshold()

        df = store.get_triples(
            subject=request.subject,
            predicate=request.predicate,
            obj=request.object,
            min_confidence=confidence_threshold,
        )

        # Counted before the source/freshness/inferred filters and the limit.
        total_count = len(df)

        if request.sources:
            df = df.filter(pl.col("source").is_in(request.sources))

        # Freshness filter, measured from the same instant reported to the client.
        if request.max_age_days:
            cutoff = retrieval_time - timedelta(days=request.max_age_days)
            df = df.filter(pl.col("timestamp") >= cutoff)

        if not request.include_inferred:
            inferred = _inferred_mask(df.columns)
            if inferred is not None:
                df = df.filter(~inferred)

        df = df.head(request.limit)

        facts = dataframe_to_grounded_facts(df, retrieval_time)
        sources_used = df["source"].unique().to_list() if len(df) > 0 else []

        return AIQueryResponse(
            facts=facts,
            total_count=total_count,
            filtered_count=len(facts),
            confidence_threshold=confidence_threshold,
            retrieval_timestamp=retrieval_time.isoformat(),
            sources_used=sources_used,
        )

    # =========================================================================
    # POST /ai/verify - Claim Verification
    # =========================================================================

    @router.post(
        "/verify",
        response_model=ClaimVerificationResponse,
        summary="Verify a claim against the knowledge base",
        description="""
        Check if a claim is supported by the knowledge base.

        Use this endpoint when your AI needs to:
        - Verify a statement before including it in a response
        - Check for contradictions in the knowledge base
        - Get supporting evidence for a claim

        The response indicates whether the claim is supported, provides
        supporting/contradicting evidence, and gives a recommendation.
        """,
    )
    async def ai_verify(request: ClaimVerificationRequest) -> ClaimVerificationResponse:
        retrieval_time = datetime.utcnow()
        confidence_threshold = request.min_confidence.to_threshold()

        # All facts matching subject + predicate at or above the threshold.
        df = store.get_triples(
            subject=request.subject,
            predicate=request.predicate,
            min_confidence=confidence_threshold,
        )

        if len(df) == 0:
            return ClaimVerificationResponse(
                claim_supported=False,
                confidence=None,
                supporting_facts=[],
                contradicting_facts=[],
                has_conflicts=False,
                recommendation="No facts found for this subject-predicate pair. "
                               "The AI should not make claims about this topic or "
                               "clearly state that information is not available.",
            )

        all_facts = dataframe_to_grounded_facts(df, retrieval_time)

        # ``is None`` rather than truthiness: an empty string is a legitimate
        # expected value and must still be checked against the stored objects.
        if request.expected_object is None:
            best_confidence = max(
                (f.citation.confidence for f in all_facts),
                default=None,
            )
            unique_values = len({str(f.object) for f in all_facts})
            has_conflicts = unique_values > 1

            if has_conflicts:
                recommendation = (
                    f"Multiple values found ({unique_values} distinct) from different sources. "
                    "The AI should acknowledge the competing claims and cite sources."
                )
            else:
                recommendation = (
                    f"Single consistent value found across {len(all_facts)} source(s). "
                    "The AI can state this fact with confidence."
                )

            return ClaimVerificationResponse(
                claim_supported=True,
                confidence=best_confidence,
                supporting_facts=all_facts,
                contradicting_facts=[],
                has_conflicts=has_conflicts,
                recommendation=recommendation,
            )

        # A specific value is expected: split the evidence by string equality.
        expected = str(request.expected_object)
        supporting = [f for f in all_facts if str(f.object) == expected]
        contradicting = [f for f in all_facts if str(f.object) != expected]

        has_conflicts = bool(supporting) and bool(contradicting)
        best_confidence = max(
            (f.citation.confidence for f in supporting),
            default=None,
        )

        if not supporting:
            recommendation = (
                f"The claim is NOT supported. {len(contradicting)} source(s) "
                "report different values. "
                "The AI should NOT make this claim and instead report "
                "what the knowledge base actually contains."
            )
        elif has_conflicts:
            recommendation = (
                f"The claim is supported by {len(supporting)} source(s) but "
                f"contradicted by {len(contradicting)} source(s). "
                "The AI should present this as contested information with "
                "sources for both perspectives."
            )
        else:
            recommendation = (
                f"The claim is supported by {len(supporting)} source(s) with "
                f"confidence up to {best_confidence:.0%}. "
                "The AI can confidently state this fact with attribution."
            )

        return ClaimVerificationResponse(
            claim_supported=bool(supporting),
            confidence=best_confidence,
            supporting_facts=supporting,
            contradicting_facts=contradicting,
            has_conflicts=has_conflicts,
            recommendation=recommendation,
        )

    # =========================================================================
    # GET /ai/context/{iri} - Entity Context
    # =========================================================================

    @router.get(
        "/context/{iri:path}",
        response_model=EntityContextResponse,
        summary="Get full context for an entity",
        description="""
        Retrieve all known facts about an entity with full provenance.

        Use this endpoint when your AI needs to:
        - Understand everything known about a specific entity
        - Get a complete picture before answering questions
        - Gather context for entity-centric responses

        Returns all facts where the entity appears as subject or object,
        along with confidence summaries and related entity links.
        """,
    )
    async def ai_context(
        iri: str,
        min_confidence: ConfidenceLevel = Query(
            ConfidenceLevel.LOW,
            description="Minimum confidence threshold"
        ),
        include_incoming: bool = Query(
            True,
            description="Include facts where entity is the object"
        ),
        limit: int = Query(100, ge=1, le=500, description="Maximum facts to return"),
    ) -> EntityContextResponse:
        import urllib.parse

        # NOTE(review): the framework may already percent-decode path
        # parameters; confirm this second decode is intended.
        entity = urllib.parse.unquote(iri)
        retrieval_time = datetime.utcnow()
        confidence_threshold = min_confidence.to_threshold()

        # Outgoing facts (entity as subject).
        df = store.get_triples(
            subject=entity,
            min_confidence=confidence_threshold,
        )

        # Incoming facts (entity as object), if requested.
        if include_incoming:
            df_in = store.get_triples(
                obj=entity,
                min_confidence=confidence_threshold,
            )
            # maintain_order keeps outgoing facts ahead of incoming ones, so
            # the head(limit) below selects a deterministic subset.
            df = pl.concat([df, df_in]).unique(maintain_order=True)

        df = df.head(limit)

        facts = dataframe_to_grounded_facts(df, retrieval_time)

        # Related entities: any other "http..." term found in the facts.
        related = set()
        for fact in facts:
            for term in (fact.subject, str(fact.object)):
                if term != entity and term.startswith("http"):
                    related.add(term)

        sources = sorted({f.citation.source for f in facts})

        confidences = [f.citation.confidence for f in facts]
        conf_summary = {
            "min": min(confidences) if confidences else 0,
            "max": max(confidences) if confidences else 0,
            "avg": sum(confidences) / len(confidences) if confidences else 0,
            "high_confidence_count": sum(1 for c in confidences if c >= 0.9),
            "medium_confidence_count": sum(1 for c in confidences if 0.7 <= c < 0.9),
            "low_confidence_count": sum(1 for c in confidences if c < 0.7),
        }

        return EntityContextResponse(
            entity=entity,
            facts=facts,
            # Sorted before truncation so the 20 reported entities are stable.
            related_entities=sorted(related)[:20],
            sources=sources,
            confidence_summary=conf_summary,
            retrieval_timestamp=retrieval_time.isoformat(),
        )

    # =========================================================================
    # POST /ai/materialize - Inference Materialization
    # =========================================================================

    @router.post(
        "/materialize",
        response_model=MaterializeResponse,
        summary="Materialize inferences from reasoning",
        description="""
        Run the reasoning engine and persist inferred triples.

        This executes RDFS and OWL 2 RL forward-chaining inference,
        materializing entailments into the store with provenance:
        - source: "reasoner"
        - confidence: 1.0 (logical entailment)
        - process: "inference_engine"

        Materialized inferences can then be queried like any other facts,
        with the `is_inferred` flag indicating their origin.
        """,
    )
    async def ai_materialize(request: MaterializeRequest) -> MaterializeResponse:
        try:
            from rdf_starbase.storage.reasoner import RDFSReasoner
        except ImportError as e:
            raise HTTPException(
                status_code=501,
                detail=f"Reasoning engine not available: {str(e)}",
            ) from e

        # Raised outside any broad ``except Exception`` so the 501 reaches the
        # client instead of being re-wrapped as a 500.
        if not (hasattr(store, "_term_dict") and hasattr(store, "_fact_store")):
            raise HTTPException(
                status_code=501,
                detail="Reasoning requires the new storage layer. "
                       "Use TripleStore with FactStore backend.",
            )

        try:
            # NOTE(review): request.enable_rdfs is accepted but never passed
            # on; confirm whether RDFSReasoner supports disabling RDFS rules.
            reasoner = RDFSReasoner(
                term_dict=store._term_dict,
                fact_store=store._fact_store,
                max_iterations=request.max_iterations,
                enable_owl=request.enable_owl,
            )
            stats = reasoner.reason()

            breakdown = {
                "rdfs2_domain": stats.rdfs2_inferences,
                "rdfs3_range": stats.rdfs3_inferences,
                "rdfs5_subPropertyOf_transitivity": stats.rdfs5_inferences,
                "rdfs7_property_inheritance": stats.rdfs7_inferences,
                "rdfs9_type_inheritance": stats.rdfs9_inferences,
                "rdfs11_subClassOf_transitivity": stats.rdfs11_inferences,
                "owl_sameAs": stats.owl_same_as_inferences,
                "owl_equivalentClass": stats.owl_equivalent_class_inferences,
                "owl_equivalentProperty": stats.owl_equivalent_property_inferences,
                "owl_inverseOf": stats.owl_inverse_of_inferences,
                "owl_transitive": stats.owl_transitive_inferences,
                "owl_symmetric": stats.owl_symmetric_inferences,
                "owl_functional": stats.owl_functional_inferences,
                "owl_inverseFunctional": stats.owl_inverse_functional_inferences,
                "owl_hasValue": stats.owl_has_value_inferences,
            }
            # Every breakdown entry belongs to exactly one family, so the
            # family totals are the sums over the matching key prefix.
            rdfs_total = sum(
                count for rule, count in breakdown.items() if rule.startswith("rdfs")
            )
            owl_total = sum(
                count for rule, count in breakdown.items() if rule.startswith("owl_")
            )

            return MaterializeResponse(
                success=True,
                iterations=stats.iterations,
                triples_inferred=stats.triples_inferred,
                rdfs_inferences=rdfs_total,
                owl_inferences=owl_total,
                breakdown=breakdown,
            )
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Materialization failed: {str(e)}",
            ) from e

    # =========================================================================
    # GET /ai/inferences - List Inferred Facts
    # =========================================================================

    @router.get(
        "/inferences",
        summary="List materialized inferences",
        description="""
        Get facts that were inferred by the reasoning engine.

        These are triples that were not explicitly asserted but were
        derived through RDFS/OWL entailment rules.
        """,
    )
    async def ai_inferences(
        limit: int = Query(100, ge=1, le=1000),
    ):
        retrieval_time = datetime.utcnow()

        df = store.get_triples()

        inferred = _inferred_mask(df.columns)
        if inferred is None:
            return {"count": 0, "inferences": [], "message": "No inference markers found"}

        df = df.filter(inferred).head(limit)

        facts = dataframe_to_grounded_facts(df, retrieval_time)

        return {
            "count": len(facts),
            "inferences": [f.model_dump() for f in facts],
            "retrieval_timestamp": retrieval_time.isoformat(),
        }

    # =========================================================================
    # GET /ai/health - AI API Health Check
    # =========================================================================

    @router.get(
        "/health",
        summary="AI API health check",
        description="Check if the AI Grounding API is operational.",
    )
    async def ai_health():
        """Health check for AI Grounding API."""
        try:
            # Quick store check
            stats = store.stats()
            return {
                "status": "healthy",
                "api": "ai_grounding",
                "version": "1.0.0",
                "store_stats": {
                    "total_triples": stats.get("total_triples", 0),
                    "unique_subjects": stats.get("unique_subjects", 0),
                },
                "capabilities": [
                    "query",
                    "verify",
                    "context",
                    "materialize",
                    "inferences",
                ],
            }
        except Exception as e:
            # Deliberate best-effort: a failing store is reported as
            # "degraded" in the payload rather than as an HTTP error.
            return {
                "status": "degraded",
                "error": str(e),
            }

    return router
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def integrate_ai_router(app, store: TripleStore):
    """
    Mount the AI Grounding endpoints on an existing FastAPI application.

    Args:
        app: FastAPI application that receives the router
        store: TripleStore instance the endpoints will query
    """
    app.include_router(create_ai_router(store))
|