mcp-agentic-pipelines 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +93 -0
- package/README.md +258 -0
- package/package.json +70 -0
- package/packages/clinical/package.json +22 -0
- package/packages/clinical/src/index.ts +262 -0
- package/packages/clinical/tsconfig.json +13 -0
- package/packages/core/package.json +21 -0
- package/packages/core/src/config.ts +138 -0
- package/packages/core/src/errors.ts +100 -0
- package/packages/core/src/index.ts +104 -0
- package/packages/core/src/llm-config.ts +213 -0
- package/packages/core/src/logging.ts +66 -0
- package/packages/core/src/python-bridge.ts +384 -0
- package/packages/core/src/rate-limiter.ts +136 -0
- package/packages/core/src/types.ts +203 -0
- package/packages/core/src/validation.ts +101 -0
- package/packages/core/tsconfig.json +10 -0
- package/packages/deeppipe/package.json +21 -0
- package/packages/deeppipe/src/index.ts +424 -0
- package/packages/deeppipe/tsconfig.json +13 -0
- package/packages/piste/package.json +20 -0
- package/packages/piste/src/index.ts +48 -0
- package/packages/piste/tsconfig.json +13 -0
- package/packages/precis/package.json +20 -0
- package/packages/precis/src/index.ts +67 -0
- package/packages/precis/tsconfig.json +13 -0
- package/packages/server/package.json +31 -0
- package/packages/server/src/index.ts +427 -0
- package/packages/server/tsconfig.json +17 -0
- package/setup.mjs +141 -0
- package/test.mjs +337 -0
- package/vendors/clinical-intake/pipeline.mjs +349 -0
- package/vendors/clinical-intake/questions/en.txt +9 -0
- package/vendors/clinical-intake/questions/fr.txt +9 -0
- package/vendors/piste/.env.example +73 -0
- package/vendors/piste/app/core/__init__.py +4 -0
- package/vendors/piste/app/core/config.py +83 -0
- package/vendors/piste/app/core/debuglog.py +16 -0
- package/vendors/piste/app/core/middleware.py +40 -0
- package/vendors/piste/bridge_piste.py +301 -0
- package/vendors/piste/pipeline/__init__.py +4 -0
- package/vendors/piste/pipeline/compiler.py +68 -0
- package/vendors/piste/pipeline/offline/__init__.py +28 -0
- package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
- package/vendors/piste/pipeline/replay.py +15 -0
- package/vendors/piste/pipeline/replay_engine.py +249 -0
- package/vendors/piste/pipeline/signatures/__init__.py +4 -0
- package/vendors/piste/pipeline/signatures/signatures.py +136 -0
- package/vendors/piste/pipeline/stage1/__init__.py +21 -0
- package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
- package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
- package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
- package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
- package/vendors/piste/pipeline/stage2/__init__.py +34 -0
- package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
- package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
- package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
- package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
- package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
- package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
- package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
- package/vendors/piste/pipeline/stage3/__init__.py +20 -0
- package/vendors/piste/pipeline/stage3/classifier.py +79 -0
- package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
- package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
- package/vendors/piste/pipeline/stage4/__init__.py +33 -0
- package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
- package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
- package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
- package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
- package/vendors/piste/requirements.txt +53 -0
- package/vendors/precis/backend/__init__.py +6 -0
- package/vendors/precis/backend/agents/__init__.py +3 -0
- package/vendors/precis/backend/agents/data_synthesis.py +105 -0
- package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
- package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
- package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
- package/vendors/precis/backend/agents/guardrail.py +175 -0
- package/vendors/precis/backend/agents/query_expander.py +89 -0
- package/vendors/precis/backend/agents/radial_interpol.py +99 -0
- package/vendors/precis/backend/agents/report_generator.py +92 -0
- package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
- package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
- package/vendors/precis/backend/agents/vector_index.py +123 -0
- package/vendors/precis/backend/agents/veri_score.py +341 -0
- package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
- package/vendors/precis/backend/api/__init__.py +3 -0
- package/vendors/precis/backend/api/routes/__init__.py +3 -0
- package/vendors/precis/backend/config.py +88 -0
- package/vendors/precis/backend/core/__init__.py +13 -0
- package/vendors/precis/backend/core/hashing.py +22 -0
- package/vendors/precis/backend/core/metrics.py +77 -0
- package/vendors/precis/backend/core/multitoken.py +166 -0
- package/vendors/precis/backend/core/pmi.py +54 -0
- package/vendors/precis/backend/core/stemming.py +74 -0
- package/vendors/precis/backend/core/tracing.py +150 -0
- package/vendors/precis/backend/data/__init__.py +3 -0
- package/vendors/precis/backend/data/chunker.py +57 -0
- package/vendors/precis/backend/data/pdf_parser.py +42 -0
- package/vendors/precis/backend/db/__init__.py +3 -0
- package/vendors/precis/backend/db/models.py +173 -0
- package/vendors/precis/backend/db/repository.py +269 -0
- package/vendors/precis/backend/llm/__init__.py +3 -0
- package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
- package/vendors/precis/backend/llm/base.py +147 -0
- package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
- package/vendors/precis/backend/llm/factory.py +60 -0
- package/vendors/precis/backend/llm/google_provider.py +39 -0
- package/vendors/precis/backend/llm/ollama_provider.py +54 -0
- package/vendors/precis/backend/llm/openai_provider.py +50 -0
- package/vendors/precis/backend/main.py +677 -0
- package/vendors/precis/backend/orchestrator/__init__.py +3 -0
- package/vendors/precis/backend/orchestrator/planner.py +81 -0
- package/vendors/precis/backend/orchestrator/router.py +319 -0
- package/vendors/precis/backend/orchestrator/types.py +58 -0
- package/vendors/precis/bridge_precis.py +185 -0
- package/vendors/precis/data/sample_reports/README.md +8 -0
- package/vendors/precis/data/seed_data.py +115 -0
- package/vendors/precis/requirements.txt +19 -0
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
"""© JINAN KORDAB — 2026 AI HYBRID AGENTIC RETRIEVAL-AUGMENTED GENERATION RAG PIPELINE - PERSONAL PROJECT"""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from typing import Any, Dict, List, Optional, Set
|
|
8
|
+
|
|
9
|
+
from backend.core.stemming import PrecisStemmer
|
|
10
|
+
from backend.core.tracing import TraceEventType
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# ── Known stopwords (NLTK minus content-bearing words + Precis custom) ─
|
|
14
|
+
_CONTENT_WORDS_TO_KEEP: Set[str] = {
|
|
15
|
+
"other", "more", "most", "some", "such", "only", "own", "same",
|
|
16
|
+
"very", "just", "both", "few", "each", "every", "any", "all",
|
|
17
|
+
"no", "not", "nor",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
def _load_veri_stopwords() -> Set[str]:
    """Return the base English stopword set used for scoring.

    Preferred source is the NLTK English stopword corpus with every entry of
    ``_CONTENT_WORDS_TO_KEEP`` removed.  When NLTK cannot be imported, or its
    corpus cannot be loaded, a small hard-coded list is returned instead.

    Returns:
        A new ``set`` of lower-case stopwords.
    """
    try:
        from nltk.corpus import stopwords
        nltk_words = stopwords.words("english")
    except (ImportError, LookupError, OSError):
        # NLTK (or its stopword corpus) is unavailable: fall back to the
        # built-in list so scoring still works without the dependency.
        return set(
            "i me my we our you your he him his "
            "she her it its they them their this that "
            "these those am is are was were be been "
            "being have has had do does did a an the "
            "and but if or because as of at by for "
            "with about between into through during before "
            "after to from in on off over under "
            "can will should now".split()
        )
    return set(nltk_words) - _CONTENT_WORDS_TO_KEEP
|
|
37
|
+
|
|
38
|
+
# Final stopword set consulted by the evaluator: the base English list from
# `_load_veri_stopwords()` extended with Precis-specific query-structure
# words (instruction verbs and document-structure nouns).
_STOPWORDS: Set[str] = _load_veri_stopwords().union(
    # Precis-specific query-structure words
    "summarize summary summarise explain describe "
    "list identify compare contrast discuss analyze "
    "key finding findings detail details overview "
    "section chapter paragraph figure table page "
    "get make made see show shown find found".split()
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class VeriScoreReport:
    """Result record produced by one ``VeriScoreEvaluator.evaluate`` call.

    Every field has a default, so an empty report can be created with
    ``VeriScoreReport()``.  The list fields use ``default_factory`` and are
    therefore never shared between instances.
    """

    # --- Aggregate scores (floats, default 0.0) ---------------------------
    # Query/chunk token-overlap score.
    relevancy_score: float = 0.0
    # Source-reliability score derived from chunk metadata.
    trustworthiness_score: float = 0.0
    # Share of query tokens found in the retrieved chunks.
    exhaustivity_score: float = 0.0
    # Share of response sentences without support in the chunks.
    hallucination_rate: float = 0.0
    # Citations relative to the number of response sentences.
    citation_coverage: float = 0.0

    # --- Details -----------------------------------------------------------
    # One dict per retrieved chunk.
    per_chunk_scores: List[Dict[str, Any]] = field(default_factory=list)
    # Human-readable warnings collected from the scores above.
    flagged_issues: List[str] = field(default_factory=list)
    # Timestamp string set by the evaluator; empty until then.
    evaluation_timestamp: str = ""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class VeriScoreEvaluator:
    """Self-evaluation engine. Scores every Precis output on 5 quality dimensions.

    The dimensions are relevancy, trustworthiness, exhaustivity,
    hallucination rate and citation coverage.  Token-based dimensions work on
    stems produced by ``PrecisStemmer`` with stopwords removed (see
    ``_stem_set``); the original author notes this matches the retrieval
    layer's stemming.
    """

    def __init__(self) -> None:
        # Thresholds below which the corresponding dimension is reported
        # in ``VeriScoreReport.flagged_issues``.
        self.min_relevancy = 0.6
        self.min_trustworthiness = 0.5
        self._stemmer = PrecisStemmer()

    # ── Public API ───────────────────────────────────────────────────

    async def evaluate(self, query: str, retrieved_chunks: List[Dict[str, Any]],
                       generated_response: str, citations: List[Dict[str, Any]],
                       trace=None) -> "VeriScoreReport":
        """Run all quality checks concurrently and return a VeriScoreReport.

        Args:
            query: The user query the response answers.
            retrieved_chunks: Chunk dicts; the scorers read the keys
                ``text``, ``source``, ``page``, ``match_type``, ``is_title``,
                ``is_header`` and ``token_type`` via ``dict.get``.
            generated_response: The generated answer text.
            citations: Citation records; only their count is used.
            trace: Optional tracer.  When truthy, its ``event`` method is
                called once with the final scores.

        Returns:
            A populated ``VeriScoreReport`` with a UTC ISO-8601 timestamp.
        """
        # Each check is a synchronous method handed to asyncio.to_thread and
        # awaited together via asyncio.gather, so none of them blocks the
        # event loop while it runs.
        (rel, trust, exh, hall,
         sentence_count, per_chunk) = await asyncio.gather(
            asyncio.to_thread(self._compute_relevancy, query, retrieved_chunks),
            asyncio.to_thread(self._compute_trustworthiness, retrieved_chunks),
            asyncio.to_thread(self._compute_exhaustivity, query, retrieved_chunks),
            asyncio.to_thread(self._compute_hallucination_rate, generated_response, retrieved_chunks),
            asyncio.to_thread(self._count_sentences, generated_response),
            asyncio.to_thread(self._score_per_chunk, query, retrieved_chunks),
        )

        # Citations per sentence, capped at 1.0; max(1, ...) guards against a
        # zero sentence count (e.g. empty response).
        cit_cov = min(len(citations) / max(1, sentence_count), 1.0)

        # Flagged issues depend on the computed scores — run after gather.
        flagged = self._collect_flagged_issues(rel, trust, exh, hall, cit_cov, per_chunk)

        report = VeriScoreReport(
            relevancy_score=rel,
            trustworthiness_score=trust,
            exhaustivity_score=exh,
            hallucination_rate=hall,
            citation_coverage=cit_cov,
            per_chunk_scores=per_chunk,
            flagged_issues=flagged,
            evaluation_timestamp=datetime.now(timezone.utc).isoformat(),
        )

        if trace:
            trace.event(
                TraceEventType.EVALUATION_COMPLETED,
                agent_name="VeriScore",
                message=f"R:{rel:.2f} T:{trust:.2f} H:{hall:.2f}",
                data={
                    "relevancy": rel,
                    "trust": trust,
                    "hallucination": hall,
                    "exhaustivity": exh,
                    "citation_coverage": cit_cov,
                },
            )
        return report

    # ── Dimension 1: Relevancy ───────────────────────────────────────

    def _compute_relevancy(self, query: str, chunks: List[Dict]) -> float:
        """Return the mean Jaccard similarity between query stems and each chunk.

        Returns 0.0 when there are no chunks or when the query has no
        scoreable stems.  A chunk without scoreable stems contributes 0.0 to
        the mean.
        """
        if not chunks:
            return 0.0
        query_stems = self._stem_set(query)
        if not query_stems:
            return 0.0
        scores = [
            self._jaccard(query_stems, self._stem_set(c.get("text", "")))
            for c in chunks
        ]
        return sum(scores) / len(scores)

    # ── Dimension 2: Trustworthiness ─────────────────────────────────

    def _compute_trustworthiness(self, chunks: List[Dict]) -> float:
        """Score source reliability from chunk metadata.

        Each chunk starts at 0.5 and earns bonuses for text length, a
        ``source`` value, its ``match_type``, title/header flags and a
        ``contextual`` token type; the per-chunk score is capped at 1.0 and
        the mean over all chunks is returned.

        Returns 0.0 when there are no chunks (no evidence = no trust).
        """
        if not chunks:
            return 0.0
        scores: List[float] = []
        for c in chunks:
            score = 0.5  # Neutral base
            # A chunk may carry "text": None; treat it like empty text
            # instead of crashing in len().
            text = c.get("text") or ""
            if len(text) > 100:
                score += 0.2
            elif len(text) > 30:
                score += 0.1
            # Has a source document = verifiable
            if c.get("source"):
                score += 0.15
            # Match quality
            mt = c.get("match_type", "")
            if mt == "exact":
                score += 0.15
            elif mt == "subset":
                score += 0.10
            elif mt == "semantic":
                score += 0.05  # semantic matches are fuzzier → lower bonus
            # Structural signals
            if c.get("is_title") or c.get("is_header"):
                score += 0.10
            if c.get("token_type") == "contextual":
                score += 0.05
            scores.append(min(score, 1.0))
        return sum(scores) / len(scores)

    # ── Dimension 3: Exhaustivity ────────────────────────────────────

    def _compute_exhaustivity(self, query: str, chunks: List[Dict]) -> float:
        """Return the fraction of query stems that occur in at least one chunk.

        Matching is whole-token set intersection, not substring search.
        Returns 0.0 without chunks and 1.0 when the query has no scoreable
        stems (nothing left to cover).
        """
        if not chunks:
            return 0.0
        query_stems = self._stem_set(query)
        if not query_stems:
            return 1.0  # Query had only stopwords → fully covered
        # Union of all stems across all chunks.
        all_stems: Set[str] = set()
        for c in chunks:
            all_stems |= self._stem_set(c.get("text", ""))
        return len(query_stems & all_stems) / len(query_stems)

    # ── Dimension 4: Hallucination Rate ──────────────────────────────

    def _compute_hallucination_rate(self, response: str, chunks: List[Dict]) -> float:
        """Return the share of response sentences unsupported by the chunks.

        A sentence is examined only if it is longer than 20 characters.  It
        counts as supported when at least one of its stems longer than four
        characters also occurs in the chunk stems.  Sentences without any
        such stem are skipped (not counted as unsupported) but remain part
        of the denominator.

        Returns 0.0 when:
          - the response is empty or whitespace,
          - the chunks yield no stems (nothing to compare against),
          - the response has no sentence longer than 20 characters.
        """
        if not response.strip():
            return 0.0

        # Evidence is built from the source chunks ONLY — never from the
        # generated text, otherwise the response would validate itself.
        evidence_stems: Set[str] = set()
        for c in chunks:
            evidence_stems |= self._stem_set(c.get("text", ""))
        if not evidence_stems:
            return 0.0  # No evidence → can't assess

        substantive = [
            s for s in self._split_sentences(response) if len(s.strip()) > 20
        ]
        if not substantive:
            return 0.0

        unsupported = 0
        for sent in substantive:
            # Content words only (len > 4 to avoid noise).
            content_words = {s for s in self._stem_set(sent) if len(s) > 4}
            if not content_words:
                continue  # Only short/stop words: nothing to check
            if not (content_words & evidence_stems):
                unsupported += 1

        return unsupported / len(substantive)

    # ── Helpers ──────────────────────────────────────────────────────

    @staticmethod
    def _jaccard(left: Set[str], right: Set[str]) -> float:
        """Return |left ∩ right| / |left ∪ right|, or 0.0 if both are empty."""
        union = len(left | right)
        return len(left & right) / union if union else 0.0

    def _stem_set(self, text: str) -> Set[str]:
        """Return the set of stems in *text* with stopwords removed.

        Words are lower-cased and split on whitespace.  Stopwords are
        removed both before and after stemming: checking only the stemmed
        form lets a stopword slip through whenever the stemmer changes its
        surface form (classic Porter stemming, for instance, turns "this"
        into "thi"), which would pollute every overlap score.

        Returns an empty set for ``None``, empty or whitespace-only text.
        """
        if not text or not text.strip():
            return set()
        # NOTE(review): tokens keep any attached punctuation ("engine.");
        # whether PrecisStemmer strips it is not visible here — confirm.
        words = [w for w in text.lower().split() if w not in _STOPWORDS]
        if not words:
            return set()
        stems = self._stemmer.stem_tokens(words)
        return {s for s in stems if s and s not in _STOPWORDS}

    @staticmethod
    def _split_sentences(text: str) -> List[str]:
        """Split *text* into sentences.

        Uses ``nltk.tokenize.sent_tokenize`` when NLTK and its tokenizer
        data are available; otherwise splits on sentence-ending punctuation
        followed by whitespace and a capital letter.
        """
        try:
            from nltk.tokenize import sent_tokenize
            return sent_tokenize(text)
        except (ImportError, LookupError):
            pass
        # Fallback: split on sentence-ending punctuation followed by space + capital
        return [s.strip() for s in re.split(r'(?<=[.!?])\s+(?=[A-Z])', text) if s.strip()]

    @staticmethod
    def _count_sentences(text: str) -> int:
        """Count sentences in *text*. Used for citation-coverage denominator.

        Uses ``nltk.tokenize.sent_tokenize`` when available.  The fallback
        counts sentence-ending punctuation followed by whitespace and always
        returns at least 1.
        """
        try:
            from nltk.tokenize import sent_tokenize
            return len(sent_tokenize(text))
        except (ImportError, LookupError):
            pass
        # Strip first: a final "." followed by a trailing space/newline is
        # the end of the last sentence, not the start of another one.
        return max(1, len(re.findall(r'[.!?]\s', text.strip())) + 1)

    def _score_per_chunk(self, query: str, chunks: List[Dict]) -> List[Dict[str, Any]]:
        """Return one summary dict per chunk.

        Each dict holds ``text_preview`` (first 120 characters), ``source``,
        ``page`` (default 1), ``match_type``, ``jaccard`` (rounded to three
        decimals) and ``char_length``.
        """
        query_stems = self._stem_set(query)
        per_chunk: List[Dict[str, Any]] = []
        for c in chunks:
            # Tolerate "text": None the same way _stem_set does.
            text = c.get("text") or ""
            jaccard = self._jaccard(query_stems, self._stem_set(text))
            per_chunk.append({
                "text_preview": text[:120],
                "source": c.get("source", ""),
                "page": c.get("page", 1),
                "match_type": c.get("match_type", ""),
                "jaccard": round(jaccard, 3),
                "char_length": len(text),
            })
        return per_chunk

    def _collect_flagged_issues(
        self,
        rel: float, trust: float, exh: float, hall: float,
        cit_cov: float, per_chunk: List[Dict],
    ) -> List[str]:
        """Turn the computed scores into a list of human-readable warnings.

        Hallucination is reported at exactly one severity: "High" above 30%,
        otherwise "Elevated" above 10%.
        """
        issues: List[str] = []
        if rel < self.min_relevancy:
            issues.append(f"Low relevancy ({rel:.2f} < {self.min_relevancy}) — "
                          "query terms not well covered by retrieved chunks")
        if trust < self.min_trustworthiness:
            issues.append(f"Low trustworthiness ({trust:.2f} < {self.min_trustworthiness}) — "
                          "sources may be unreliable or too short")
        if exh < 0.5:
            issues.append(f"Low exhaustivity ({exh:.2f}) — "
                          "many query terms not found in any chunk")
        if hall > 0.3:
            issues.append(f"High hallucination rate ({hall:.0%} > 30%) — "
                          "many response claims are not supported by source evidence")
        elif hall > 0.1:
            # elif, not if: a rate above 30% must not also be reported as
            # merely "elevated".
            issues.append(f"Elevated hallucination rate ({hall:.0%}) — review recommended")
        if cit_cov < 0.2:
            issues.append(f"Low citation coverage ({cit_cov:.0%}) — "
                          "few sources cited relative to response length")
        # Per-chunk issues: only raised when there is at least one chunk and
        # every chunk has zero overlap with the query.
        if per_chunk and all(c.get("jaccard", 0) == 0.0 for c in per_chunk):
            issues.append("All chunks have zero Jaccard overlap with query — "
                          "retrieval may have returned irrelevant content")
        return issues
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""© JINAN KORDAB — 2026 AI HYBRID AGENTIC RETRIEVAL-AUGMENTED GENERATION RAG PIPELINE - PERSONAL PROJECT
|
|
2
|
+
|
|
3
|
+
Extracts structured fields from semi-structured work orders without OCR training,
|
|
4
|
+
without ML models, without vector databases. Uses regex patterns + stemming for
|
|
5
|
+
field label detection and value extraction.
|
|
6
|
+
|
|
7
|
+
Common aviation work order fields:
|
|
8
|
+
Tail Number, Work Order #, Date, Aircraft Model, Part Number, Serial Number,
|
|
9
|
+
Mechanic ID, Station, Work Performed, Hours, AD/SB Compliance, Inspector Stamp
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ExtractedField:
    """A single field hit found in a work order, with its provenance."""

    # Canonical field name, e.g. "tail_number".
    field_name: str
    # Matched text as it appeared in the document,
    # e.g. "Tail #:", "REG:", "A/C REG NO".
    raw_label: str
    # Extracted value, e.g. "N737AG".
    value: str
    # Heuristic confidence in the range 0.0 - 1.0.
    confidence: float
    # Page on which the hit was found.
    page: int
    # Line index of the hit within the processed text.
    line_number: int
    # Surrounding text, kept for manual verification.
    context: str = ""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class WorkOrder:
    """Structured work order record extracted from a document.

    Only ``source_file`` is required.  Scalar fields start as ``None`` and
    list fields start empty (one fresh list per instance).
    """

    # Name of the document the record was extracted from.
    source_file: str

    # --- Aircraft / order identification ----------------------------------
    tail_number: Optional[str] = None
    work_order_number: Optional[str] = None
    date: Optional[str] = None
    aircraft_model: Optional[str] = None

    # --- Parts -------------------------------------------------------------
    part_numbers: List[str] = field(default_factory=list)
    part_descriptions: List[str] = field(default_factory=list)
    serial_numbers: List[str] = field(default_factory=list)

    # --- Labour ------------------------------------------------------------
    mechanic_id: Optional[str] = None
    station: Optional[str] = None
    work_performed: Optional[str] = None
    # Stored as the matched text, not converted to a number.
    hours_worked: Optional[str] = None

    # --- Compliance / sign-off ----------------------------------------------
    ad_sb_references: List[str] = field(default_factory=list)
    inspector_stamp: Optional[str] = None

    # --- Provenance ----------------------------------------------------------
    # Every individual hit that contributed to this record.
    extracted_fields: List[ExtractedField] = field(default_factory=list)
    raw_text_snippet: str = ""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class WorkOrderExtractor:
|
|
51
|
+
"""Extracts structured aviation work order fields using token-pattern matching."""
|
|
52
|
+
|
|
53
|
+
# Field label patterns — key is the canonical field name, value is list of regex patterns
|
|
54
|
+
FIELD_PATTERNS: Dict[str, List[str]] = {
|
|
55
|
+
"tail_number": [
|
|
56
|
+
r"(?:tail|a/?c|aircraft|registration|reg|n-?number)[\s#:.-]*([\s]*[nN]\d{1,6}[a-zA-Z]{0,2})",
|
|
57
|
+
r"(?:tail|a/?c|aircraft|registration|reg)[\s#:.-]*([\s]*\d{1,6}[a-zA-Z]{0,2})",
|
|
58
|
+
r"\b([nN]\d{1,6}[a-zA-Z]{0,2})\b",
|
|
59
|
+
],
|
|
60
|
+
"work_order_number": [
|
|
61
|
+
r"(?:wo|w/?o|work\s*order|job\s*card|task\s*order)[\s#:.-]*([\s]*[a-zA-Z0-9]{4,20})",
|
|
62
|
+
r"\b(?:WO|W/O)\s*[:#.-]*\s*([a-zA-Z0-9]{4,20})",
|
|
63
|
+
],
|
|
64
|
+
"date": [
|
|
65
|
+
r"(?:date|dated|performed|completed)[\s:.-]*([\s]*\d{1,2}[/-]\d{1,2}[/-]\d{2,4})",
|
|
66
|
+
r"(?:date|dated)[\s:.-]*([\s]*\d{4}-\d{2}-\d{2})",
|
|
67
|
+
r"(?:date|dated)[\s:.-]*([\s]*(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s*\d{4})",
|
|
68
|
+
],
|
|
69
|
+
"aircraft_model": [
|
|
70
|
+
r"(?:model|a/?c\s*type|aircraft\s*type)[\s:.-]*([\s]*(?:B|b)(?:7[3-9]\d|7[3-9]\d[-\s]*[a-zA-Z0-9]{0,6}))",
|
|
71
|
+
r"(?:model|a/?c\s*type|aircraft\s*type)[\s:.-]*([\s]*(?:A|a)(?:3[12]\d|3[12]\d[-\s]*[a-zA-Z0-9]{0,4}))",
|
|
72
|
+
r"(?:model|a/?c\s*type|aircraft\s*type)[\s:.-]*([\s]*[a-zA-Z]{1,4}[-\s]*\d{2,4}[a-zA-Z0-9]{0,4})",
|
|
73
|
+
],
|
|
74
|
+
"part_number": [
|
|
75
|
+
r"(?:p/?n|part\s*(?:no|number|#)|p/n)[\s:.-]*([\s]*[a-zA-Z0-9]{3,30})",
|
|
76
|
+
r"\b([a-zA-Z]{2,6}\d{3,10}[a-zA-Z]{0,4})\b",
|
|
77
|
+
r"\b([a-zA-Z]{1,3}\d{5,12})\b",
|
|
78
|
+
],
|
|
79
|
+
"serial_number": [
|
|
80
|
+
r"(?:s/?n|s/n|serial\s*(?:no|number|#))[\s:.-]*([\s]*[a-zA-Z0-9]{3,30})",
|
|
81
|
+
r"\b(?:SN|S/N)\s*[:#.-]*\s*([a-zA-Z0-9]{3,30})",
|
|
82
|
+
],
|
|
83
|
+
"mechanic_id": [
|
|
84
|
+
r"(?:mechanic|tech|technician|a&p|a/?p|inspector|performed\s*by)[\s:.-]*([\s]*[a-zA-Z0-9]{2,20})",
|
|
85
|
+
r"(?:mechanic|tech)[\s:.-]*\#?\s*([a-zA-Z]{1,3}\d{2,8})",
|
|
86
|
+
],
|
|
87
|
+
"station": [
|
|
88
|
+
r"(?:station|location|facility|base|gate)[\s:.-]*([\s]*[a-zA-Z]{3,6})",
|
|
89
|
+
r"\b(?:ATL|DFW|ORD|LAX|JFK|MIA|SEA|SFO|DEN|IAH|MCO|BOS|EWR|PHX|MSP|DTW|CLT|LAS|HNL)\b",
|
|
90
|
+
],
|
|
91
|
+
"hours_worked": [
|
|
92
|
+
r"(?:hours|man[-\s]*hours|labor\s*hrs|labor\s*hours)[\s:.-]*([\s]*\d+\.?\d*)",
|
|
93
|
+
r"(?:total\s*hrs|total\s*hours)[\s:.-]*([\s]*\d+\.?\d*)",
|
|
94
|
+
],
|
|
95
|
+
"ad_sb_reference": [
|
|
96
|
+
r"\b(AD\s*\d{4}[-\s]*\d{2}[-\s]*\d{2,4})\b",
|
|
97
|
+
r"\b(SB\s*[a-zA-Z0-9]{2,6}[-\s]*\d{2,5})\b",
|
|
98
|
+
r"(?:airworthiness\s*directive|a\.?d\.?|service\s*bulletin|s\.?b\.?)[\s:.-]*([a-zA-Z0-9]{4,20})",
|
|
99
|
+
],
|
|
100
|
+
"inspector_stamp": [
|
|
101
|
+
r"(?:inspector|inspected\s*by|stamp|signed\s*by|approved\s*by)[\s:.-]*([\s]*[a-zA-Z\s]{3,30})",
|
|
102
|
+
r"(?:RII|R\.?I\.?I\.?|buy\s*back)[\s:.-]*([\s]*[a-zA-Z]{2,20})",
|
|
103
|
+
],
|
|
104
|
+
"work_performed": [
|
|
105
|
+
r"(?:work\s*performed|description|task|action\s*taken|corrective\s*action)[\s:.-]*([\s]*[a-zA-Z0-9\s,;()]{20,500})",
|
|
106
|
+
],
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
def __init__(self) -> None:
|
|
110
|
+
self._compiled_patterns: Dict[str, List[re.Pattern]] = {}
|
|
111
|
+
for field_name, patterns in self.FIELD_PATTERNS.items():
|
|
112
|
+
self._compiled_patterns[field_name] = [re.compile(p, re.IGNORECASE) for p in patterns]
|
|
113
|
+
|
|
114
|
+
def extract(self, text: str, source_file: str = "unknown") -> WorkOrder:
|
|
115
|
+
"""Extract all known fields from work order text. Returns structured WorkOrder."""
|
|
116
|
+
wo = WorkOrder(source_file=source_file)
|
|
117
|
+
lines = text.split("\n")
|
|
118
|
+
|
|
119
|
+
for line_num, line in enumerate(lines):
|
|
120
|
+
for field_name, patterns in self._compiled_patterns.items():
|
|
121
|
+
for pattern in patterns:
|
|
122
|
+
match = pattern.search(line)
|
|
123
|
+
if match:
|
|
124
|
+
value = match.group(1).strip() if match.groups() else match.group(0).strip()
|
|
125
|
+
# Skip if value is too short or looks like a false positive
|
|
126
|
+
if len(value) < 2:
|
|
127
|
+
continue
|
|
128
|
+
if field_name == "tail_number" and not self._looks_like_tail(value):
|
|
129
|
+
continue
|
|
130
|
+
if field_name == "part_number" and len(value) < 4:
|
|
131
|
+
continue
|
|
132
|
+
|
|
133
|
+
field = ExtractedField(
|
|
134
|
+
field_name=field_name,
|
|
135
|
+
raw_label=match.group(0)[:50],
|
|
136
|
+
value=value,
|
|
137
|
+
confidence=self._confidence(field_name, value),
|
|
138
|
+
page=1,
|
|
139
|
+
line_number=line_num,
|
|
140
|
+
context=self._get_context(lines, line_num),
|
|
141
|
+
)
|
|
142
|
+
wo.extracted_fields.append(field)
|
|
143
|
+
|
|
144
|
+
# Populate the structured fields
|
|
145
|
+
self._assign_field(wo, field_name, value)
|
|
146
|
+
|
|
147
|
+
# Capture raw text for the "work performed" field if not explicitly extracted
|
|
148
|
+
if not wo.work_performed:
|
|
149
|
+
# Take the longest paragraph as potential work description
|
|
150
|
+
paragraphs = [p.strip() for p in text.split("\n\n") if len(p.strip()) > 50]
|
|
151
|
+
if paragraphs:
|
|
152
|
+
wo.work_performed = max(paragraphs, key=len)[:500]
|
|
153
|
+
|
|
154
|
+
return wo
|
|
155
|
+
|
|
156
|
+
def _looks_like_tail(self, value: str) -> bool:
|
|
157
|
+
"""Check if a value looks like an N-number."""
|
|
158
|
+
# N-number: N followed by 1-5 digits, optionally 1-2 letters
|
|
159
|
+
return bool(re.match(r'^[nN]\d{1,5}[a-zA-Z]{0,2}$', value.strip()))
|
|
160
|
+
|
|
161
|
+
def _confidence(self, field_name: str, value: str) -> float:
|
|
162
|
+
"""Heuristic confidence score based on value quality."""
|
|
163
|
+
base = 0.7
|
|
164
|
+
if len(value) > 20:
|
|
165
|
+
base -= 0.2 # Too long, might be grabbing extra text
|
|
166
|
+
if len(value) < 3:
|
|
167
|
+
base -= 0.3
|
|
168
|
+
if field_name == "tail_number" and self._looks_like_tail(value):
|
|
169
|
+
base = 0.95 # High confidence for valid N-numbers
|
|
170
|
+
if field_name == "date" and re.search(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', value):
|
|
171
|
+
base = 0.90
|
|
172
|
+
return min(max(base, 0.3), 1.0)
|
|
173
|
+
|
|
174
|
+
def _get_context(self, lines: List[str], line_num: int, window: int = 1) -> str:
|
|
175
|
+
"""Get surrounding lines for context."""
|
|
176
|
+
start = max(0, line_num - window)
|
|
177
|
+
end = min(len(lines), line_num + window + 1)
|
|
178
|
+
return " | ".join(lines[start:end])
|
|
179
|
+
|
|
180
|
+
def _assign_field(self, wo: WorkOrder, field_name: str, value: str) -> None:
    """Copy one extracted value onto the matching WorkOrder attribute.

    * List-backed fields (part numbers, serial numbers, AD/SB references)
      collect every distinct value in the order seen.
    * Single-valued fields keep the first value and ignore later ones.
    * ``work_performed`` is additionally truncated to 500 characters.
    * Unrecognised field names are ignored.
    """
    # Extracted field name -> list attribute on the work order.
    multi_valued = {
        "part_number": "part_numbers",
        "serial_number": "serial_numbers",
        "ad_sb_reference": "ad_sb_references",
    }
    # Fields whose attribute has the same name and is only set while still empty.
    single_valued = (
        "tail_number",
        "work_order_number",
        "date",
        "aircraft_model",
        "mechanic_id",
        "station",
        "work_performed",
        "hours_worked",
        "inspector_stamp",
    )

    if field_name in multi_valued:
        bucket = getattr(wo, multi_valued[field_name])
        if value not in bucket:
            bucket.append(value)
        return

    if field_name not in single_valued:
        return
    if getattr(wo, field_name):
        return  # first extracted value wins

    if field_name == "work_performed":
        value = value[:500]
    setattr(wo, field_name, value)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# © JINAN KORDAB — 2026 AI HYBRID AGENTIC RETRIEVAL-AUGMENTED GENERATION RAG PIPELINE - PERSONAL PROJECT
|
|
3
|
+
# =============================================================================
|
|
4
|
+
# Centralized configuration loaded from environment variables and .env file.
|
|
5
|
+
# All settings are typed and validated by Pydantic at startup.
|
|
6
|
+
#
|
|
7
|
+
# Usage:
|
|
8
|
+
# from backend.config import settings
|
|
9
|
+
# api_key = settings.OPENAI_API_KEY # Auto-loaded from .env
|
|
10
|
+
#
|
|
11
|
+
# Related:
|
|
12
|
+
# .env.example — Template with all available settings
|
|
13
|
+
# docker-compose.yml — Passes env vars to containers
|
|
14
|
+
# =============================================================================
|
|
15
|
+
|
|
16
|
+
from typing import Optional, List
|
|
17
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Settings(BaseSettings):
    """
    Typed configuration for the Precis application.

    Values are read from environment variables or from a local ``.env`` file
    (UTF-8). Variable names are matched case-insensitively and unknown
    variables are ignored. Every field below declares a default, so none is
    strictly required; Pydantic validates each supplied value against the
    field's annotated type when the class is instantiated.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",  # Unknown env vars are skipped instead of raising
    )

    # --- Application ---
    APP_NAME: str = "Precis"
    APP_VERSION: str = "1.0.0"
    ENVIRONMENT: str = "development"  # development | staging | production
    LOG_LEVEL: str = "INFO"  # DEBUG | INFO | WARNING | ERROR

    # --- Database ---
    DATABASE_URL: str = "sqlite:///data/app.db"

    # --- Redis (optional; None when not configured) ---
    REDIS_URL: Optional[str] = None

    # --- External LLM providers (API keys; None when not configured) ---
    OPENAI_API_KEY: Optional[str] = None
    ANTHROPIC_API_KEY: Optional[str] = None
    GOOGLE_API_KEY: Optional[str] = None
    DEEPSEEK_API_KEY: Optional[str] = None

    # --- Local LLM (Ollama) ---
    OLLAMA_BASE_URL: str = "http://localhost:11434"
    OLLAMA_DEFAULT_MODEL: str = "llama3"

    # --- Default LLM provider ---
    DEFAULT_LLM_PROVIDER: str = "deepseek"  # deepseek | openai | anthropic | google | ollama

    # --- RBF predictor hyperparameters ---
    # Usage: backend/agents/radial_interpol.py
    # Mathematical foundation: Alt_DNN.pdf, Theorem 2.1
    RBF_TAU: float = 500.0  # Sharpness — higher τ → more exact interpolation at training points
    RBF_GAMMA: float = 1.0  # Kernel width — controls influence radius of each training node

    # --- NoGAN synthesizer defaults ---
    # Usage: backend/agents/dist_free_synth.py
    NOGAN_DEFAULT_BINS: int = 50
    NOGAN_MODE: str = "random_counts"  # random_counts | fixed_counts

    # --- Anomaly detection thresholds ---
    # Usage: backend/agents/stat_anomaly.py
    ANOMALY_MULTI_ENTITY_THRESHOLD: int = 50
    ANOMALY_SPIKE_SIGMA: float = 3.0

    # --- Evaluation thresholds ---
    # Usage: backend/agents/veri_score.py
    EVAL_MIN_RELEVANCY: float = 0.6
    EVAL_MIN_TRUSTWORTHINESS: float = 0.5

    # --- Security ---
    CORS_ORIGINS: str = "http://localhost:3000"
    MAX_UPLOAD_SIZE_MB: int = 50
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Shared module-level Settings instance, created once when this module is
# imported. Import `settings` from here instead of constructing Settings again.
settings = Settings()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# © JINAN KORDAB — 2026 AI HYBRID AGENTIC RETRIEVAL-AUGMENTED GENERATION RAG PIPELINE - PERSONAL PROJECT
|
|
3
|
+
# =============================================================================
|
|
4
|
+
# backend/core/ — Low-level utilities used across all agents.
|
|
5
|
+
# These are pure functions with no external dependencies (other than NLTK/NumPy).
|
|
6
|
+
#
|
|
7
|
+
# Modules:
|
|
8
|
+
# stemming.py — NLTK-based stemmer for multi-token normalization
|
|
9
|
+
# multitoken.py — MultiToken extraction from parsed documents
|
|
10
|
+
# hashing.py — Nested hash utilities for O(1) lookup
|
|
11
|
+
# pmi.py — Pointwise Mutual Information for relevancy scoring
|
|
12
|
+
# metrics.py — GenAI evaluation metrics (relevancy, trust, exhaustivity)
|
|
13
|
+
# =============================================================================
|