intentmind 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- intentmind/__init__.py +3 -0
- intentmind/benchmark/__init__.py +3 -0
- intentmind/benchmark/runner.py +345 -0
- intentmind/benchmark/utils.py +55 -0
- intentmind/builders/__init__.py +2 -0
- intentmind/builders/prompt_builder.py +122 -0
- intentmind/embeddings/__init__.py +8 -0
- intentmind/embeddings/base.py +12 -0
- intentmind/embeddings/fake.py +79 -0
- intentmind/embeddings/sentence_transformer.py +20 -0
- intentmind/engines/__init__.py +5 -0
- intentmind/engines/emotion_engine.py +36 -0
- intentmind/engines/energy_engine.py +239 -0
- intentmind/engines/intent_engine.py +750 -0
- intentmind/engines/recall_engine.py +562 -0
- intentmind/indices/__init__.py +5 -0
- intentmind/indices/base.py +21 -0
- intentmind/indices/exact.py +31 -0
- intentmind/indices/faiss.py +75 -0
- intentmind/integrations/__init__.py +5 -0
- intentmind/integrations/langchain.py +56 -0
- intentmind/models.py +118 -0
- intentmind/persistence/__init__.py +2 -0
- intentmind/persistence/json_persistence.py +41 -0
- intentmind/runtime.py +272 -0
- intentmind/store.py +167 -0
- intentmind/vis.py +72 -0
- intentmind-0.1.0.dist-info/METADATA +232 -0
- intentmind-0.1.0.dist-info/RECORD +32 -0
- intentmind-0.1.0.dist-info/WHEEL +5 -0
- intentmind-0.1.0.dist-info/licenses/LICENSE +21 -0
- intentmind-0.1.0.dist-info/top_level.txt +1 -0
intentmind/__init__.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from math import ceil
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
from ..runtime import IntentmindMemory
|
|
5
|
+
from ..embeddings import cosine_similarity
|
|
6
|
+
|
|
7
|
+
class BenchmarkRunner:
|
|
8
|
+
def __init__(self, memory: IntentmindMemory):
|
|
9
|
+
self.memory = memory
|
|
10
|
+
|
|
11
|
+
def classic_rag_search(self, query: str, top_k: int = 5) -> List[Dict]:
|
|
12
|
+
"""Simulates a classic vector search RAG without Intentmind features."""
|
|
13
|
+
query_emb = self.memory.embedder.embed(query)
|
|
14
|
+
results = []
|
|
15
|
+
for chunk in self.memory._store.chunks.values():
|
|
16
|
+
sim = cosine_similarity(query_emb, chunk.embedding)
|
|
17
|
+
results.append({"chunk_id": chunk.chunk_id, "score": sim, "text": chunk.text})
|
|
18
|
+
|
|
19
|
+
results.sort(key=lambda x: x["score"], reverse=True)
|
|
20
|
+
return results[:top_k]
|
|
21
|
+
|
|
22
|
+
def evaluate_query(
|
|
23
|
+
self,
|
|
24
|
+
query: str,
|
|
25
|
+
expected_chunks: List[str],
|
|
26
|
+
expected_intents: List[str] = None,
|
|
27
|
+
expected_reason: str | None = None,
|
|
28
|
+
top_k: int = 5,
|
|
29
|
+
) -> Dict[str, Any]:
|
|
30
|
+
"""Evaluates a single query against both Classic RAG and Intentmind."""
|
|
31
|
+
|
|
32
|
+
# 1. Classic RAG
|
|
33
|
+
start_time = time.perf_counter()
|
|
34
|
+
rag_results = self.classic_rag_search(query, top_k=top_k)
|
|
35
|
+
rag_latency = time.perf_counter() - start_time
|
|
36
|
+
|
|
37
|
+
rag_retrieved_ids = [r["chunk_id"] for r in rag_results]
|
|
38
|
+
rag_tokens = sum(len(r["text"].split()) for r in rag_results)
|
|
39
|
+
|
|
40
|
+
# 2. Intentmind
|
|
41
|
+
start_time = time.perf_counter()
|
|
42
|
+
im_results = self.memory.query(query)
|
|
43
|
+
im_latency = time.perf_counter() - start_time
|
|
44
|
+
|
|
45
|
+
im_items = im_results["memories"]["items"]
|
|
46
|
+
im_retrieved_ids = [item["chunk_id"] for item in im_items]
|
|
47
|
+
im_tokens = sum(len(item["text"].split()) for item in im_items)
|
|
48
|
+
|
|
49
|
+
# Calculate Metrics
|
|
50
|
+
expected_set = set(expected_chunks)
|
|
51
|
+
|
|
52
|
+
rag_precision, rag_recall = self._precision_recall(rag_retrieved_ids, expected_set)
|
|
53
|
+
im_precision, im_recall = self._precision_recall(im_retrieved_ids, expected_set)
|
|
54
|
+
rag_f1 = self._f1(rag_precision, rag_recall)
|
|
55
|
+
im_f1 = self._f1(im_precision, im_recall)
|
|
56
|
+
|
|
57
|
+
# Token Saving (lower is better, but saving % is higher = better)
|
|
58
|
+
token_saving = 0.0
|
|
59
|
+
if rag_tokens > 0:
|
|
60
|
+
token_saving = (rag_tokens - im_tokens) / rag_tokens
|
|
61
|
+
|
|
62
|
+
return {
|
|
63
|
+
"query": query,
|
|
64
|
+
"expected_chunks": expected_chunks,
|
|
65
|
+
"classic_rag": {
|
|
66
|
+
"latency_ms": round(rag_latency * 1000, 2),
|
|
67
|
+
"precision": round(rag_precision, 3),
|
|
68
|
+
"recall": round(rag_recall, 3),
|
|
69
|
+
"f1": round(rag_f1, 3),
|
|
70
|
+
"hit_at_k": self._hit_at_k(rag_retrieved_ids, expected_set, top_k),
|
|
71
|
+
"mrr": round(self._mrr(rag_retrieved_ids, expected_set), 3),
|
|
72
|
+
"retrieved_count": len(rag_retrieved_ids),
|
|
73
|
+
"tokens": rag_tokens,
|
|
74
|
+
"false_positives": [cid for cid in rag_retrieved_ids if cid not in expected_set],
|
|
75
|
+
"false_negatives": [cid for cid in expected_chunks if cid not in set(rag_retrieved_ids)],
|
|
76
|
+
"retrieved": [
|
|
77
|
+
{
|
|
78
|
+
"rank": idx + 1,
|
|
79
|
+
"chunk_id": item["chunk_id"],
|
|
80
|
+
"score": round(item["score"], 4),
|
|
81
|
+
"text": item["text"],
|
|
82
|
+
}
|
|
83
|
+
for idx, item in enumerate(rag_results)
|
|
84
|
+
],
|
|
85
|
+
},
|
|
86
|
+
"intentmind": {
|
|
87
|
+
"latency_ms": round(im_latency * 1000, 2),
|
|
88
|
+
"latency_breakdown": im_results.get("latency_breakdown", {}),
|
|
89
|
+
"precision": round(im_precision, 3),
|
|
90
|
+
"recall": round(im_recall, 3),
|
|
91
|
+
"f1": round(im_f1, 3),
|
|
92
|
+
"hit_at_k": self._hit_at_k(im_retrieved_ids, expected_set, top_k),
|
|
93
|
+
"mrr": round(self._mrr(im_retrieved_ids, expected_set), 3),
|
|
94
|
+
"retrieved_count": len(im_retrieved_ids),
|
|
95
|
+
"tokens": im_tokens,
|
|
96
|
+
"token_saving_pct": round(token_saving * 100, 2),
|
|
97
|
+
"rejected_count": im_results["memories"]["rejected"],
|
|
98
|
+
"false_positives": [cid for cid in im_retrieved_ids if cid not in expected_set],
|
|
99
|
+
"false_negatives": [cid for cid in expected_chunks if cid not in set(im_retrieved_ids)],
|
|
100
|
+
"direct_count": im_results["memories"]["direct"],
|
|
101
|
+
"associated_count": im_results["memories"]["associated"],
|
|
102
|
+
"weak_echo_count": im_results["memories"]["weak_echo"],
|
|
103
|
+
"retrieved": [
|
|
104
|
+
{
|
|
105
|
+
"rank": idx + 1,
|
|
106
|
+
"chunk_id": item["chunk_id"],
|
|
107
|
+
"score": round(item["score"], 4),
|
|
108
|
+
"layer": item["layer"],
|
|
109
|
+
"intent": item["intent"],
|
|
110
|
+
"called_by": item.get("called_by"),
|
|
111
|
+
"reason": item.get("reason"),
|
|
112
|
+
"path": item.get("path", []),
|
|
113
|
+
"edge": item.get("edge", {}),
|
|
114
|
+
"text": item["text"],
|
|
115
|
+
}
|
|
116
|
+
for idx, item in enumerate(im_items)
|
|
117
|
+
],
|
|
118
|
+
"trace": im_results.get("trace", []),
|
|
119
|
+
},
|
|
120
|
+
"analysis": {
|
|
121
|
+
"expected_intents": expected_intents or [],
|
|
122
|
+
"expected_reason": expected_reason or "",
|
|
123
|
+
"intentmind_paths": [
|
|
124
|
+
item.get("path", [])
|
|
125
|
+
for item in im_items
|
|
126
|
+
if item.get("path")
|
|
127
|
+
],
|
|
128
|
+
},
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
def run_suite(self, queries: List[Dict], top_k: int = 5) -> Dict[str, Any]:
|
|
132
|
+
"""Runs the benchmark on a list of test cases."""
|
|
133
|
+
results = []
|
|
134
|
+
for q_data in queries:
|
|
135
|
+
res = self.evaluate_query(
|
|
136
|
+
query=q_data["query"],
|
|
137
|
+
expected_chunks=q_data["expected_chunks"],
|
|
138
|
+
expected_intents=q_data.get("expected_intents", []),
|
|
139
|
+
expected_reason=q_data.get("expected_reason"),
|
|
140
|
+
top_k=top_k,
|
|
141
|
+
)
|
|
142
|
+
results.append(res)
|
|
143
|
+
|
|
144
|
+
# Aggregate
|
|
145
|
+
def avg(key1, key2):
|
|
146
|
+
return sum(r[key1][key2] for r in results) / len(results)
|
|
147
|
+
|
|
148
|
+
# Aggregate latency breakdown
|
|
149
|
+
breakdown_keys = ["embed_query_ms", "emotion_ms", "extractor_ms", "recall_ms", "prompt_ms"]
|
|
150
|
+
def breakdown_avg(key):
|
|
151
|
+
vals = [r["intentmind"].get("latency_breakdown", {}).get(key, 0.0) for r in results]
|
|
152
|
+
return round(sum(vals) / len(vals), 2) if vals else 0.0
|
|
153
|
+
|
|
154
|
+
summary = {
|
|
155
|
+
"total_queries": len(queries),
|
|
156
|
+
"top_k": top_k,
|
|
157
|
+
"classic_rag_avg_precision": round(avg("classic_rag", "precision"), 3),
|
|
158
|
+
"classic_rag_avg_recall": round(avg("classic_rag", "recall"), 3),
|
|
159
|
+
"classic_rag_avg_f1": round(avg("classic_rag", "f1"), 3),
|
|
160
|
+
"classic_rag_avg_hit_at_k": round(avg("classic_rag", "hit_at_k"), 3),
|
|
161
|
+
"classic_rag_avg_mrr": round(avg("classic_rag", "mrr"), 3),
|
|
162
|
+
"classic_rag_avg_latency_ms": round(avg("classic_rag", "latency_ms"), 3),
|
|
163
|
+
"classic_rag_p50_latency_ms": self._percentile([r["classic_rag"]["latency_ms"] for r in results], 50),
|
|
164
|
+
"classic_rag_p95_latency_ms": self._percentile([r["classic_rag"]["latency_ms"] for r in results], 95),
|
|
165
|
+
"classic_rag_avg_tokens": round(avg("classic_rag", "tokens"), 3),
|
|
166
|
+
"intentmind_avg_precision": round(avg("intentmind", "precision"), 3),
|
|
167
|
+
"intentmind_avg_recall": round(avg("intentmind", "recall"), 3),
|
|
168
|
+
"intentmind_avg_f1": round(avg("intentmind", "f1"), 3),
|
|
169
|
+
"intentmind_avg_hit_at_k": round(avg("intentmind", "hit_at_k"), 3),
|
|
170
|
+
"intentmind_avg_mrr": round(avg("intentmind", "mrr"), 3),
|
|
171
|
+
"intentmind_avg_latency_ms": round(avg("intentmind", "latency_ms"), 3),
|
|
172
|
+
"intentmind_p50_latency_ms": self._percentile([r["intentmind"]["latency_ms"] for r in results], 50),
|
|
173
|
+
"intentmind_p95_latency_ms": self._percentile([r["intentmind"]["latency_ms"] for r in results], 95),
|
|
174
|
+
"intentmind_avg_tokens": round(avg("intentmind", "tokens"), 3),
|
|
175
|
+
"intentmind_avg_token_saving_pct": round(avg("intentmind", "token_saving_pct"), 3),
|
|
176
|
+
"intentmind_avg_direct_count": round(avg("intentmind", "direct_count"), 3),
|
|
177
|
+
"intentmind_avg_associated_count": round(avg("intentmind", "associated_count"), 3),
|
|
178
|
+
"intentmind_latency_breakdown": {k: breakdown_avg(k) for k in breakdown_keys},
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
return {
|
|
182
|
+
"summary": summary,
|
|
183
|
+
"details": results
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
def generate_markdown_report(self, results: Dict[str, Any], title: str = "Real World Benchmark Report") -> str:
|
|
187
|
+
summary = results["summary"]
|
|
188
|
+
details = results["details"]
|
|
189
|
+
wins = [
|
|
190
|
+
item for item in details
|
|
191
|
+
if item["intentmind"]["f1"] > item["classic_rag"]["f1"]
|
|
192
|
+
]
|
|
193
|
+
losses = [
|
|
194
|
+
item for item in details
|
|
195
|
+
if item["intentmind"]["f1"] < item["classic_rag"]["f1"]
|
|
196
|
+
]
|
|
197
|
+
ties = len(details) - len(wins) - len(losses)
|
|
198
|
+
|
|
199
|
+
lines = [
|
|
200
|
+
f"# {title}",
|
|
201
|
+
"",
|
|
202
|
+
"This report compares the same memory corpus with classic vector RAG and Intentmind.",
|
|
203
|
+
"The goal is not to claim universal superiority, but to show where associative recall helps, where it fails, and why.",
|
|
204
|
+
"",
|
|
205
|
+
"## Summary",
|
|
206
|
+
"",
|
|
207
|
+
f"- Total queries: {summary['total_queries']}",
|
|
208
|
+
f"- Top-K baseline: {summary['top_k']}",
|
|
209
|
+
f"- Intentmind wins/ties/losses by F1: {len(wins)} / {ties} / {len(losses)}",
|
|
210
|
+
"",
|
|
211
|
+
"| System | Precision | Recall | F1 | Hit@K | MRR | Avg Tokens | p50 Latency | p95 Latency |",
|
|
212
|
+
"|---|---:|---:|---:|---:|---:|---:|---:|---:|",
|
|
213
|
+
(
|
|
214
|
+
f"| Classic RAG | {summary['classic_rag_avg_precision']} | "
|
|
215
|
+
f"{summary['classic_rag_avg_recall']} | {summary['classic_rag_avg_f1']} | "
|
|
216
|
+
f"{summary['classic_rag_avg_hit_at_k']} | {summary['classic_rag_avg_mrr']} | "
|
|
217
|
+
f"{summary['classic_rag_avg_tokens']} | {summary['classic_rag_p50_latency_ms']} ms | "
|
|
218
|
+
f"{summary['classic_rag_p95_latency_ms']} ms |"
|
|
219
|
+
),
|
|
220
|
+
(
|
|
221
|
+
f"| Intentmind | {summary['intentmind_avg_precision']} | "
|
|
222
|
+
f"{summary['intentmind_avg_recall']} | {summary['intentmind_avg_f1']} | "
|
|
223
|
+
f"{summary['intentmind_avg_hit_at_k']} | {summary['intentmind_avg_mrr']} | "
|
|
224
|
+
f"{summary['intentmind_avg_tokens']} | {summary['intentmind_p50_latency_ms']} ms | "
|
|
225
|
+
f"{summary['intentmind_p95_latency_ms']} ms |"
|
|
226
|
+
),
|
|
227
|
+
"",
|
|
228
|
+
f"Average token saving: {summary['intentmind_avg_token_saving_pct']}%",
|
|
229
|
+
f"Average direct/associated memories: {summary['intentmind_avg_direct_count']} / {summary['intentmind_avg_associated_count']}",
|
|
230
|
+
"",
|
|
231
|
+
"## Intentmind Latency Breakdown (avg ms)",
|
|
232
|
+
"",
|
|
233
|
+
"| Phase | Avg ms | Description |",
|
|
234
|
+
"|---|---:|---|",
|
|
235
|
+
]
|
|
236
|
+
|
|
237
|
+
lb = summary.get("intentmind_latency_breakdown", {})
|
|
238
|
+
phase_desc = {
|
|
239
|
+
"embed_query_ms": "Query embedding (SentenceTransformer or equivalent)",
|
|
240
|
+
"emotion_ms": "Emotion detection",
|
|
241
|
+
"extractor_ms": "Intent extraction (fixture / LLM)",
|
|
242
|
+
"recall_ms": "Graph traversal + chunk scoring",
|
|
243
|
+
"prompt_ms": "Prompt assembly",
|
|
244
|
+
}
|
|
245
|
+
for key in ["embed_query_ms", "emotion_ms", "extractor_ms", "recall_ms", "prompt_ms"]:
|
|
246
|
+
desc = phase_desc.get(key, key)
|
|
247
|
+
val = lb.get(key, 0.0)
|
|
248
|
+
lines.append(f"| {key.replace('_ms', '')} | {val} | {desc} |")
|
|
249
|
+
lines.extend([
|
|
250
|
+
"",
|
|
251
|
+
f"> **Note:** The extractor phase uses a fixture-backed dictionary lookup in this benchmark (<1ms). "
|
|
252
|
+
f"With a real LLM extractor (e.g. GPT-4o-mini), this phase would add 200–800ms per query.",
|
|
253
|
+
"",
|
|
254
|
+
"## Why This Can Be Better Than Classic RAG",
|
|
255
|
+
"",
|
|
256
|
+
"Classic RAG retrieves chunks by query-to-chunk vector similarity. Intentmind first activates query intents, then traverses graph neighbors and scores linked chunks.",
|
|
257
|
+
"This matters when the query does not repeat the exact missing concept, but touches an associated concept that was observed with it before.",
|
|
258
|
+
"",
|
|
259
|
+
"## Representative Associative Cases",
|
|
260
|
+
"",
|
|
261
|
+
])
|
|
262
|
+
|
|
263
|
+
associative_cases = [
|
|
264
|
+
item for item in details
|
|
265
|
+
if any(result.get("layer", 0) > 0 for result in item["intentmind"]["retrieved"])
|
|
266
|
+
][:8]
|
|
267
|
+
if not associative_cases:
|
|
268
|
+
lines.append("No associated-memory cases were returned in this run.")
|
|
269
|
+
for item in associative_cases:
|
|
270
|
+
lines.extend(self._case_lines(item))
|
|
271
|
+
|
|
272
|
+
lines.extend(["", "## Failure Or Risk Cases", ""])
|
|
273
|
+
risky = [
|
|
274
|
+
item for item in details
|
|
275
|
+
if item["intentmind"]["false_negatives"] or item["intentmind"]["false_positives"]
|
|
276
|
+
][:8]
|
|
277
|
+
if not risky:
|
|
278
|
+
lines.append("No Intentmind false positives or false negatives were observed in this run.")
|
|
279
|
+
for item in risky:
|
|
280
|
+
lines.extend(self._case_lines(item))
|
|
281
|
+
|
|
282
|
+
lines.extend([
|
|
283
|
+
"",
|
|
284
|
+
"## Methodology Notes",
|
|
285
|
+
"",
|
|
286
|
+
"- Both systems use the same corpus and same embedder.",
|
|
287
|
+
"- Classic RAG uses vector top-k chunk retrieval.",
|
|
288
|
+
"- Intentmind uses faithful fixture extraction in this benchmark so recall dynamics are deterministic.",
|
|
289
|
+
"- Public claims require larger datasets, independent ground truth, and repeated runs with median/p95 reporting.",
|
|
290
|
+
])
|
|
291
|
+
return "\n".join(lines) + "\n"
|
|
292
|
+
|
|
293
|
+
def _case_lines(self, item: Dict[str, Any]) -> List[str]:
|
|
294
|
+
expected = ", ".join(item["expected_chunks"]) or "(none)"
|
|
295
|
+
rag = ", ".join(r["chunk_id"] for r in item["classic_rag"]["retrieved"]) or "(none)"
|
|
296
|
+
im = ", ".join(
|
|
297
|
+
f"{r['chunk_id']}[{r.get('reason')}:{' > '.join(r.get('path', []))}]"
|
|
298
|
+
for r in item["intentmind"]["retrieved"]
|
|
299
|
+
) or "(none)"
|
|
300
|
+
reason = item["analysis"].get("expected_reason") or "No explicit reason provided."
|
|
301
|
+
return [
|
|
302
|
+
f"### {item['query']}",
|
|
303
|
+
"",
|
|
304
|
+
f"- Expected chunks: {expected}",
|
|
305
|
+
f"- Classic RAG: {rag}",
|
|
306
|
+
f"- Intentmind: {im}",
|
|
307
|
+
f"- Expected reason: {reason}",
|
|
308
|
+
"",
|
|
309
|
+
]
|
|
310
|
+
|
|
311
|
+
def _precision_recall(self, retrieved: List[str], expected: set) -> tuple[float, float]:
|
|
312
|
+
if not expected:
|
|
313
|
+
return (1.0, 1.0) if not retrieved else (0.0, 1.0)
|
|
314
|
+
if not retrieved:
|
|
315
|
+
return 0.0, 0.0
|
|
316
|
+
hits = len(set(retrieved).intersection(expected))
|
|
317
|
+
precision = hits / len(retrieved)
|
|
318
|
+
recall = hits / len(expected)
|
|
319
|
+
return precision, recall
|
|
320
|
+
|
|
321
|
+
def _f1(self, precision: float, recall: float) -> float:
|
|
322
|
+
if precision + recall == 0:
|
|
323
|
+
return 0.0
|
|
324
|
+
return 2 * precision * recall / (precision + recall)
|
|
325
|
+
|
|
326
|
+
def _hit_at_k(self, retrieved: List[str], expected: set, top_k: int) -> int:
|
|
327
|
+
if not expected:
|
|
328
|
+
return 1 if not retrieved else 0
|
|
329
|
+
return 1 if set(retrieved[:top_k]).intersection(expected) else 0
|
|
330
|
+
|
|
331
|
+
def _mrr(self, retrieved: List[str], expected: set) -> float:
|
|
332
|
+
if not expected:
|
|
333
|
+
return 1.0 if not retrieved else 0.0
|
|
334
|
+
for idx, chunk_id in enumerate(retrieved):
|
|
335
|
+
if chunk_id in expected:
|
|
336
|
+
return 1.0 / (idx + 1)
|
|
337
|
+
return 0.0
|
|
338
|
+
|
|
339
|
+
def _percentile(self, values: List[float], percentile: int) -> float:
|
|
340
|
+
if not values:
|
|
341
|
+
return 0.0
|
|
342
|
+
ordered = sorted(values)
|
|
343
|
+
rank = ceil((percentile / 100) * len(ordered)) - 1
|
|
344
|
+
rank = max(0, min(rank, len(ordered) - 1))
|
|
345
|
+
return round(ordered[rank], 3)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Any, Callable, Dict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def load_fixture(json_path: str) -> Dict[str, Any]:
    """Read the UTF-8 encoded JSON file at ``json_path`` and return its parsed content."""
    with open(json_path, mode="r", encoding="utf-8") as handle:
        raw = handle.read()
    return json.loads(raw)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_fixture_extractor(data: Dict[str, Any]) -> Callable[[str], list[str]]:
    """
    Create a lookup-based extractor from fixture data.

    Every memory text with non-empty ``intents`` and every query with
    non-empty ``expected_intents`` is registered; queries are registered
    last, so a query overrides a memory with the same text. The returned
    callable maps a text to a fresh copy of its intent list, or to an
    empty list for unknown text.
    """
    # (fixture section, key holding the text, key holding the intent labels)
    sections = (
        ("memories", "text", "intents"),
        ("queries", "query", "expected_intents"),
    )

    lookup = {}
    for section, text_key, intents_key in sections:
        for entry in data.get(section, []):
            labels = entry.get(intents_key, [])
            if not labels:
                continue
            lookup[entry[text_key]] = labels

    def extractor(text: str) -> list[str]:
        found = lookup.get(text, [])
        return list(found)

    return extractor
|
|
32
|
+
|
|
33
|
+
def load_fixture_and_map(memory_instance, json_path: str) -> Dict[str, Any]:
    """
    Load a benchmark fixture, ingest its memories and remap expected chunk ids.

    Each entry under ``memories`` is passed to ``memory_instance.add`` with
    ``source="benchmark"``. The value returned by ``add`` is recorded as the
    surviving id for that fixture ``chunk_id``. Every query's
    ``expected_chunks`` is then rewritten through that mapping, with
    duplicates removed while keeping the original order, so the result is
    the same on every run.

    Args:
        memory_instance: Object exposing ``add(text=..., source=..., chunk_id=...)``.
        json_path: Path of the fixture JSON file.

    Returns:
        The loaded fixture dict with ``expected_chunks`` updated in place.
    """
    data = load_fixture(json_path)

    chunk_mapping = {}
    for item in data.get("memories", []):
        surviving_id = memory_instance.add(
            text=item["text"],
            source="benchmark",
            chunk_id=item["chunk_id"],
        )
        chunk_mapping[item["chunk_id"]] = surviving_id

    # Update expected queries based on merges
    for q in data.get("queries", []):
        if "expected_chunks" in q:
            mapped = (chunk_mapping.get(cid, cid) for cid in q["expected_chunks"])
            # dict.fromkeys dedupes in insertion order; a set would make the
            # order depend on string hashing and differ between runs.
            q["expected_chunks"] = list(dict.fromkeys(mapped))

    return data
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class PromptBuilder:
    """Assemble the text prompt from the user query, recall result and emotional state.

    The prompt is a sequence of bracket-tagged sections: system instructions,
    user message, current context, cognitive field, relevant memories and
    response style.
    """

    # Appended when the prompt had to be cut to fit ``max_chars``.
    _TRIM_NOTICE = "\n\n[TRIMMED]\nPrompt budget exceeded."

    # Style hint per emotional-state name; unknown states use the "nötr" entry.
    _STYLE_MODES = {
        "nötr": "- balanced and informative",
        "merak": "- highlight possibilities and connections",
        "heyecan": "- open to new creative ideas",
        "güven": "- confident and deep",
        "şüphe": "- rely strictly on reliable memory",
        "korku": "- safe, concise, and clear",
    }

    def __init__(self, max_chars: int = 60000):
        self.max_chars = max_chars

    def build(self, user_query, recall_result, emotional_state):
        """Return the full prompt, trimmed so it does not exceed ``max_chars``.

        Args:
            user_query: Text placed in the ``[USER_MESSAGE]`` section.
            recall_result: Mapping read with ``.get`` for ``activated_layers``,
                ``cognitive_field`` and the three memory buckets.
            emotional_state: Object whose ``current`` attribute names the state.

        When the assembled prompt is too long, it is cut so that the prompt
        plus the trim notice fits in ``max_chars``. If ``max_chars`` is
        smaller than the notice itself, only the notice is returned.
        """
        system_block = (
            "[SYSTEM]\n"
            "You are a memory-augmented assistant.\n"
            "Use memories only as supporting context.\n"
            "Prioritize the user's latest message.\n"
            "Do not over-generalize or give medical disclaimers unless necessary.\n"
            "Respond naturally and directly.\n"
        )

        user_block = f"[USER_MESSAGE]\n{user_query}\n"

        context_block = self._build_context_block(recall_result, emotional_state)
        field_block = self._build_field_block(recall_result)
        memory_block = self._build_memory_block(recall_result)
        style_block = self._build_style_block(emotional_state)

        prompt = f"{system_block}\n{user_block}\n{context_block}\n{field_block}\n{memory_block}\n{style_block}".strip()

        if len(prompt) <= self.max_chars:
            return prompt
        # Reserve room for the notice so the result honours the budget.
        keep = max(0, self.max_chars - len(self._TRIM_NOTICE))
        return prompt[:keep] + self._TRIM_NOTICE

    def _build_context_block(self, recall_result, emotional_state):
        """Return the ``[CURRENT_CONTEXT]`` section: top intent labels and emotional state."""
        lines = ["[CURRENT_CONTEXT]"]

        # Collect top active intents from layers 0 and 1, unique and sorted by score
        intents = {}
        for layer_id in [0, 1]:
            for item in recall_result.get("activated_layers", {}).get(layer_id, []):
                label = item["intent"].label
                # Keep the highest score seen for a label across both layers.
                if label not in intents or item["score"] > intents[label]:
                    intents[label] = item["score"]

        if intents:
            top_intents = sorted(intents.items(), key=lambda x: x[1], reverse=True)[:5]
            lines.append("Primary topics:")
            for label, _ in top_intents:
                lines.append(f"- {label}")
        else:
            lines.append("Primary topics: None")

        lines.append(f"\nEmotional state:\n- {emotional_state.current}")
        return "\n".join(lines) + "\n"

    def _build_field_block(self, recall_result):
        """Return the ``[COGNITIVE FIELD]`` section listing seed and resonant intents."""
        field = recall_result.get("cognitive_field") or {}
        lines = ["[COGNITIVE FIELD]"]

        seeds = field.get("seed_intents", [])
        activated = field.get("activated_intents", [])

        # Seeds without a label are skipped; if none remain, report "None"
        # instead of an empty list.
        seed_labels = [item.get("label", "") for item in seeds if item.get("label")]
        if seed_labels:
            lines.append("Seeds: " + ", ".join(seed_labels[:8]))
        else:
            lines.append("Seeds: None")

        resonant = [
            item for item in activated
            if item.get("role") != "seed" and item.get("label")
        ]
        if resonant:
            labels = [f"{item['label']}({item.get('energy', 0.0)})" for item in resonant[:8]]
            lines.append("Resonant: " + ", ".join(labels))
        else:
            lines.append("Resonant: None")

        return "\n".join(lines) + "\n"

    def _build_memory_block(self, recall_result):
        """Return the ``[RELEVANT_MEMORIES]`` section with up to 30 memories by score."""
        lines = ["[RELEVANT_MEMORIES]"]
        memories = []

        for bucket in ["direct_memories", "associated_memories", "weak_echo_memories"]:
            memories.extend(recall_result.get(bucket, []))

        # Sort by score and take top 30 (to rely on threshold logic instead of hard caps)
        memories = sorted(memories, key=lambda x: x["score"], reverse=True)[:30]

        if not memories:
            lines.append("No relevant memories found.")
        else:
            for i, item in enumerate(memories, 1):
                text = self.compress(item["chunk"].text)
                score = round(item["score"], 2)
                lines.append(f"{i}. \"{text}\"")
                lines.append(f"relevance: {score}\n")

        return "\n".join(lines)

    def _build_style_block(self, emotional_state):
        """Return the ``[RESPONSE_STYLE]`` section, ending with the state-specific hint."""
        lines = [
            "[RESPONSE_STYLE]",
            "- short",
            "- grounded",
            "- conversational",
            "- avoid generic assistant phrasing",
        ]
        modes = self._STYLE_MODES
        lines.append(modes.get(emotional_state.current, modes["nötr"]))
        return "\n".join(lines)

    def compress(self, text: str, max_len: int = 5000) -> str:
        """Collapse whitespace runs and cap the text at ``max_len`` characters.

        Text over the limit is cut and ends with ``...``. When ``max_len`` is
        below 3 there is no room for the ellipsis, so the text is just cut.
        """
        text = " ".join(text.split())
        if len(text) <= max_len:
            return text
        if max_len < 3:
            # A negative slice bound would count from the end and return
            # more than max_len characters.
            return text[: max(max_len, 0)]
        return text[: max_len - 3] + "..."
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from .base import BaseEmbedder
|
|
2
|
+
from .fake import FakeEmbedder, cosine_similarity
|
|
3
|
+
try:
|
|
4
|
+
from .sentence_transformer import SentenceTransformerEmbedder
|
|
5
|
+
except Exception: # optional dependency
|
|
6
|
+
SentenceTransformerEmbedder = None
|
|
7
|
+
|
|
8
|
+
__all__ = ["BaseEmbedder", "FakeEmbedder", "SentenceTransformerEmbedder", "cosine_similarity"]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import random
|
|
5
|
+
from typing import List
|
|
6
|
+
from .base import BaseEmbedder
|
|
7
|
+
|
|
8
|
+
# Keyword lists per semantic group; dict order fixes each group's index.
SEMANTIC_GROUPS = {
    "memory": ["hafıza", "memory", "recall", "chunk", "store", "geçmiş"],
    "graph": ["graph", "intent", "edge", "node", "traversal", "layer", "bağ"],
    "energy": ["energy", "enerji", "decay", "zayıflama", "güçlenme", "stability"],
    "emotion": ["emotion", "duygu", "emotional", "merak", "heyecan", "şüphe", "korku"],
    "score": ["score", "activation", "similarity", "threshold", "weight", "ağırlık"],
    "prompt": ["prompt", "builder", "context", "bağlam", "llm", "cevap"],
    "pruning": ["pruning", "budama", "temizleme", "pollution", "explosion", "duplicate"],
    "car": ["car", "araba", "arabayla", "arabanın", "arabanin", "arabam", "arabayi", "servis", "servise"],
    "insurance": ["insurance", "sigorta", "sigortasini"],
    "money": ["money", "para", "param"],
    "travel": ["london", "antalya", "antalyaya", "yolculuk", "istanbul", "istanbula", "goturecegim", "gitmem", "yarin", "gidecegim"],
    "fuel": ["benzin", "benzini", "bitti"],
    "food": ["food", "meal", "yemek", "yemistik"],
}

# Default embedding dimensionality.
DIM = 32


def _group_vector(group_idx: int, dim: int = DIM) -> List[float]:
    """Return a ``dim``-long 0/1 vector with one contiguous band of ones.

    The band width is ``dim // len(SEMANTIC_GROUPS)`` (at least 1) and the
    band for ``group_idx`` starts at ``group_idx * width``; positions past
    ``dim`` are dropped.
    """
    band = max(1, dim // len(SEMANTIC_GROUPS))
    first = group_idx * band
    stop = min(first + band, dim)
    indicator = [0.0] * dim
    for pos in range(first, stop):
        indicator[pos] = 1.0
    return indicator


# One indicator vector per group, keyed by group name in declaration order.
GROUP_VECTORS = dict(
    (group_name, _group_vector(position))
    for position, group_name in enumerate(SEMANTIC_GROUPS)
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine of the angle between ``a`` and ``b``.

    The dot product pairs elements with ``zip``, so it stops at the shorter
    input. Returns 0.0 when either vector has zero magnitude.
    """

    def magnitude(vec: List[float]) -> float:
        return math.sqrt(sum(component * component for component in vec))

    inner = sum(left * right for left, right in zip(a, b))
    size_a = magnitude(a)
    size_b = magnitude(b)
    if size_a == 0 or size_b == 0:
        return 0.0
    return inner / (size_a * size_b)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class FakeEmbedder(BaseEmbedder):
    """Deterministic keyword-driven embedder that needs no model.

    The vector for a text is seeded Gaussian noise plus the indicator
    vectors of every semantic group whose keywords occur in the text,
    scaled to unit length.
    """

    name = "fake-semantic-cluster"

    def __init__(self, dim: int = DIM):
        self.dim = dim

    def embed(self, text: str) -> List[float]:
        """Return a unit-length vector of ``self.dim`` floats for ``text``.

        The same text always yields the same vector. The noise comes from a
        private generator, so the process-wide ``random`` state is untouched.
        """
        text_l = text.lower()
        # Number of keywords of each group found as substrings of the text.
        group_weights = {}
        for group, keywords in SEMANTIC_GROUPS.items():
            hits = sum(1 for kw in keywords if kw in text_l)
            if hits > 0:
                group_weights[group] = hits

        import hashlib
        # Seed from the first 8 hex digits of the text's SHA-256 digest.
        seed_int = int(hashlib.sha256(text.encode("utf-8")).hexdigest()[:8], 16)
        # A dedicated Random instance yields the same sequence as seeding the
        # module-level generator, without clobbering other users of random.
        rng = random.Random(seed_int)
        vec = [rng.gauss(0, 0.15) for _ in range(self.dim)]

        if group_weights:
            total = sum(group_weights.values())
            for group, w in group_weights.items():
                gv = GROUP_VECTORS[group]
                # Each group's share of the hits, scaled by 2.0.
                alpha = (w / total) * 2.0
                # GROUP_VECTORS use the module default size; only overlapping
                # positions are blended when self.dim differs.
                for i in range(min(self.dim, len(gv))):
                    vec[i] += alpha * gv[i]

        norm = math.sqrt(sum(x * x for x in vec))
        if norm == 0:
            # Fall back to a uniform unit vector.
            return [1.0 / math.sqrt(self.dim)] * self.dim
        return [x / norm for x in vec]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import List
|
|
4
|
+
from .base import BaseEmbedder
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SentenceTransformerEmbedder(BaseEmbedder):
    """Embedder backed by a ``sentence_transformers.SentenceTransformer`` model."""

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Load the model called ``model_name``.

        Raises:
            ImportError: If the ``sentence_transformers`` package cannot be imported.
        """
        # Imported here so the package is only required when this class is used.
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as exc:
            message = "sentence-transformers yüklü değil. Kurulum: pip install sentence-transformers"
            raise ImportError(message) from exc
        self.model_name = self.name = model_name
        self._model = SentenceTransformer(model_name)

    def embed(self, text: str) -> List[float]:
        """Encode ``text`` with ``normalize_embeddings=True`` and return the result as a list."""
        encoded = self._model.encode(text, normalize_embeddings=True)
        return encoded.tolist()
|