intentmind 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
intentmind/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .runtime import IntentmindMemory
2
+
3
+ __all__ = ["IntentmindMemory"]
@@ -0,0 +1,3 @@
1
+ from .runner import BenchmarkRunner
2
+
3
+ __all__ = ["BenchmarkRunner"]
@@ -0,0 +1,345 @@
1
+ import time
2
+ from math import ceil
3
+ from typing import List, Dict, Any
4
+ from ..runtime import IntentmindMemory
5
+ from ..embeddings import cosine_similarity
6
+
7
+ class BenchmarkRunner:
8
+ def __init__(self, memory: IntentmindMemory):
9
+ self.memory = memory
10
+
11
+ def classic_rag_search(self, query: str, top_k: int = 5) -> List[Dict]:
12
+ """Simulates a classic vector search RAG without Intentmind features."""
13
+ query_emb = self.memory.embedder.embed(query)
14
+ results = []
15
+ for chunk in self.memory._store.chunks.values():
16
+ sim = cosine_similarity(query_emb, chunk.embedding)
17
+ results.append({"chunk_id": chunk.chunk_id, "score": sim, "text": chunk.text})
18
+
19
+ results.sort(key=lambda x: x["score"], reverse=True)
20
+ return results[:top_k]
21
+
22
+ def evaluate_query(
23
+ self,
24
+ query: str,
25
+ expected_chunks: List[str],
26
+ expected_intents: List[str] = None,
27
+ expected_reason: str | None = None,
28
+ top_k: int = 5,
29
+ ) -> Dict[str, Any]:
30
+ """Evaluates a single query against both Classic RAG and Intentmind."""
31
+
32
+ # 1. Classic RAG
33
+ start_time = time.perf_counter()
34
+ rag_results = self.classic_rag_search(query, top_k=top_k)
35
+ rag_latency = time.perf_counter() - start_time
36
+
37
+ rag_retrieved_ids = [r["chunk_id"] for r in rag_results]
38
+ rag_tokens = sum(len(r["text"].split()) for r in rag_results)
39
+
40
+ # 2. Intentmind
41
+ start_time = time.perf_counter()
42
+ im_results = self.memory.query(query)
43
+ im_latency = time.perf_counter() - start_time
44
+
45
+ im_items = im_results["memories"]["items"]
46
+ im_retrieved_ids = [item["chunk_id"] for item in im_items]
47
+ im_tokens = sum(len(item["text"].split()) for item in im_items)
48
+
49
+ # Calculate Metrics
50
+ expected_set = set(expected_chunks)
51
+
52
+ rag_precision, rag_recall = self._precision_recall(rag_retrieved_ids, expected_set)
53
+ im_precision, im_recall = self._precision_recall(im_retrieved_ids, expected_set)
54
+ rag_f1 = self._f1(rag_precision, rag_recall)
55
+ im_f1 = self._f1(im_precision, im_recall)
56
+
57
+ # Token Saving (lower is better, but saving % is higher = better)
58
+ token_saving = 0.0
59
+ if rag_tokens > 0:
60
+ token_saving = (rag_tokens - im_tokens) / rag_tokens
61
+
62
+ return {
63
+ "query": query,
64
+ "expected_chunks": expected_chunks,
65
+ "classic_rag": {
66
+ "latency_ms": round(rag_latency * 1000, 2),
67
+ "precision": round(rag_precision, 3),
68
+ "recall": round(rag_recall, 3),
69
+ "f1": round(rag_f1, 3),
70
+ "hit_at_k": self._hit_at_k(rag_retrieved_ids, expected_set, top_k),
71
+ "mrr": round(self._mrr(rag_retrieved_ids, expected_set), 3),
72
+ "retrieved_count": len(rag_retrieved_ids),
73
+ "tokens": rag_tokens,
74
+ "false_positives": [cid for cid in rag_retrieved_ids if cid not in expected_set],
75
+ "false_negatives": [cid for cid in expected_chunks if cid not in set(rag_retrieved_ids)],
76
+ "retrieved": [
77
+ {
78
+ "rank": idx + 1,
79
+ "chunk_id": item["chunk_id"],
80
+ "score": round(item["score"], 4),
81
+ "text": item["text"],
82
+ }
83
+ for idx, item in enumerate(rag_results)
84
+ ],
85
+ },
86
+ "intentmind": {
87
+ "latency_ms": round(im_latency * 1000, 2),
88
+ "latency_breakdown": im_results.get("latency_breakdown", {}),
89
+ "precision": round(im_precision, 3),
90
+ "recall": round(im_recall, 3),
91
+ "f1": round(im_f1, 3),
92
+ "hit_at_k": self._hit_at_k(im_retrieved_ids, expected_set, top_k),
93
+ "mrr": round(self._mrr(im_retrieved_ids, expected_set), 3),
94
+ "retrieved_count": len(im_retrieved_ids),
95
+ "tokens": im_tokens,
96
+ "token_saving_pct": round(token_saving * 100, 2),
97
+ "rejected_count": im_results["memories"]["rejected"],
98
+ "false_positives": [cid for cid in im_retrieved_ids if cid not in expected_set],
99
+ "false_negatives": [cid for cid in expected_chunks if cid not in set(im_retrieved_ids)],
100
+ "direct_count": im_results["memories"]["direct"],
101
+ "associated_count": im_results["memories"]["associated"],
102
+ "weak_echo_count": im_results["memories"]["weak_echo"],
103
+ "retrieved": [
104
+ {
105
+ "rank": idx + 1,
106
+ "chunk_id": item["chunk_id"],
107
+ "score": round(item["score"], 4),
108
+ "layer": item["layer"],
109
+ "intent": item["intent"],
110
+ "called_by": item.get("called_by"),
111
+ "reason": item.get("reason"),
112
+ "path": item.get("path", []),
113
+ "edge": item.get("edge", {}),
114
+ "text": item["text"],
115
+ }
116
+ for idx, item in enumerate(im_items)
117
+ ],
118
+ "trace": im_results.get("trace", []),
119
+ },
120
+ "analysis": {
121
+ "expected_intents": expected_intents or [],
122
+ "expected_reason": expected_reason or "",
123
+ "intentmind_paths": [
124
+ item.get("path", [])
125
+ for item in im_items
126
+ if item.get("path")
127
+ ],
128
+ },
129
+ }
130
+
131
+ def run_suite(self, queries: List[Dict], top_k: int = 5) -> Dict[str, Any]:
132
+ """Runs the benchmark on a list of test cases."""
133
+ results = []
134
+ for q_data in queries:
135
+ res = self.evaluate_query(
136
+ query=q_data["query"],
137
+ expected_chunks=q_data["expected_chunks"],
138
+ expected_intents=q_data.get("expected_intents", []),
139
+ expected_reason=q_data.get("expected_reason"),
140
+ top_k=top_k,
141
+ )
142
+ results.append(res)
143
+
144
+ # Aggregate
145
+ def avg(key1, key2):
146
+ return sum(r[key1][key2] for r in results) / len(results)
147
+
148
+ # Aggregate latency breakdown
149
+ breakdown_keys = ["embed_query_ms", "emotion_ms", "extractor_ms", "recall_ms", "prompt_ms"]
150
+ def breakdown_avg(key):
151
+ vals = [r["intentmind"].get("latency_breakdown", {}).get(key, 0.0) for r in results]
152
+ return round(sum(vals) / len(vals), 2) if vals else 0.0
153
+
154
+ summary = {
155
+ "total_queries": len(queries),
156
+ "top_k": top_k,
157
+ "classic_rag_avg_precision": round(avg("classic_rag", "precision"), 3),
158
+ "classic_rag_avg_recall": round(avg("classic_rag", "recall"), 3),
159
+ "classic_rag_avg_f1": round(avg("classic_rag", "f1"), 3),
160
+ "classic_rag_avg_hit_at_k": round(avg("classic_rag", "hit_at_k"), 3),
161
+ "classic_rag_avg_mrr": round(avg("classic_rag", "mrr"), 3),
162
+ "classic_rag_avg_latency_ms": round(avg("classic_rag", "latency_ms"), 3),
163
+ "classic_rag_p50_latency_ms": self._percentile([r["classic_rag"]["latency_ms"] for r in results], 50),
164
+ "classic_rag_p95_latency_ms": self._percentile([r["classic_rag"]["latency_ms"] for r in results], 95),
165
+ "classic_rag_avg_tokens": round(avg("classic_rag", "tokens"), 3),
166
+ "intentmind_avg_precision": round(avg("intentmind", "precision"), 3),
167
+ "intentmind_avg_recall": round(avg("intentmind", "recall"), 3),
168
+ "intentmind_avg_f1": round(avg("intentmind", "f1"), 3),
169
+ "intentmind_avg_hit_at_k": round(avg("intentmind", "hit_at_k"), 3),
170
+ "intentmind_avg_mrr": round(avg("intentmind", "mrr"), 3),
171
+ "intentmind_avg_latency_ms": round(avg("intentmind", "latency_ms"), 3),
172
+ "intentmind_p50_latency_ms": self._percentile([r["intentmind"]["latency_ms"] for r in results], 50),
173
+ "intentmind_p95_latency_ms": self._percentile([r["intentmind"]["latency_ms"] for r in results], 95),
174
+ "intentmind_avg_tokens": round(avg("intentmind", "tokens"), 3),
175
+ "intentmind_avg_token_saving_pct": round(avg("intentmind", "token_saving_pct"), 3),
176
+ "intentmind_avg_direct_count": round(avg("intentmind", "direct_count"), 3),
177
+ "intentmind_avg_associated_count": round(avg("intentmind", "associated_count"), 3),
178
+ "intentmind_latency_breakdown": {k: breakdown_avg(k) for k in breakdown_keys},
179
+ }
180
+
181
+ return {
182
+ "summary": summary,
183
+ "details": results
184
+ }
185
+
186
+ def generate_markdown_report(self, results: Dict[str, Any], title: str = "Real World Benchmark Report") -> str:
187
+ summary = results["summary"]
188
+ details = results["details"]
189
+ wins = [
190
+ item for item in details
191
+ if item["intentmind"]["f1"] > item["classic_rag"]["f1"]
192
+ ]
193
+ losses = [
194
+ item for item in details
195
+ if item["intentmind"]["f1"] < item["classic_rag"]["f1"]
196
+ ]
197
+ ties = len(details) - len(wins) - len(losses)
198
+
199
+ lines = [
200
+ f"# {title}",
201
+ "",
202
+ "This report compares the same memory corpus with classic vector RAG and Intentmind.",
203
+ "The goal is not to claim universal superiority, but to show where associative recall helps, where it fails, and why.",
204
+ "",
205
+ "## Summary",
206
+ "",
207
+ f"- Total queries: {summary['total_queries']}",
208
+ f"- Top-K baseline: {summary['top_k']}",
209
+ f"- Intentmind wins/ties/losses by F1: {len(wins)} / {ties} / {len(losses)}",
210
+ "",
211
+ "| System | Precision | Recall | F1 | Hit@K | MRR | Avg Tokens | p50 Latency | p95 Latency |",
212
+ "|---|---:|---:|---:|---:|---:|---:|---:|---:|",
213
+ (
214
+ f"| Classic RAG | {summary['classic_rag_avg_precision']} | "
215
+ f"{summary['classic_rag_avg_recall']} | {summary['classic_rag_avg_f1']} | "
216
+ f"{summary['classic_rag_avg_hit_at_k']} | {summary['classic_rag_avg_mrr']} | "
217
+ f"{summary['classic_rag_avg_tokens']} | {summary['classic_rag_p50_latency_ms']} ms | "
218
+ f"{summary['classic_rag_p95_latency_ms']} ms |"
219
+ ),
220
+ (
221
+ f"| Intentmind | {summary['intentmind_avg_precision']} | "
222
+ f"{summary['intentmind_avg_recall']} | {summary['intentmind_avg_f1']} | "
223
+ f"{summary['intentmind_avg_hit_at_k']} | {summary['intentmind_avg_mrr']} | "
224
+ f"{summary['intentmind_avg_tokens']} | {summary['intentmind_p50_latency_ms']} ms | "
225
+ f"{summary['intentmind_p95_latency_ms']} ms |"
226
+ ),
227
+ "",
228
+ f"Average token saving: {summary['intentmind_avg_token_saving_pct']}%",
229
+ f"Average direct/associated memories: {summary['intentmind_avg_direct_count']} / {summary['intentmind_avg_associated_count']}",
230
+ "",
231
+ "## Intentmind Latency Breakdown (avg ms)",
232
+ "",
233
+ "| Phase | Avg ms | Description |",
234
+ "|---|---:|---|",
235
+ ]
236
+
237
+ lb = summary.get("intentmind_latency_breakdown", {})
238
+ phase_desc = {
239
+ "embed_query_ms": "Query embedding (SentenceTransformer or equivalent)",
240
+ "emotion_ms": "Emotion detection",
241
+ "extractor_ms": "Intent extraction (fixture / LLM)",
242
+ "recall_ms": "Graph traversal + chunk scoring",
243
+ "prompt_ms": "Prompt assembly",
244
+ }
245
+ for key in ["embed_query_ms", "emotion_ms", "extractor_ms", "recall_ms", "prompt_ms"]:
246
+ desc = phase_desc.get(key, key)
247
+ val = lb.get(key, 0.0)
248
+ lines.append(f"| {key.replace('_ms', '')} | {val} | {desc} |")
249
+ lines.extend([
250
+ "",
251
+ f"> **Note:** The extractor phase uses a fixture-backed dictionary lookup in this benchmark (<1ms). "
252
+ f"With a real LLM extractor (e.g. GPT-4o-mini), this phase would add 200–800ms per query.",
253
+ "",
254
+ "## Why This Can Be Better Than Classic RAG",
255
+ "",
256
+ "Classic RAG retrieves chunks by query-to-chunk vector similarity. Intentmind first activates query intents, then traverses graph neighbors and scores linked chunks.",
257
+ "This matters when the query does not repeat the exact missing concept, but touches an associated concept that was observed with it before.",
258
+ "",
259
+ "## Representative Associative Cases",
260
+ "",
261
+ ])
262
+
263
+ associative_cases = [
264
+ item for item in details
265
+ if any(result.get("layer", 0) > 0 for result in item["intentmind"]["retrieved"])
266
+ ][:8]
267
+ if not associative_cases:
268
+ lines.append("No associated-memory cases were returned in this run.")
269
+ for item in associative_cases:
270
+ lines.extend(self._case_lines(item))
271
+
272
+ lines.extend(["", "## Failure Or Risk Cases", ""])
273
+ risky = [
274
+ item for item in details
275
+ if item["intentmind"]["false_negatives"] or item["intentmind"]["false_positives"]
276
+ ][:8]
277
+ if not risky:
278
+ lines.append("No Intentmind false positives or false negatives were observed in this run.")
279
+ for item in risky:
280
+ lines.extend(self._case_lines(item))
281
+
282
+ lines.extend([
283
+ "",
284
+ "## Methodology Notes",
285
+ "",
286
+ "- Both systems use the same corpus and same embedder.",
287
+ "- Classic RAG uses vector top-k chunk retrieval.",
288
+ "- Intentmind uses faithful fixture extraction in this benchmark so recall dynamics are deterministic.",
289
+ "- Public claims require larger datasets, independent ground truth, and repeated runs with median/p95 reporting.",
290
+ ])
291
+ return "\n".join(lines) + "\n"
292
+
293
+ def _case_lines(self, item: Dict[str, Any]) -> List[str]:
294
+ expected = ", ".join(item["expected_chunks"]) or "(none)"
295
+ rag = ", ".join(r["chunk_id"] for r in item["classic_rag"]["retrieved"]) or "(none)"
296
+ im = ", ".join(
297
+ f"{r['chunk_id']}[{r.get('reason')}:{' > '.join(r.get('path', []))}]"
298
+ for r in item["intentmind"]["retrieved"]
299
+ ) or "(none)"
300
+ reason = item["analysis"].get("expected_reason") or "No explicit reason provided."
301
+ return [
302
+ f"### {item['query']}",
303
+ "",
304
+ f"- Expected chunks: {expected}",
305
+ f"- Classic RAG: {rag}",
306
+ f"- Intentmind: {im}",
307
+ f"- Expected reason: {reason}",
308
+ "",
309
+ ]
310
+
311
+ def _precision_recall(self, retrieved: List[str], expected: set) -> tuple[float, float]:
312
+ if not expected:
313
+ return (1.0, 1.0) if not retrieved else (0.0, 1.0)
314
+ if not retrieved:
315
+ return 0.0, 0.0
316
+ hits = len(set(retrieved).intersection(expected))
317
+ precision = hits / len(retrieved)
318
+ recall = hits / len(expected)
319
+ return precision, recall
320
+
321
+ def _f1(self, precision: float, recall: float) -> float:
322
+ if precision + recall == 0:
323
+ return 0.0
324
+ return 2 * precision * recall / (precision + recall)
325
+
326
+ def _hit_at_k(self, retrieved: List[str], expected: set, top_k: int) -> int:
327
+ if not expected:
328
+ return 1 if not retrieved else 0
329
+ return 1 if set(retrieved[:top_k]).intersection(expected) else 0
330
+
331
+ def _mrr(self, retrieved: List[str], expected: set) -> float:
332
+ if not expected:
333
+ return 1.0 if not retrieved else 0.0
334
+ for idx, chunk_id in enumerate(retrieved):
335
+ if chunk_id in expected:
336
+ return 1.0 / (idx + 1)
337
+ return 0.0
338
+
339
+ def _percentile(self, values: List[float], percentile: int) -> float:
340
+ if not values:
341
+ return 0.0
342
+ ordered = sorted(values)
343
+ rank = ceil((percentile / 100) * len(ordered)) - 1
344
+ rank = max(0, min(rank, len(ordered) - 1))
345
+ return round(ordered[rank], 3)
@@ -0,0 +1,55 @@
1
+ import json
2
+ from typing import Any, Callable, Dict
3
+
4
+
5
def load_fixture(json_path: str) -> Dict[str, Any]:
    """Read the UTF-8 JSON file at ``json_path`` and return the parsed content."""
    with open(json_path, "r", encoding="utf-8") as handle:
        raw = handle.read()
    return json.loads(raw)
8
+
9
+
10
def build_fixture_extractor(data: Dict[str, Any]) -> Callable[[str], list[str]]:
    """
    Create a lookup-based extractor that stands in for a faithful LLM extractor.

    Keeps benchmark runs repeatable: graph/recall behavior can be evaluated
    without live LLM calls or a hand-tuned FakeEmbedder vocabulary.

    The returned callable maps an exact memory text or query string from the
    fixture to a fresh copy of its intent list, and any other text to ``[]``.
    Entries without intents are not registered; query entries are processed
    after memories, so they win when both share the same text.
    """
    # (fixture section, key holding the text, key holding the intents)
    sources = (
        ("memories", "text", "intents"),
        ("queries", "query", "expected_intents"),
    )

    lookup = {}
    for section, text_key, intents_key in sources:
        for entry in data.get(section, []):
            labels = entry.get(intents_key, [])
            if not labels:
                continue
            lookup[entry[text_key]] = labels

    def extractor(text: str) -> list[str]:
        # Copy so callers cannot mutate the fixture-backed lists.
        found = lookup.get(text, [])
        return list(found)

    return extractor
32
+
33
def load_fixture_and_map(memory_instance, json_path: str) -> Dict[str, Any]:
    """
    Loads a benchmark fixture JSON, ingests memories into the provided IntentmindMemory instance,
    and returns the mapped queries taking automatic deduplication into account.

    Args:
        memory_instance: Object whose ``add(text=..., source=..., chunk_id=...)``
            return value is used as the id under which the memory is stored
            (presumably the surviving chunk id after a merge; verify against
            the memory implementation).
        json_path: Path of the fixture file.

    Returns:
        The fixture dict, with every query's ``expected_chunks`` rewritten to
        the ids returned by ``add``. Duplicates are removed and the original
        order is kept, so repeated runs produce the same lists.
    """
    data = load_fixture(json_path)

    chunk_mapping = {}
    for item in data.get("memories", []):
        surviving_id = memory_instance.add(
            text=item["text"],
            source="benchmark",
            chunk_id=item["chunk_id"]
        )
        chunk_mapping[item["chunk_id"]] = surviving_id

    # Update expected queries based on merges
    for q in data.get("queries", []):
        if "expected_chunks" in q:
            mapped = (chunk_mapping.get(cid, cid) for cid in q["expected_chunks"])
            # dict.fromkeys de-duplicates while preserving first-seen order;
            # a plain set would make the order vary between interpreter runs.
            q["expected_chunks"] = list(dict.fromkeys(mapped))

    return data
@@ -0,0 +1,2 @@
1
+ from .prompt_builder import PromptBuilder
2
+ __all__ = ["PromptBuilder"]
@@ -0,0 +1,122 @@
1
+ from __future__ import annotations
2
+
3
+
4
class PromptBuilder:
    """Assemble the LLM prompt from a user query, a recall result and an emotional state.

    The prompt is a sequence of bracket-tagged sections: system instructions,
    user message, current context, cognitive field, relevant memories and
    response style.
    """

    # Appended when the assembled prompt does not fit into ``max_chars``.
    _TRIM_NOTICE = "\n\n[TRIMMED]\nPrompt budget exceeded."

    def __init__(self, max_chars: int = 60000):
        # Upper bound on the length of the string returned by ``build``.
        self.max_chars = max_chars

    def build(self, user_query, recall_result, emotional_state):
        """Return the full prompt, trimmed so it never exceeds ``max_chars``.

        Args:
            user_query: Text placed in the ``[USER_MESSAGE]`` section.
            recall_result: Mapping read via ``.get`` for ``activated_layers``,
                ``cognitive_field`` and the three memory buckets.
            emotional_state: Object whose ``current`` attribute names the
                active emotion.

        When the prompt is too long it is cut and the trim notice is appended;
        room for the notice is reserved so the result stays within budget.
        """
        system_block = (
            "[SYSTEM]\n"
            "You are a memory-augmented assistant.\n"
            "Use memories only as supporting context.\n"
            "Prioritize the user's latest message.\n"
            "Do not over-generalize or give medical disclaimers unless necessary.\n"
            "Respond naturally and directly.\n"
        )

        user_block = f"[USER_MESSAGE]\n{user_query}\n"

        context_block = self._build_context_block(recall_result, emotional_state)
        field_block = self._build_field_block(recall_result)
        memory_block = self._build_memory_block(recall_result)
        style_block = self._build_style_block(emotional_state)

        prompt = f"{system_block}\n{user_block}\n{context_block}\n{field_block}\n{memory_block}\n{style_block}".strip()

        if len(prompt) <= self.max_chars:
            return prompt

        budget = max(0, self.max_chars)
        keep = max(0, budget - len(self._TRIM_NOTICE))
        # The final slice only matters when the budget is smaller than the
        # notice itself.
        return (prompt[:keep] + self._TRIM_NOTICE)[:budget]

    def _build_context_block(self, recall_result, emotional_state):
        """Build the ``[CURRENT_CONTEXT]`` section (top intents and emotion)."""
        lines = ["[CURRENT_CONTEXT]"]

        # Collect top active intents from layers 0 and 1, unique and sorted by score
        intents = {}
        layers = recall_result.get("activated_layers", {})
        for layer_id in (0, 1):
            for item in layers.get(layer_id, []):
                label = item["intent"].label
                # Keep the best score seen for each label.
                if label not in intents or item["score"] > intents[label]:
                    intents[label] = item["score"]

        if intents:
            top_intents = sorted(intents.items(), key=lambda x: x[1], reverse=True)[:5]
            lines.append("Primary topics:")
            lines.extend(f"- {label}" for label, _ in top_intents)
        else:
            lines.append("Primary topics: None")

        lines.append(f"\nEmotional state:\n- {emotional_state.current}")
        return "\n".join(lines) + "\n"

    def _build_field_block(self, recall_result):
        """Build the ``[COGNITIVE FIELD]`` section (seed and resonant intents)."""
        field = recall_result.get("cognitive_field") or {}
        lines = ["[COGNITIVE FIELD]"]

        seeds = field.get("seed_intents", [])
        activated = field.get("activated_intents", [])

        if seeds:
            seed_labels = [item.get("label", "") for item in seeds if item.get("label")]
            lines.append("Seeds: " + ", ".join(seed_labels[:8]))
        else:
            lines.append("Seeds: None")

        # Resonant = activated intents with a label whose role is not "seed".
        resonant = [
            item for item in activated
            if item.get("role") != "seed" and item.get("label")
        ]
        if resonant:
            labels = [f"{item['label']}({item.get('energy', 0.0)})" for item in resonant[:8]]
            lines.append("Resonant: " + ", ".join(labels))
        else:
            lines.append("Resonant: None")

        return "\n".join(lines) + "\n"

    def _build_memory_block(self, recall_result):
        """Build the ``[RELEVANT_MEMORIES]`` section from all memory buckets."""
        lines = ["[RELEVANT_MEMORIES]"]
        memories = []

        for bucket in ("direct_memories", "associated_memories", "weak_echo_memories"):
            memories.extend(recall_result.get(bucket, []))

        # Sort by score and take top 30 (to rely on threshold logic instead of hard caps)
        memories = sorted(memories, key=lambda x: x["score"], reverse=True)[:30]

        if not memories:
            lines.append("No relevant memories found.")
        else:
            for i, item in enumerate(memories, 1):
                text = self.compress(item["chunk"].text)
                score = round(item["score"], 2)
                lines.append(f"{i}. \"{text}\"")
                lines.append(f"relevance: {score}\n")

        return "\n".join(lines)

    def _build_style_block(self, emotional_state):
        """Build the ``[RESPONSE_STYLE]`` section, ending with an emotion-specific hint."""
        lines = [
            "[RESPONSE_STYLE]",
            "- short",
            "- grounded",
            "- conversational",
            "- avoid generic assistant phrasing",
        ]

        # Keys are the emotion names compared against ``emotional_state.current``;
        # unknown emotions fall back to the "nötr" entry.
        modes = {
            "nötr": "- balanced and informative",
            "merak": "- highlight possibilities and connections",
            "heyecan": "- open to new creative ideas",
            "güven": "- confident and deep",
            "şüphe": "- rely strictly on reliable memory",
            "korku": "- safe, concise, and clear",
        }
        lines.append(modes.get(emotional_state.current, modes["nötr"]))
        return "\n".join(lines)

    def compress(self, text: str, max_len: int = 5000) -> str:
        """Collapse whitespace runs to single spaces and cap the length at ``max_len``.

        Over-long text is cut and ends with ``...``. When ``max_len`` leaves no
        room for the ellipsis the text is simply cut at ``max_len``.
        """
        text = " ".join(text.split())
        if len(text) <= max_len:
            return text
        if max_len <= 3:
            # A negative slice bound would keep almost the whole text.
            return text[: max(0, max_len)]
        return text[: max_len - 3] + "..."
@@ -0,0 +1,8 @@
1
+ from .base import BaseEmbedder
2
+ from .fake import FakeEmbedder, cosine_similarity
3
+ try:
4
+ from .sentence_transformer import SentenceTransformerEmbedder
5
+ except Exception: # optional dependency
6
+ SentenceTransformerEmbedder = None
7
+
8
+ __all__ = ["BaseEmbedder", "FakeEmbedder", "SentenceTransformerEmbedder", "cosine_similarity"]
@@ -0,0 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import List
5
+
6
+
7
class BaseEmbedder(ABC):
    """Abstract interface for text embedders: subclasses must implement ``embed``."""

    # Identifier of the embedder; subclasses override it.
    name: str = "base"

    @abstractmethod
    def embed(self, text: str) -> List[float]:
        """Return the embedding vector for ``text``."""
        raise NotImplementedError
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import random
5
+ from typing import List
6
+ from .base import BaseEmbedder
7
+
8
# Keyword clusters used by the fake embedder. A keyword counts as a hit when
# it occurs as a substring of the lower-cased input text. The insertion order
# of the groups matters: each group's position selects its slice of the vector.
SEMANTIC_GROUPS = {
    "memory": [
        "hafıza", "memory", "recall", "chunk", "store", "geçmiş",
    ],
    "graph": [
        "graph", "intent", "edge", "node", "traversal", "layer", "bağ",
    ],
    "energy": [
        "energy", "enerji", "decay", "zayıflama", "güçlenme", "stability",
    ],
    "emotion": [
        "emotion", "duygu", "emotional", "merak", "heyecan", "şüphe", "korku",
    ],
    "score": [
        "score", "activation", "similarity", "threshold", "weight", "ağırlık",
    ],
    "prompt": [
        "prompt", "builder", "context", "bağlam", "llm", "cevap",
    ],
    "pruning": [
        "pruning", "budama", "temizleme", "pollution", "explosion", "duplicate",
    ],
    "car": [
        "car", "araba", "arabayla", "arabanın", "arabanin", "arabam",
        "arabayi", "servis", "servise",
    ],
    "insurance": [
        "insurance", "sigorta", "sigortasini",
    ],
    "money": [
        "money", "para", "param",
    ],
    "travel": [
        "london", "antalya", "antalyaya", "yolculuk", "istanbul", "istanbula",
        "goturecegim", "gitmem", "yarin", "gidecegim",
    ],
    "fuel": [
        "benzin", "benzini", "bitti",
    ],
    "food": [
        "food", "meal", "yemek", "yemistik",
    ],
}

# Default embedding dimensionality.
DIM = 32
25
+
26
+
27
def _group_vector(group_idx: int, dim: int = DIM) -> List[float]:
    """Return a ``dim``-long 0/1 vector with one contiguous band set to 1.0.

    The band width is ``dim // len(SEMANTIC_GROUPS)`` (at least 1) and the
    band starts at ``group_idx * width``; positions beyond ``dim`` are dropped.
    """
    band = max(1, dim // len(SEMANTIC_GROUPS))
    lo = group_idx * band
    hi = min(lo + band, dim)

    out = [0.0] * dim
    for pos in range(lo, hi):
        out[pos] = 1.0
    return out


# One band vector per semantic group, indexed by the group's position.
GROUP_VECTORS = {
    group: _group_vector(idx)
    for idx, group in enumerate(SEMANTIC_GROUPS)
}
38
+
39
+
40
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine of the angle between ``a`` and ``b``.

    The dot product pairs elements with ``zip``, so it stops at the shorter
    vector. Returns 0.0 when either vector has zero norm.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a, norm_b = (math.sqrt(sum(x * x for x in vec)) for vec in (a, b))
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)
47
+
48
+
49
class FakeEmbedder(BaseEmbedder):
    """Deterministic keyword-cluster embedder that needs no model.

    The vector is low-amplitude Gaussian noise seeded from a hash of the text,
    plus the band vector of every semantic group whose keywords occur in the
    text, normalized to unit length.
    """

    name = "fake-semantic-cluster"

    def __init__(self, dim: int = DIM):
        # Length of the vectors returned by ``embed``.
        self.dim = dim

    def embed(self, text: str) -> List[float]:
        """Return a unit-length pseudo-embedding for ``text``.

        The same text always yields the same vector. The noise comes from a
        private ``random.Random`` instance, so the process-wide random state
        is left untouched.
        """
        import hashlib

        text_l = text.lower()

        # Number of keyword (substring) hits per semantic group.
        group_weights = {}
        for group, keywords in SEMANTIC_GROUPS.items():
            hits = sum(1 for kw in keywords if kw in text_l)
            if hits > 0:
                group_weights[group] = hits

        # Seed from the first 8 hex digits of the SHA-256 of the original text.
        seed_int = int(hashlib.sha256(text.encode("utf-8")).hexdigest()[:8], 16)
        rng = random.Random(seed_int)
        vec = [rng.gauss(0, 0.15) for _ in range(self.dim)]

        if group_weights:
            total = sum(group_weights.values())
            for group, w in group_weights.items():
                gv = GROUP_VECTORS[group]
                # Each group's share of the hits, scaled by 2.0.
                alpha = (w / total) * 2.0
                # GROUP_VECTORS are built with the default DIM; guard against
                # a different ``self.dim``.
                for i in range(min(self.dim, len(gv))):
                    vec[i] += alpha * gv[i]

        norm = math.sqrt(sum(x * x for x in vec))
        if norm == 0:
            # Degenerate case: fall back to a uniform unit vector.
            return [1.0 / math.sqrt(self.dim)] * self.dim
        return [x / norm for x in vec]
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List
4
+ from .base import BaseEmbedder
5
+
6
+
7
class SentenceTransformerEmbedder(BaseEmbedder):
    """Embedder backed by a ``sentence_transformers.SentenceTransformer`` model."""

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Load the model named ``model_name``.

        Raises:
            ImportError: If the ``sentence_transformers`` package cannot be
                imported.
        """
        # Imported lazily so the package stays optional until this class is used.
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as exc:
            message = "sentence-transformers yüklü değil. Kurulum: pip install sentence-transformers"
            raise ImportError(message) from exc

        self.model_name = self.name = model_name
        self._model = SentenceTransformer(model_name)

    def embed(self, text: str) -> List[float]:
        """Encode ``text`` with normalized embeddings and return it as a list."""
        encoded = self._model.encode(text, normalize_embeddings=True)
        return encoded.tolist()
@@ -0,0 +1,5 @@
1
+ from .intent_engine import IntentEngine
2
+ from .emotion_engine import EmotionEngine
3
+ from .recall_engine import RecallEngine
4
+ from .energy_engine import EnergyEngine
5
+ __all__ = ["IntentEngine", "EmotionEngine", "RecallEngine", "EnergyEngine"]