quantum-memory-graph 1.2.1__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/PKG-INFO +3 -29
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/README.md +2 -27
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/__init__.py +1 -1
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/pipeline.py +81 -14
- quantum_memory_graph-1.2.2/quantum_memory_graph/synergy_reranker.py +133 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/PKG-INFO +3 -29
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/SOURCES.txt +1 -1
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/requires.txt +0 -1
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/setup.cfg +1 -2
- quantum_memory_graph-1.2.1/benchmarks/memcombine.py +0 -236
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/LICENSE +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/__init__.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/data_collector.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/fast_longmemeval.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/generate_scenarios.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v2.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v3.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v4.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v5.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v6.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v7.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_final.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_full_benchmark.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_full_benchmark_v2.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_longmemeval_chunked_staged.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_longmemeval_cvar_v2.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_longmemeval_staged.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/pyproject.toml +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/__main__.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/api.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/graph.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/pce_optimizer.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/recency.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/subgraph_optimizer.py +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/dependency_links.txt +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/top_level.txt +0 -0
- {quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/tests/test_full_pipeline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: quantum-memory-graph
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.2.2
|
|
4
4
|
Summary: Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
|
|
5
5
|
Home-page: https://github.com/Dustin-a11y/quantum-memory-graph
|
|
6
6
|
Author: Coinkong (Chef's Attraction)
|
|
@@ -23,7 +23,6 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
23
23
|
Requires-Python: >=3.9
|
|
24
24
|
Description-Content-Type: text/markdown
|
|
25
25
|
License-File: LICENSE
|
|
26
|
-
Requires-Dist: quantum-agent-memory>=0.1.0
|
|
27
26
|
Requires-Dist: sentence-transformers>=2.2.0
|
|
28
27
|
Requires-Dist: networkx>=3.0
|
|
29
28
|
Requires-Dist: numpy>=1.24.0
|
|
@@ -45,29 +44,9 @@ Dynamic: license-file
|
|
|
45
44
|
|
|
46
45
|
Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
|
|
47
46
|
|
|
48
|
-
Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
|
|
49
|
-
|
|
50
|
-
## Benchmark: MemCombine (Internal — Memory Combination)
|
|
51
|
-
|
|
52
|
-
MemCombine tests what no existing benchmark measures — **memory combination quality**, where QAOA graph selection finds coherent subsets that embedding similarity misses.
|
|
53
|
-
|
|
54
|
-
| Method | Coverage | Evidence Recall | F1 | Perfect |
|
|
55
|
-
|--------|----------|----------------|----|---------|
|
|
56
|
-
| Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
|
|
57
|
-
| **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
|
|
58
|
-
| **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
|
|
59
|
-
|
|
60
|
-
When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
|
|
61
|
-
|
|
62
|
-
> **How to read this table:** The R@5/R@10 numbers are driven by QMG's chunked
|
|
63
|
-
> embedding retrieval pipeline (Stage 1: gte-large, 500-char chunks, mean-of-top-3
|
|
64
|
-
> scoring). QAOA (Stage 2) refines the top-14 candidates for relationship-aware
|
|
65
|
-
> selection — its advantage shows up in MemCombine (combination quality) rather
|
|
66
|
-
> than raw recall rank. The pipeline as a whole achieves #1.
|
|
67
|
-
|
|
68
47
|
## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
|
|
69
48
|
|
|
70
|
-
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
|
|
49
|
+
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) — [verified submission](https://github.com/xiaowu0162/LongMemEval/issues/46).
|
|
71
50
|
|
|
72
51
|
| Method | R@1 | R@5 | R@10 | NDCG@10 |
|
|
73
52
|
|--------|:---:|:---:|:----:|:-------:|
|
|
@@ -207,10 +186,7 @@ result = recall(
|
|
|
207
186
|
)
|
|
208
187
|
```
|
|
209
188
|
|
|
210
|
-
### Run MemCombine Benchmark
|
|
211
|
-
|
|
212
189
|
```python
|
|
213
|
-
from benchmarks.memcombine import run_benchmark
|
|
214
190
|
|
|
215
191
|
def my_recall(memories, query, K):
|
|
216
192
|
# Your recall implementation
|
|
@@ -243,8 +219,6 @@ Validated on `ibm_fez` and `ibm_kingston` backends.
|
|
|
243
219
|
|
|
244
220
|
MIT License — Copyright 2026 Coinkong (Chef's Attraction)
|
|
245
221
|
|
|
246
|
-
|
|
247
222
|
## Links
|
|
248
223
|
|
|
249
|
-
- [
|
|
250
|
-
- [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
|
|
224
|
+
- [GitHub](https://github.com/Dustin-a11y/quantum-memory-graph) — Source code and benchmarks
|
|
@@ -4,29 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
|
|
6
6
|
|
|
7
|
-
Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
|
|
8
|
-
|
|
9
|
-
## Benchmark: MemCombine (Internal — Memory Combination)
|
|
10
|
-
|
|
11
|
-
MemCombine tests what no existing benchmark measures — **memory combination quality**, where QAOA graph selection finds coherent subsets that embedding similarity misses.
|
|
12
|
-
|
|
13
|
-
| Method | Coverage | Evidence Recall | F1 | Perfect |
|
|
14
|
-
|--------|----------|----------------|----|---------|
|
|
15
|
-
| Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
|
|
16
|
-
| **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
|
|
17
|
-
| **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
|
|
18
|
-
|
|
19
|
-
When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
|
|
20
|
-
|
|
21
|
-
> **How to read this table:** The R@5/R@10 numbers are driven by QMG's chunked
|
|
22
|
-
> embedding retrieval pipeline (Stage 1: gte-large, 500-char chunks, mean-of-top-3
|
|
23
|
-
> scoring). QAOA (Stage 2) refines the top-14 candidates for relationship-aware
|
|
24
|
-
> selection — its advantage shows up in MemCombine (combination quality) rather
|
|
25
|
-
> than raw recall rank. The pipeline as a whole achieves #1.
|
|
26
|
-
|
|
27
7
|
## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
|
|
28
8
|
|
|
29
|
-
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
|
|
9
|
+
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) — [verified submission](https://github.com/xiaowu0162/LongMemEval/issues/46).
|
|
30
10
|
|
|
31
11
|
| Method | R@1 | R@5 | R@10 | NDCG@10 |
|
|
32
12
|
|--------|:---:|:---:|:----:|:-------:|
|
|
@@ -166,10 +146,7 @@ result = recall(
|
|
|
166
146
|
)
|
|
167
147
|
```
|
|
168
148
|
|
|
169
|
-
### Run MemCombine Benchmark
|
|
170
|
-
|
|
171
149
|
```python
|
|
172
|
-
from benchmarks.memcombine import run_benchmark
|
|
173
150
|
|
|
174
151
|
def my_recall(memories, query, K):
|
|
175
152
|
# Your recall implementation
|
|
@@ -202,8 +179,6 @@ Validated on `ibm_fez` and `ibm_kingston` backends.
|
|
|
202
179
|
|
|
203
180
|
MIT License — Copyright 2026 Coinkong (Chef's Attraction)
|
|
204
181
|
|
|
205
|
-
|
|
206
182
|
## Links
|
|
207
183
|
|
|
208
|
-
- [
|
|
209
|
-
- [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
|
|
184
|
+
- [GitHub](https://github.com/Dustin-a11y/quantum-memory-graph) — Source code and benchmarks
|
|
@@ -13,7 +13,7 @@ from typing import List, Dict, Optional
|
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
|
|
15
15
|
from .graph import MemoryGraph
|
|
16
|
-
from .subgraph_optimizer import optimize_subgraph
|
|
16
|
+
from .subgraph_optimizer import optimize_subgraph as _std_optimize_subgraph
|
|
17
17
|
from .recency import ShortTermMemory
|
|
18
18
|
|
|
19
19
|
|
|
@@ -139,6 +139,7 @@ def recall(
|
|
|
139
139
|
max_candidates: int = 14,
|
|
140
140
|
use_recency: bool = True,
|
|
141
141
|
stm: ShortTermMemory = None,
|
|
142
|
+
method: str = "qaoa",
|
|
142
143
|
) -> Dict:
|
|
143
144
|
"""
|
|
144
145
|
Recall optimal memories for a query.
|
|
@@ -170,6 +171,14 @@ def recall(
|
|
|
170
171
|
if not g.memories:
|
|
171
172
|
return {"ok": True, "memories": [], "method": "empty"}
|
|
172
173
|
|
|
174
|
+
today_str = datetime.now().strftime("%Y-%m-%d")
|
|
175
|
+
|
|
176
|
+
def _tier(mem):
|
|
177
|
+
"""Determine if a memory is warm (today) or cold (older)."""
|
|
178
|
+
if mem.timestamp and today_str in str(mem.timestamp):
|
|
179
|
+
return "warm"
|
|
180
|
+
return "cold"
|
|
181
|
+
|
|
173
182
|
# Phase 1: Graph neighborhood search
|
|
174
183
|
neighborhood = g.get_neighborhood(
|
|
175
184
|
query=query, hops=hops, top_seeds=top_seeds
|
|
@@ -198,6 +207,7 @@ def recall(
|
|
|
198
207
|
"entities": g.memories[cid].entities,
|
|
199
208
|
"relevance": float(candidate_scores[i]),
|
|
200
209
|
"source": g.memories[cid].source,
|
|
210
|
+
"tier": _tier(g.memories[cid]),
|
|
201
211
|
}
|
|
202
212
|
for i, cid in enumerate(candidate_ids)
|
|
203
213
|
]
|
|
@@ -206,22 +216,66 @@ def recall(
|
|
|
206
216
|
"memories": memories,
|
|
207
217
|
"method": "all_candidates",
|
|
208
218
|
"candidates": len(candidate_ids),
|
|
219
|
+
"tier_counts": {
|
|
220
|
+
"warm": sum(1 for m in memories if m["tier"] == "warm"),
|
|
221
|
+
"cold": sum(1 for m in memories if m["tier"] == "cold"),
|
|
222
|
+
},
|
|
209
223
|
}
|
|
210
224
|
|
|
211
|
-
# Phase 2:
|
|
212
|
-
|
|
213
|
-
|
|
225
|
+
# Phase 2: Synergy rerank or QAOA subgraph
|
|
226
|
+
if method == "synergy":
|
|
227
|
+
try:
|
|
228
|
+
from .synergy_reranker import select as synergy_select
|
|
229
|
+
# Build texts for candidates
|
|
230
|
+
cand_texts = [g.memories[cid].text for cid in candidate_ids]
|
|
231
|
+
selected_synergy = synergy_select(candidate_scores, cand_texts, query, K)
|
|
232
|
+
|
|
233
|
+
selected_memories = []
|
|
234
|
+
for idx in selected_synergy:
|
|
235
|
+
cid = candidate_ids[idx]
|
|
236
|
+
mem = g.memories[cid]
|
|
237
|
+
selected_memories.append({
|
|
238
|
+
"text": mem.text,
|
|
239
|
+
"entities": mem.entities,
|
|
240
|
+
"relevance": float(candidate_scores[idx]),
|
|
241
|
+
"source": mem.source,
|
|
242
|
+
"tier": _tier(mem),
|
|
243
|
+
"connections": [],
|
|
244
|
+
})
|
|
245
|
+
return {
|
|
246
|
+
"ok": True,
|
|
247
|
+
"memories": selected_memories,
|
|
248
|
+
"method": "synergy",
|
|
249
|
+
"candidates": len(candidate_ids),
|
|
250
|
+
"K": K,
|
|
251
|
+
}
|
|
252
|
+
except Exception as e:
|
|
253
|
+
print(f"WARNING: Synergy selection failed ({e}), falling back to QAOA")
|
|
214
254
|
|
|
215
255
|
# Phase 3: QAOA subgraph optimization
|
|
256
|
+
subgraph = g.get_subgraph_data(candidate_ids)
|
|
257
|
+
adjacency = subgraph["adjacency"]
|
|
258
|
+
# Use PCE when there are more than 14 candidates, standard QAOA otherwise
|
|
216
259
|
try:
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
260
|
+
if len(candidate_ids) > 14:
|
|
261
|
+
from .pce_optimizer import optimize_subgraph_pce
|
|
262
|
+
result = optimize_subgraph_pce(
|
|
263
|
+
relevance_scores=candidate_scores,
|
|
264
|
+
adjacency=adjacency,
|
|
265
|
+
K=K,
|
|
266
|
+
alpha=alpha,
|
|
267
|
+
beta_conn=beta_conn,
|
|
268
|
+
gamma_cov=gamma_cov,
|
|
269
|
+
)
|
|
270
|
+
else:
|
|
271
|
+
result = _std_optimize_subgraph(
|
|
272
|
+
relevance_scores=candidate_scores,
|
|
273
|
+
adjacency=adjacency,
|
|
274
|
+
K=K,
|
|
275
|
+
alpha=alpha,
|
|
276
|
+
beta_conn=beta_conn,
|
|
277
|
+
gamma_cov=gamma_cov,
|
|
278
|
+
)
|
|
225
279
|
except Exception as e:
|
|
226
280
|
# Ultimate fallback: use greedy if QAOA fails despite internal try/except
|
|
227
281
|
print(f"WARNING: Subgraph optimization failed ({e}), using greedy fallback")
|
|
@@ -239,6 +293,14 @@ def recall(
|
|
|
239
293
|
|
|
240
294
|
selected_idxs = result["selection"]
|
|
241
295
|
selected_memories = []
|
|
296
|
+
|
|
297
|
+
# Compute comparison pcts safely
|
|
298
|
+
qaoa_score_val = result.get("qaoa", {}).get("score", result["score"])
|
|
299
|
+
greedy_score_val = result.get("greedy", {}).get("score", result["score"])
|
|
300
|
+
optimal_score_val = result.get("optimal", {}).get("score", result["score"])
|
|
301
|
+
qaoa_vs_greedy = (qaoa_score_val / greedy_score_val * 100) if greedy_score_val > 0 else 100
|
|
302
|
+
qaoa_vs_optimal = (qaoa_score_val / optimal_score_val * 100) if optimal_score_val > 0 else 100
|
|
303
|
+
|
|
242
304
|
for idx in selected_idxs:
|
|
243
305
|
cid = candidate_ids[idx]
|
|
244
306
|
mem = g.memories[cid]
|
|
@@ -261,6 +323,7 @@ def recall(
|
|
|
261
323
|
"entities": mem.entities,
|
|
262
324
|
"relevance": float(candidate_scores[idx]),
|
|
263
325
|
"source": mem.source,
|
|
326
|
+
"tier": _tier(mem),
|
|
264
327
|
"connections": connections,
|
|
265
328
|
})
|
|
266
329
|
|
|
@@ -273,7 +336,11 @@ def recall(
|
|
|
273
336
|
"qaoa_score": result["score"],
|
|
274
337
|
"greedy_score": result["greedy"]["score"],
|
|
275
338
|
"optimal_score": result["optimal"]["score"],
|
|
276
|
-
"qaoa_vs_optimal_pct":
|
|
277
|
-
"qaoa_vs_greedy_pct":
|
|
339
|
+
"qaoa_vs_optimal_pct": round(float(qaoa_vs_optimal), 2),
|
|
340
|
+
"qaoa_vs_greedy_pct": round(float(qaoa_vs_greedy), 2),
|
|
341
|
+
"tier_counts": {
|
|
342
|
+
"warm": sum(1 for m in selected_memories if m["tier"] == "warm"),
|
|
343
|
+
"cold": sum(1 for m in selected_memories if m["tier"] == "cold"),
|
|
344
|
+
},
|
|
278
345
|
"graph_stats": g.stats(),
|
|
279
346
|
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Synergy-aware reranker — word-overlap synergy + diversity selection.
|
|
4
|
+
|
|
5
|
+
Uses token-level overlap analysis to select chunks that are
|
|
6
|
+
complementary to each other, not just individually relevant.
|
|
7
|
+
|
|
8
|
+
DK 🦍
|
|
9
|
+
"""
|
|
10
|
+
import math
|
|
11
|
+
import numpy as np
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
|
|
14
|
+
STOP_WORDS = frozenset({
|
|
15
|
+
"the","is","a","an","and","or","but","in","on","at",
|
|
16
|
+
"to","for","of","with","by","from","was","were","are",
|
|
17
|
+
"be","been","being","have","has","had","do","does","did",
|
|
18
|
+
"will","would","could","should","may","might","can","this",
|
|
19
|
+
"that","these","those","it","its","not","no","he","she",
|
|
20
|
+
"his","her","my","me","i","you","we","us","they","them",
|
|
21
|
+
"what","who","how","when","where","which",
|
|
22
|
+
})
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _tokenize(text):
|
|
26
|
+
words = set()
|
|
27
|
+
for w in text.lower().split():
|
|
28
|
+
w = "".join(c for c in w if c.isalnum())
|
|
29
|
+
if len(w) > 2 and w not in STOP_WORDS:
|
|
30
|
+
words.add(w)
|
|
31
|
+
return words
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _synergy_matrix(texts, query):
    """Build the symmetric matrix of pairwise synergy scores for ``texts``.

    For every pair of chunks the score is a weighted sum of three terms
    computed on ``_tokenize`` token sets:

    * complementary (weight 0.5): the share of query tokens the pair covers
      together beyond what the better-covering chunk covers alone;
    * relatedness (weight 0.3): a Gaussian-shaped function of the pair's
      Jaccard overlap that peaks at an overlap of 0.2;
    * bridge (weight 0.2): the number of shared non-query tokens divided by
      five, capped at 0.3.

    Args:
        texts: chunk text strings.
        query: query text.

    Returns:
        An ``(n, n)`` float array with a zero diagonal, ``n = len(texts)``.
    """
    query_tokens = _tokenize(query)
    token_sets = [_tokenize(t) for t in texts]
    size = len(texts)
    scores = np.zeros((size, size))

    for a in range(size):
        toks_a = token_sets[a]
        hits_a = query_tokens & toks_a
        for b in range(a + 1, size):
            toks_b = token_sets[b]
            hits_b = query_tokens & toks_b

            # Query terms gained by pairing, relative to the better chunk alone.
            if query_tokens:
                gained = len(hits_a | hits_b) - max(len(hits_a), len(hits_b))
                complementary = gained / len(query_tokens)
            else:
                complementary = 0.0

            common = toks_a & toks_b
            union = toks_a | toks_b
            jaccard = len(common) / len(union) if union else 0.0
            relatedness = math.exp(-((jaccard - 0.2) ** 2) / 0.05)

            # Shared vocabulary that is not already part of the query.
            bridge = min(len(common - query_tokens) / 5.0, 0.3)

            value = max(
                0.0,
                complementary * 0.5 + relatedness * 0.3 + bridge * 0.2,
            )
            scores[a][b] = value
            scores[b][a] = value
    return scores
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _diversity_matrix(texts):
    """Return pairwise token-set diversity (1 - Jaccard overlap) for ``texts``.

    Args:
        texts: chunk text strings.

    Returns:
        An ``(n, n)`` symmetric float array with a zero diagonal.  Two chunks
        whose token sets are both empty get a diversity of 1.0.
    """
    token_sets = [_tokenize(t) for t in texts]
    size = len(texts)

    # Start from "fully diverse" everywhere except the diagonal.
    result = np.ones((size, size))
    np.fill_diagonal(result, 0.0)

    for a in range(size):
        toks_a = token_sets[a]
        for b in range(a + 1, size):
            toks_b = token_sets[b]
            union = toks_a | toks_b
            if union:
                shared_fraction = len(toks_a & toks_b) / len(union)
            else:
                shared_fraction = 0.0
            distance = 1.0 - shared_fraction
            result[a][b] = distance
            result[b][a] = distance
    return result
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def select(cosine_scores, chunk_texts, query, K=5):
    """Select up to K chunks using synergy-aware greedy selection.

    The chunk with the highest cosine score seeds the selection.  Each
    following pick maximises
    ``0.4 * cosine + 0.3 * mean synergy + 0.2 * mean diversity + 0.1``,
    where the means are taken against the chunks already selected.

    Args:
        cosine_scores: 1D sequence/array of per-chunk cosine scores.
        chunk_texts: list of chunk text strings, aligned with
            ``cosine_scores``.
        query: query text.
        K: number of chunks to select.

    Returns:
        List of selected chunk indices.  An empty list when ``K <= 0`` or
        there are no candidates; every index in input order when there are
        no more than ``K`` candidates; otherwise ``K`` indices in selection
        order.
    """
    n = len(cosine_scores)
    # Nothing was requested, or there is nothing to choose from.  Without
    # this guard the arg-max seed below would be returned even for K == 0.
    if K <= 0 or n == 0:
        return []
    if n <= K:
        return list(range(n))

    synergy = _synergy_matrix(chunk_texts, query)
    diversity = _diversity_matrix(chunk_texts)

    first = int(np.argmax(cosine_scores))
    selected = [first]
    remaining = set(range(n))
    remaining.discard(first)

    while len(selected) < K:
        best_idx, best_score = -1, -np.inf
        # Ascending index order makes tie-breaking explicit (lowest index
        # wins) instead of depending on set iteration order.
        for i in sorted(remaining):
            avg_syn = float(synergy[i, selected].mean())
            avg_div = float(diversity[i, selected].mean())
            # The constant 0.1 shifts every candidate equally; it is kept so
            # the scores match the original formula exactly.
            score = 0.4 * cosine_scores[i] + 0.3 * avg_syn + 0.2 * avg_div + 0.1
            if score > best_score:
                best_score = score
                best_idx = i
        selected.append(best_idx)
        remaining.remove(best_idx)

    return selected
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def rerank(cosine_scores, chunk_texts, chunk_session_ids, query, K=5):
    """Rank sessions by how many of their chunks ``select`` picks.

    Args:
        cosine_scores: per-chunk cosine scores.
        chunk_texts: per-chunk text.
        chunk_session_ids: per-chunk session ID.
        query: query text.
        K: number of chunks to select.

    Returns:
        Session IDs ordered by descending count of selected chunks.  Sessions
        with equal counts keep the order in which they were first selected.
    """
    tally = {}
    for chunk_idx in select(cosine_scores, chunk_texts, query, K):
        session = chunk_session_ids[chunk_idx]
        tally[session] = tally.get(session, 0) + 1
    # A reversed stable sort keeps first-seen order among equal counts.
    return sorted(tally, key=tally.get, reverse=True)
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: quantum-memory-graph
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.2.2
|
|
4
4
|
Summary: Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
|
|
5
5
|
Home-page: https://github.com/Dustin-a11y/quantum-memory-graph
|
|
6
6
|
Author: Coinkong (Chef's Attraction)
|
|
@@ -23,7 +23,6 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
23
23
|
Requires-Python: >=3.9
|
|
24
24
|
Description-Content-Type: text/markdown
|
|
25
25
|
License-File: LICENSE
|
|
26
|
-
Requires-Dist: quantum-agent-memory>=0.1.0
|
|
27
26
|
Requires-Dist: sentence-transformers>=2.2.0
|
|
28
27
|
Requires-Dist: networkx>=3.0
|
|
29
28
|
Requires-Dist: numpy>=1.24.0
|
|
@@ -45,29 +44,9 @@ Dynamic: license-file
|
|
|
45
44
|
|
|
46
45
|
Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
|
|
47
46
|
|
|
48
|
-
Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
|
|
49
|
-
|
|
50
|
-
## Benchmark: MemCombine (Internal — Memory Combination)
|
|
51
|
-
|
|
52
|
-
MemCombine tests what no existing benchmark measures — **memory combination quality**, where QAOA graph selection finds coherent subsets that embedding similarity misses.
|
|
53
|
-
|
|
54
|
-
| Method | Coverage | Evidence Recall | F1 | Perfect |
|
|
55
|
-
|--------|----------|----------------|----|---------|
|
|
56
|
-
| Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
|
|
57
|
-
| **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
|
|
58
|
-
| **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
|
|
59
|
-
|
|
60
|
-
When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
|
|
61
|
-
|
|
62
|
-
> **How to read this table:** The R@5/R@10 numbers are driven by QMG's chunked
|
|
63
|
-
> embedding retrieval pipeline (Stage 1: gte-large, 500-char chunks, mean-of-top-3
|
|
64
|
-
> scoring). QAOA (Stage 2) refines the top-14 candidates for relationship-aware
|
|
65
|
-
> selection — its advantage shows up in MemCombine (combination quality) rather
|
|
66
|
-
> than raw recall rank. The pipeline as a whole achieves #1.
|
|
67
|
-
|
|
68
47
|
## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
|
|
69
48
|
|
|
70
|
-
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
|
|
49
|
+
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) — [verified submission](https://github.com/xiaowu0162/LongMemEval/issues/46).
|
|
71
50
|
|
|
72
51
|
| Method | R@1 | R@5 | R@10 | NDCG@10 |
|
|
73
52
|
|--------|:---:|:---:|:----:|:-------:|
|
|
@@ -207,10 +186,7 @@ result = recall(
|
|
|
207
186
|
)
|
|
208
187
|
```
|
|
209
188
|
|
|
210
|
-
### Run MemCombine Benchmark
|
|
211
|
-
|
|
212
189
|
```python
|
|
213
|
-
from benchmarks.memcombine import run_benchmark
|
|
214
190
|
|
|
215
191
|
def my_recall(memories, query, K):
|
|
216
192
|
# Your recall implementation
|
|
@@ -243,8 +219,6 @@ Validated on `ibm_fez` and `ibm_kingston` backends.
|
|
|
243
219
|
|
|
244
220
|
MIT License — Copyright 2026 Coinkong (Chef's Attraction)
|
|
245
221
|
|
|
246
|
-
|
|
247
222
|
## Links
|
|
248
223
|
|
|
249
|
-
- [
|
|
250
|
-
- [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
|
|
224
|
+
- [GitHub](https://github.com/Dustin-a11y/quantum-memory-graph) — Source code and benchmarks
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/SOURCES.txt
RENAMED
|
@@ -13,7 +13,6 @@ benchmarks/longmemeval_bench_v4.py
|
|
|
13
13
|
benchmarks/longmemeval_bench_v5.py
|
|
14
14
|
benchmarks/longmemeval_bench_v6.py
|
|
15
15
|
benchmarks/longmemeval_bench_v7.py
|
|
16
|
-
benchmarks/memcombine.py
|
|
17
16
|
benchmarks/run_final.py
|
|
18
17
|
benchmarks/run_full_benchmark.py
|
|
19
18
|
benchmarks/run_full_benchmark_v2.py
|
|
@@ -28,6 +27,7 @@ quantum_memory_graph/pce_optimizer.py
|
|
|
28
27
|
quantum_memory_graph/pipeline.py
|
|
29
28
|
quantum_memory_graph/recency.py
|
|
30
29
|
quantum_memory_graph/subgraph_optimizer.py
|
|
30
|
+
quantum_memory_graph/synergy_reranker.py
|
|
31
31
|
quantum_memory_graph.egg-info/PKG-INFO
|
|
32
32
|
quantum_memory_graph.egg-info/SOURCES.txt
|
|
33
33
|
quantum_memory_graph.egg-info/dependency_links.txt
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = quantum-memory-graph
|
|
3
|
-
version = 1.2.1
|
|
3
|
+
version = 1.2.2
|
|
4
4
|
description = Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
|
|
5
5
|
long_description = file: README.md
|
|
6
6
|
long_description_content_type = text/markdown
|
|
@@ -29,7 +29,6 @@ classifiers =
|
|
|
29
29
|
packages = find:
|
|
30
30
|
python_requires = >=3.9
|
|
31
31
|
install_requires =
|
|
32
|
-
quantum-agent-memory>=0.1.0
|
|
33
32
|
sentence-transformers>=2.2.0
|
|
34
33
|
networkx>=3.0
|
|
35
34
|
numpy>=1.24.0
|
|
@@ -1,236 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
MemCombine Benchmark — Tests memory COMBINATION quality.
|
|
3
|
-
|
|
4
|
-
Unlike LongMemEval (needle-in-haystack retrieval), MemCombine tests whether
|
|
5
|
-
selected memories work TOGETHER to answer complex questions.
|
|
6
|
-
|
|
7
|
-
Questions require synthesizing information from multiple memories:
|
|
8
|
-
- "What was the decision AND its reasoning AND its outcome?"
|
|
9
|
-
- "How do project X and project Y relate?"
|
|
10
|
-
- "What changed between meeting A and meeting B?"
|
|
11
|
-
|
|
12
|
-
Metrics:
|
|
13
|
-
- Combination Score: Do selected memories cover all required facets?
|
|
14
|
-
- Synergy Score: Do memories reference/build on each other?
|
|
15
|
-
- Completeness: Can the question be fully answered from selected memories?
|
|
16
|
-
|
|
17
|
-
Copyright 2026 Coinkong (Chef's Attraction). MIT License.
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
import json
|
|
21
|
-
import numpy as np
|
|
22
|
-
from typing import List, Dict
|
|
23
|
-
from dataclasses import dataclass, field
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@dataclass
|
|
27
|
-
class MemCombineQuestion:
|
|
28
|
-
"""A question requiring multiple related memories."""
|
|
29
|
-
id: str
|
|
30
|
-
question: str
|
|
31
|
-
category: str # synthesis, temporal, causal, multi-entity
|
|
32
|
-
memories: List[Dict] # All available memories
|
|
33
|
-
evidence_ids: List[int] # Which memories contain evidence
|
|
34
|
-
facets: List[str] # Required information facets
|
|
35
|
-
facet_memory_map: Dict # Which facet comes from which memory
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# Built-in benchmark scenarios
|
|
39
|
-
SCENARIOS = [
|
|
40
|
-
{
|
|
41
|
-
"id": "synthesis_1",
|
|
42
|
-
"question": "What technology stack was chosen for the project and why was each component selected?",
|
|
43
|
-
"category": "synthesis",
|
|
44
|
-
"memories": [
|
|
45
|
-
{"id": 0, "text": "Team meeting: Decided to use React for the frontend. Sarah argued it has the best ecosystem for our use case."},
|
|
46
|
-
{"id": 1, "text": "Architecture review: PostgreSQL chosen for the database. Need JSONB support for flexible schemas."},
|
|
47
|
-
{"id": 2, "text": "Sprint planning: Set up CI/CD pipeline using GitHub Actions. Two-week sprint cycles."},
|
|
48
|
-
{"id": 3, "text": "Team lunch at the Italian place. Good pasta. Bob told a funny joke about recursion."},
|
|
49
|
-
{"id": 4, "text": "Backend discussion: FastAPI selected over Django. Need async support for real-time features."},
|
|
50
|
-
{"id": 5, "text": "Deployment strategy: Going with Docker + Kubernetes on AWS. Auto-scaling is critical for launch."},
|
|
51
|
-
{"id": 6, "text": "Budget review: Cloud costs estimated at $2000/month. Within budget allocation."},
|
|
52
|
-
{"id": 7, "text": "Coffee chat about the new office layout. Open floor plan vs cubicles debate."},
|
|
53
|
-
{"id": 8, "text": "Performance testing results: FastAPI handles 10K concurrent connections. Meets our requirements."},
|
|
54
|
-
{"id": 9, "text": "Security audit: Need to add rate limiting and input validation before launch."},
|
|
55
|
-
],
|
|
56
|
-
"evidence_ids": [0, 1, 4, 5],
|
|
57
|
-
"facets": ["frontend_choice", "frontend_reason", "backend_choice", "backend_reason", "database_choice", "database_reason", "deployment_choice"],
|
|
58
|
-
"facet_memory_map": {"frontend_choice": 0, "frontend_reason": 0, "backend_choice": 4, "backend_reason": 4, "database_choice": 1, "database_reason": 1, "deployment_choice": 5},
|
|
59
|
-
},
|
|
60
|
-
{
|
|
61
|
-
"id": "temporal_1",
|
|
62
|
-
"question": "How did the team's stance on remote work change over the three months?",
|
|
63
|
-
"category": "temporal",
|
|
64
|
-
"memories": [
|
|
65
|
-
{"id": 0, "text": "January all-hands: CEO announced mandatory return to office 5 days a week starting February."},
|
|
66
|
-
{"id": 1, "text": "Q4 revenue report showed 15% growth. Celebrated with team dinner."},
|
|
67
|
-
{"id": 2, "text": "February survey results: 73% of employees reported decreased satisfaction with RTO policy."},
|
|
68
|
-
{"id": 3, "text": "New coffee machine installed in the break room. Everyone loves it."},
|
|
69
|
-
{"id": 4, "text": "February town hall: HR presented data showing 20% increase in turnover since RTO mandate."},
|
|
70
|
-
{"id": 5, "text": "March policy update: CEO reversed course. Now hybrid 3 days in office, 2 remote. Cited retention data."},
|
|
71
|
-
{"id": 6, "text": "IT upgraded the conference room AV equipment for better hybrid meetings."},
|
|
72
|
-
{"id": 7, "text": "Quarterly OKR review. Team hit 4 of 5 objectives."},
|
|
73
|
-
{"id": 8, "text": "March satisfaction survey: Employee satisfaction recovered to 85% after hybrid policy."},
|
|
74
|
-
{"id": 9, "text": "Parking garage construction causing noise complaints from third floor."},
|
|
75
|
-
],
|
|
76
|
-
"evidence_ids": [0, 2, 4, 5, 8],
|
|
77
|
-
"facets": ["initial_policy", "employee_reaction", "turnover_impact", "policy_change", "final_outcome"],
|
|
78
|
-
"facet_memory_map": {"initial_policy": 0, "employee_reaction": 2, "turnover_impact": 4, "policy_change": 5, "final_outcome": 8},
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
"id": "causal_1",
|
|
82
|
-
"question": "What caused the production outage, what was done to fix it, and what prevention measures were taken?",
|
|
83
|
-
"category": "causal",
|
|
84
|
-
"memories": [
|
|
85
|
-
{"id": 0, "text": "Monday 2am alert: Production database hit 100% disk usage. All writes failing."},
|
|
86
|
-
{"id": 1, "text": "Sprint retrospective: Team agreed to improve code review process."},
|
|
87
|
-
{"id": 2, "text": "Root cause analysis: Logging table grew 500GB in 2 weeks due to debug logging left on after feature deploy."},
|
|
88
|
-
{"id": 3, "text": "Incident response: DevOps team purged old log entries and increased disk from 1TB to 2TB."},
|
|
89
|
-
{"id": 4, "text": "New hire orientation for three junior developers. HR handled logistics."},
|
|
90
|
-
{"id": 5, "text": "Post-mortem action item 1: Implement log rotation with 30-day retention policy."},
|
|
91
|
-
{"id": 6, "text": "Post-mortem action item 2: Add disk usage alerts at 70%, 80%, 90% thresholds."},
|
|
92
|
-
{"id": 7, "text": "Post-mortem action item 3: Require removing debug logging before merging to main."},
|
|
93
|
-
{"id": 8, "text": "Team building event at the escape room. Marketing team won."},
|
|
94
|
-
{"id": 9, "text": "Client demo went well. They want to proceed with Phase 2."},
|
|
95
|
-
],
|
|
96
|
-
"evidence_ids": [0, 2, 3, 5, 6, 7],
|
|
97
|
-
"facets": ["what_happened", "root_cause", "immediate_fix", "prevention_1", "prevention_2", "prevention_3"],
|
|
98
|
-
"facet_memory_map": {"what_happened": 0, "root_cause": 2, "immediate_fix": 3, "prevention_1": 5, "prevention_2": 6, "prevention_3": 7},
|
|
99
|
-
},
|
|
100
|
-
{
|
|
101
|
-
"id": "multi_entity_1",
|
|
102
|
-
"question": "What are each team member's roles and how do their responsibilities interact?",
|
|
103
|
-
"category": "multi_entity",
|
|
104
|
-
"memories": [
|
|
105
|
-
{"id": 0, "text": "Alice leads frontend development. She works closely with Bob on API contracts."},
|
|
106
|
-
{"id": 1, "text": "Company picnic was fun. Great weather this year."},
|
|
107
|
-
{"id": 2, "text": "Bob owns the backend services. He designs APIs that Alice's frontend consumes."},
|
|
108
|
-
{"id": 3, "text": "Carol manages the infrastructure. She provisions the servers Bob's services run on."},
|
|
109
|
-
{"id": 4, "text": "New ping pong table in the break room. Tournament next Friday."},
|
|
110
|
-
{"id": 5, "text": "Dave handles QA. He writes integration tests that cover Alice's UI and Bob's APIs."},
|
|
111
|
-
{"id": 6, "text": "Eve is the project manager. She coordinates between Alice, Bob, Carol, and Dave."},
|
|
112
|
-
{"id": 7, "text": "Office plants are dying. Need to assign someone to water them."},
|
|
113
|
-
{"id": 8, "text": "Alice and Carol paired on improving the CI/CD pipeline. Reduced deploy time by 40%."},
|
|
114
|
-
{"id": 9, "text": "Dave found a critical bug in Bob's API. Bob fixed it same day."},
|
|
115
|
-
],
|
|
116
|
-
"evidence_ids": [0, 2, 3, 5, 6, 8, 9],
|
|
117
|
-
"facets": ["alice_role", "bob_role", "carol_role", "dave_role", "eve_role", "alice_bob_interaction", "bob_carol_interaction", "dave_integration"],
|
|
118
|
-
"facet_memory_map": {"alice_role": 0, "bob_role": 2, "carol_role": 3, "dave_role": 5, "eve_role": 6, "alice_bob_interaction": 0, "bob_carol_interaction": 3, "dave_integration": 5},
|
|
119
|
-
},
|
|
120
|
-
{
|
|
121
|
-
"id": "synthesis_2",
|
|
122
|
-
"question": "What is the complete customer onboarding process from signup to first value?",
|
|
123
|
-
"category": "synthesis",
|
|
124
|
-
"memories": [
|
|
125
|
-
{"id": 0, "text": "Step 1: Customer signs up via website. Auto-creates account and sends welcome email."},
|
|
126
|
-
{"id": 1, "text": "Marketing team redesigned the landing page. Conversion rate up 12%."},
|
|
127
|
-
{"id": 2, "text": "Step 2: Customer success rep schedules onboarding call within 24 hours of signup."},
|
|
128
|
-
{"id": 3, "text": "Step 3: During onboarding call, rep helps customer import their data and configure integrations."},
|
|
129
|
-
{"id": 4, "text": "Sales team hit quarterly target. Pizza party celebration."},
|
|
130
|
-
{"id": 5, "text": "Step 4: Customer gets access to interactive tutorial. Must complete 3 core modules."},
|
|
131
|
-
{"id": 6, "text": "Step 5: After tutorial completion, customer success checks in at day 7 and day 30."},
|
|
132
|
-
{"id": 7, "text": "Office AC broken again. Facilities contacted."},
|
|
133
|
-
{"id": 8, "text": "Churn analysis: Customers who complete onboarding tutorial have 3x higher retention."},
|
|
134
|
-
{"id": 9, "text": "Support ticket about login issues. Resolved — was a password reset problem."},
|
|
135
|
-
],
|
|
136
|
-
"evidence_ids": [0, 2, 3, 5, 6],
|
|
137
|
-
"facets": ["signup", "scheduling", "data_import", "tutorial", "followup"],
|
|
138
|
-
"facet_memory_map": {"signup": 0, "scheduling": 2, "data_import": 3, "tutorial": 5, "followup": 6},
|
|
139
|
-
},
|
|
140
|
-
]
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
def evaluate_combination(selected_ids: List[int], scenario: Dict) -> Dict:
|
|
144
|
-
"""
|
|
145
|
-
Evaluate how well selected memories combine to answer the question.
|
|
146
|
-
|
|
147
|
-
Returns facet coverage, synergy score, and overall combination quality.
|
|
148
|
-
"""
|
|
149
|
-
evidence_ids = set(scenario["evidence_ids"])
|
|
150
|
-
facet_map = scenario["facet_memory_map"]
|
|
151
|
-
facets = scenario["facets"]
|
|
152
|
-
selected_set = set(selected_ids)
|
|
153
|
-
|
|
154
|
-
# Facet coverage: what percentage of required facets are covered?
|
|
155
|
-
covered_facets = []
|
|
156
|
-
for facet in facets:
|
|
157
|
-
required_mem = facet_map.get(facet)
|
|
158
|
-
if required_mem is not None and required_mem in selected_set:
|
|
159
|
-
covered_facets.append(facet)
|
|
160
|
-
|
|
161
|
-
coverage = len(covered_facets) / len(facets) if facets else 0
|
|
162
|
-
|
|
163
|
-
# Evidence recall: what percentage of evidence memories selected?
|
|
164
|
-
evidence_found = selected_set & evidence_ids
|
|
165
|
-
evidence_recall = len(evidence_found) / len(evidence_ids) if evidence_ids else 0
|
|
166
|
-
|
|
167
|
-
# Precision: what percentage of selected are actually evidence?
|
|
168
|
-
precision = len(evidence_found) / len(selected_set) if selected_set else 0
|
|
169
|
-
|
|
170
|
-
# Noise: non-evidence memories selected
|
|
171
|
-
noise = len(selected_set - evidence_ids)
|
|
172
|
-
|
|
173
|
-
return {
|
|
174
|
-
"coverage": coverage,
|
|
175
|
-
"evidence_recall": evidence_recall,
|
|
176
|
-
"precision": precision,
|
|
177
|
-
"noise": noise,
|
|
178
|
-
"covered_facets": covered_facets,
|
|
179
|
-
"missing_facets": [f for f in facets if f not in covered_facets],
|
|
180
|
-
"f1": (2 * precision * evidence_recall / (precision + evidence_recall)
|
|
181
|
-
if (precision + evidence_recall) > 0 else 0),
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
def run_benchmark(recall_fn, K: int = 5, scenarios: List[Dict] = None) -> Dict:
|
|
186
|
-
"""
|
|
187
|
-
Run MemCombine benchmark against a recall function.
|
|
188
|
-
|
|
189
|
-
Args:
|
|
190
|
-
recall_fn: Function(memories, query, K) -> List[int] (selected indices)
|
|
191
|
-
K: Number of memories to select
|
|
192
|
-
scenarios: Custom scenarios (uses built-in if None)
|
|
193
|
-
|
|
194
|
-
Returns:
|
|
195
|
-
Benchmark results with per-scenario and aggregate scores
|
|
196
|
-
"""
|
|
197
|
-
if scenarios is None:
|
|
198
|
-
scenarios = SCENARIOS
|
|
199
|
-
|
|
200
|
-
results = []
|
|
201
|
-
total_coverage = 0
|
|
202
|
-
total_recall = 0
|
|
203
|
-
total_f1 = 0
|
|
204
|
-
perfect = 0
|
|
205
|
-
|
|
206
|
-
for scenario in scenarios:
|
|
207
|
-
memory_texts = [m["text"] for m in scenario["memories"]]
|
|
208
|
-
selected = recall_fn(memory_texts, scenario["question"], K)
|
|
209
|
-
|
|
210
|
-
eval_result = evaluate_combination(selected, scenario)
|
|
211
|
-
|
|
212
|
-
results.append({
|
|
213
|
-
"id": scenario["id"],
|
|
214
|
-
"category": scenario["category"],
|
|
215
|
-
"selected": selected,
|
|
216
|
-
**eval_result,
|
|
217
|
-
})
|
|
218
|
-
|
|
219
|
-
total_coverage += eval_result["coverage"]
|
|
220
|
-
total_recall += eval_result["evidence_recall"]
|
|
221
|
-
total_f1 += eval_result["f1"]
|
|
222
|
-
if eval_result["coverage"] == 1.0:
|
|
223
|
-
perfect += 1
|
|
224
|
-
|
|
225
|
-
n = len(scenarios)
|
|
226
|
-
return {
|
|
227
|
-
"benchmark": "MemCombine",
|
|
228
|
-
"n_scenarios": n,
|
|
229
|
-
"K": K,
|
|
230
|
-
"avg_coverage": total_coverage / n,
|
|
231
|
-
"avg_evidence_recall": total_recall / n,
|
|
232
|
-
"avg_f1": total_f1 / n,
|
|
233
|
-
"perfect_coverage": perfect,
|
|
234
|
-
"perfect_coverage_pct": perfect / n * 100,
|
|
235
|
-
"per_scenario": results,
|
|
236
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v2.py
RENAMED
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v3.py
RENAMED
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v4.py
RENAMED
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v5.py
RENAMED
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v6.py
RENAMED
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v7.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_full_benchmark_v2.py
RENAMED
|
File without changes
|
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_longmemeval_cvar_v2.py
RENAMED
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/benchmarks/run_longmemeval_staged.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/pce_optimizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{quantum_memory_graph-1.2.1 → quantum_memory_graph-1.2.2}/quantum_memory_graph/subgraph_optimizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|