quantum-memory-graph 1.2.0__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. {quantum_memory_graph-1.2.0/quantum_memory_graph.egg-info → quantum_memory_graph-1.2.1}/PKG-INFO +21 -5
  2. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/README.md +10 -4
  3. quantum_memory_graph-1.2.1/benchmarks/run_longmemeval_cvar_v2.py +272 -0
  4. quantum_memory_graph-1.2.1/benchmarks/run_longmemeval_staged.py +355 -0
  5. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/__init__.py +1 -1
  6. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1/quantum_memory_graph.egg-info}/PKG-INFO +21 -5
  7. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph.egg-info/SOURCES.txt +2 -0
  8. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/setup.cfg +12 -1
  9. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/LICENSE +0 -0
  10. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/__init__.py +0 -0
  11. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/data_collector.py +0 -0
  12. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/fast_longmemeval.py +0 -0
  13. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/generate_scenarios.py +0 -0
  14. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/longmemeval_bench.py +0 -0
  15. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/longmemeval_bench_v2.py +0 -0
  16. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/longmemeval_bench_v3.py +0 -0
  17. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/longmemeval_bench_v4.py +0 -0
  18. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/longmemeval_bench_v5.py +0 -0
  19. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/longmemeval_bench_v6.py +0 -0
  20. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/longmemeval_bench_v7.py +0 -0
  21. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/memcombine.py +0 -0
  22. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/run_final.py +0 -0
  23. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/run_full_benchmark.py +0 -0
  24. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/run_full_benchmark_v2.py +0 -0
  25. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/benchmarks/run_longmemeval_chunked_staged.py +0 -0
  26. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/pyproject.toml +0 -0
  27. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/__main__.py +0 -0
  28. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/api.py +0 -0
  29. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/graph.py +0 -0
  30. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/pce_optimizer.py +0 -0
  31. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/pipeline.py +0 -0
  32. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/recency.py +0 -0
  33. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph/subgraph_optimizer.py +0 -0
  34. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph.egg-info/dependency_links.txt +0 -0
  35. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph.egg-info/requires.txt +0 -0
  36. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/quantum_memory_graph.egg-info/top_level.txt +0 -0
  37. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.1}/tests/test_full_pipeline.py +0 -0
@@ -1,15 +1,25 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quantum-memory-graph
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Summary: Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
5
5
  Home-page: https://github.com/Dustin-a11y/quantum-memory-graph
6
6
  Author: Coinkong (Chef's Attraction)
7
7
  License: MIT
8
+ Project-URL: Source Code, https://github.com/Dustin-a11y/quantum-memory-graph
9
+ Project-URL: Issue Tracker, https://github.com/Dustin-a11y/quantum-memory-graph/issues
10
+ Project-URL: Benchmark Results, https://github.com/Dustin-a11y/quantum-memory-graph/tree/main/benchmarks
11
+ Project-URL: LongMemEval Submission, https://github.com/xiaowu0162/LongMemEval/issues
8
12
  Keywords: quantum,memory,knowledge-graph,agents,qaoa,ai
9
13
  Classifier: Development Status :: 4 - Beta
10
14
  Classifier: Intended Audience :: Developers
11
15
  Classifier: License :: OSI Approved :: MIT License
12
16
  Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
23
  Requires-Python: >=3.9
14
24
  Description-Content-Type: text/markdown
15
25
  License-File: LICENSE
@@ -37,9 +47,9 @@ Every memory system treats memories as independent documents — search, rank, s
37
47
 
38
48
  Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
39
49
 
40
- ## Benchmark: MemCombine
50
+ ## Benchmark: MemCombine (Internal — Memory Combination)
41
51
 
42
- We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
52
+ MemCombine tests what no existing benchmark measures — **memory combination quality**, where QAOA graph selection finds coherent subsets that embedding similarity misses.
43
53
 
44
54
  | Method | Coverage | Evidence Recall | F1 | Perfect |
45
55
  |--------|----------|----------------|----|---------|
@@ -48,6 +58,13 @@ We created MemCombine to test what no existing benchmark measures — **memory c
48
58
  | **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
49
59
 
50
60
  When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
61
+
62
+ > **How to read the LongMemEval table below:** The R@5/R@10 numbers are driven by QMG's chunked
63
+ > embedding retrieval pipeline (Stage 1: gte-large, 500-char chunks, mean-of-top-3
64
+ > scoring). QAOA (Stage 2) refines the top-14 candidates for relationship-aware
65
+ > selection — its advantage shows up in MemCombine (combination quality) rather
66
+ > than raw recall rank. The pipeline as a whole achieves #1.
67
+
51
68
  ## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
52
69
 
53
70
  Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) for long-term memory in AI agents:
@@ -57,7 +74,7 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
57
74
  | OMEGA (prev SOTA) | — | 89.2% | 94.1% | 87.5% |
58
75
  | Mastra OM | — | 91.0% | 95.2% | 89.1% |
59
76
  | **QMG v1.1 (published #1)** | — | **95.8%** | **98.85%** | **93.2%** |
60
- | **QMG v1.2 (official, this repo)** 🏆 | **90.6%** | **98.6%** | **99.4%** | **0.9426** |
77
+ | **QMG v1.2 chunked retrieval pipeline** 🏆 | **90.6%** | **98.6%** | **99.4%** | **94.26%** |
61
78
 
62
79
  **Benchmark run:** 500 questions, chunked gte-large embeddings (500-char blocks, 100-char overlap, mean-of-top-3 session scoring). Verified on DGX Spark GB10 (CUDA, ~53 min).
63
80
 
@@ -65,7 +82,6 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
65
82
 
66
83
  **See:** `benchmarks/run_longmemeval_chunked_staged.py` for the exact benchmark code, `benchmarks/longmemeval_chunked_staged_results.json` for full per-question results.
67
84
 
68
-
69
85
  ## Install
70
86
 
71
87
  ```bash
@@ -6,9 +6,9 @@ Every memory system treats memories as independent documents — search, rank, s
6
6
 
7
7
  Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
8
8
 
9
- ## Benchmark: MemCombine
9
+ ## Benchmark: MemCombine (Internal — Memory Combination)
10
10
 
11
- We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
11
+ MemCombine tests what no existing benchmark measures — **memory combination quality**, where QAOA graph selection finds coherent subsets that embedding similarity misses.
12
12
 
13
13
  | Method | Coverage | Evidence Recall | F1 | Perfect |
14
14
  |--------|----------|----------------|----|---------|
@@ -17,6 +17,13 @@ We created MemCombine to test what no existing benchmark measures — **memory c
17
17
  | **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
18
18
 
19
19
  When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
20
+
21
+ > **How to read the LongMemEval table below:** The R@5/R@10 numbers are driven by QMG's chunked
22
+ > embedding retrieval pipeline (Stage 1: gte-large, 500-char chunks, mean-of-top-3
23
+ > scoring). QAOA (Stage 2) refines the top-14 candidates for relationship-aware
24
+ > selection — its advantage shows up in MemCombine (combination quality) rather
25
+ > than raw recall rank. The pipeline as a whole achieves #1.
26
+
20
27
  ## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
21
28
 
22
29
  Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) for long-term memory in AI agents:
@@ -26,7 +33,7 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
26
33
  | OMEGA (prev SOTA) | — | 89.2% | 94.1% | 87.5% |
27
34
  | Mastra OM | — | 91.0% | 95.2% | 89.1% |
28
35
  | **QMG v1.1 (published #1)** | — | **95.8%** | **98.85%** | **93.2%** |
29
- | **QMG v1.2 (official, this repo)** 🏆 | **90.6%** | **98.6%** | **99.4%** | **0.9426** |
36
+ | **QMG v1.2 chunked retrieval pipeline** 🏆 | **90.6%** | **98.6%** | **99.4%** | **94.26%** |
30
37
 
31
38
  **Benchmark run:** 500 questions, chunked gte-large embeddings (500-char blocks, 100-char overlap, mean-of-top-3 session scoring). Verified on DGX Spark GB10 (CUDA, ~53 min).
32
39
 
@@ -34,7 +41,6 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
34
41
 
35
42
  **See:** `benchmarks/run_longmemeval_chunked_staged.py` for the exact benchmark code, `benchmarks/longmemeval_chunked_staged_results.json` for full per-question results.
36
43
 
37
-
38
44
  ## Install
39
45
 
40
46
  ```bash
@@ -0,0 +1,272 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ LongMemEval 500-question Benchmark — QMG CVaR subgraph optimizer.
4
+
5
+ Routes each question through the QMG subgraph optimizer on Spark.
6
+ Measures recall@K against gold answer sessions.
7
+
8
+ Usage:
9
+ python3 -u run_longmemeval_cvar_v2.py --limit 5 # Quick test
10
+ python3 -u run_longmemeval_cvar_v2.py --force # Full 500
11
+ python3 -u run_longmemeval_cvar_v2.py --fast # Skip QMG, cosine only
12
+
13
+ Output: JSON results + CSV saved to benchmarks/ directory.
14
+ """
15
+ import json, time, math, sys, os, argparse, csv
16
+ from datetime import datetime, timezone
17
+ import numpy as np
18
+
19
# Input dataset and output locations. The defaults are the paths this
# benchmark was written against; the environment variables allow running it
# on a machine that does not share that directory layout.
DATA_PATH = os.environ.get(
    "QMG_LONGMEMEVAL_DATA",
    "/home/dt/projects-shared/LongMemEval/data/longmemeval_s_cleaned.json",
)
RESULTS_DIR = os.environ.get("QMG_RESULTS_DIR", "/home/dt/qmg-v1/benchmarks")
RESULTS_FILE = os.path.join(RESULTS_DIR, "longmemeval_cvar_results.json")
CSV_FILE = os.path.join(RESULTS_DIR, "longmemeval_cvar_results.csv")

# Wall-clock reference taken at import time; main() reports elapsed time
# relative to it.
T_START = time.time()
25
+
26
def flatten_session(session):
    """Render one haystack session as a single string.

    A string is returned unchanged. A list is treated as a sequence of turns:
    a dict turn becomes "<role>: <content>" (falling back to the "text" key,
    then to the dict's own string form when neither key is present), any
    other turn is converted with ``str``, and the turns are joined with
    newlines. Any other value is converted with ``str``.
    """
    if isinstance(session, str):
        return session
    if not isinstance(session, list):
        return str(session)

    def render(turn):
        # Non-dict turns carry no role/content structure.
        if not isinstance(turn, dict):
            return str(turn)
        speaker = turn.get('role', '')
        body = turn.get('content', turn.get('text', str(turn)))
        return f"{speaker}: {body}"

    return "\n".join(render(turn) for turn in session)
37
+
38
def load_data(path, limit=None):
    """Load the benchmark question list from a JSON file.

    The file may contain the list directly, or a dict that wraps it under one
    of the keys "data", "questions", "items" or "results" (checked in that
    order).

    Args:
        path: Location of the JSON file.
        limit: If truthy, keep only the first ``limit`` entries.

    Returns:
        The list of question records.

    Raises:
        ValueError: If no list of questions can be found in the file.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, dict):
        for key in ("data", "questions", "items", "results"):
            if key in data:
                data = data[key]
                break

    # Fail here with a clear message instead of handing callers a dict (or a
    # scalar) that only breaks later when it is sliced or iterated.
    if not isinstance(data, list):
        raise ValueError(
            f"{path}: expected a JSON list of questions, or a dict wrapping one "
            "under 'data', 'questions', 'items' or 'results'"
        )

    if limit:
        data = data[:limit]
    return data
45
+
46
def recall_at_k(ranked, gold, K):
    """Return 1.0 if any gold index is among the first K ranked entries.

    This is a hit indicator rather than a fractional recall: a single gold
    item in the top K scores 1.0. An empty gold collection also scores 1.0.
    """
    targets = set(gold)
    if len(targets) == 0:
        return 1.0
    found = not targets.isdisjoint(ranked[:K])
    return 1.0 if found else 0.0
50
+
51
def ndcg_at_k(ranked, gold, K):
    """Compute binary-relevance NDCG over the first K ranked entries.

    Args:
        ranked: Ranked sequence of candidate indices, best first.
        gold: Collection of relevant indices.
        K: Cut-off rank.

    Returns:
        DCG of the top K divided by the ideal DCG for ``min(len(gold), K)``
        relevant items. Returns 1.0 when ``gold`` is empty and 0.0 when the
        ideal DCG is zero.
    """
    gold_set = set(gold)
    if not gold_set:
        return 1.0

    # Credit each gold index once. Without this, a ranking that repeats a
    # gold index collects gain at every occurrence and the score can
    # exceed 1.0.
    credited = set()
    dcg = 0.0
    for pos, idx in enumerate(ranked[:K]):
        if idx in gold_set and idx not in credited:
            credited.add(idx)
            dcg += 1.0 / math.log2(pos + 2)

    idcg = sum(1.0 / math.log2(i + 2) for i in range(min(len(gold_set), K)))
    return dcg / idcg if idcg > 0 else 0.0
57
+
58
def main():
    """Run the LongMemEval benchmark for cosine ranking and the QMG optimizer.

    For every question the haystack sessions are embedded with gte-large and
    ranked by cosine similarity to the question. Unless ``--fast`` is given,
    the top 14 cosine candidates are also passed to ``optimize_subgraph``
    under two weightings ("default" and "retrieval"). A summary is printed and
    per-question results are written to RESULTS_FILE (JSON) and CSV_FILE.

    Command-line flags:
        --limit N  evaluate only the first N questions
        --force    ignore --limit and evaluate every question
        --fast     skip the optimizer and report the cosine baseline only
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument("--fast", action="store_true", help="Skip QMG, cosine only")
    parser.add_argument("--force", action="store_true", help="Run full 500")
    args = parser.parse_args()

    data = load_data(DATA_PATH)
    print(f"Loaded {len(data)} questions", flush=True)

    # --force takes precedence over --limit.
    limit = None if args.force else args.limit
    if limit:
        data = data[:limit]

    from sentence_transformers import SentenceTransformer
    import torch
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading gte-large on {device}...", flush=True)
    model = SentenceTransformer("thenlper/gte-large", device=device)
    dim = model.get_sentence_embedding_dimension()
    print(f"Model loaded, dim={dim}", flush=True)

    # Optimizer weightings evaluated for every question.
    qmg_configs = [
        ("default", {"alpha": 0.4, "beta_conn": 0.35, "gamma_cov": 0.25, "shots": 4096}),
        ("retrieval", {"alpha": 1.0, "beta_conn": 0.0, "gamma_cov": 0.0, "shots": 4096}),
    ]
    qmg_checkout = "/home/dt/qmg-v1"

    results = []
    n_questions = len(data)

    for idx, item in enumerate(data):
        question = item.get("question", item.get("query", ""))
        haystack = item.get("haystack_sessions", item.get("sessions", item.get("corpus", [])))
        haystack_ids = item.get("haystack_session_ids", item.get("session_ids", []))
        answer_ids = item.get("answer_session_ids", item.get("answer_ids", []))

        # Positions of the gold sessions inside the haystack; gold ids that
        # are not present in the haystack are ignored.
        gold_indices = [haystack_ids.index(g) for g in answer_ids if g in haystack_ids]

        if not gold_indices or len(haystack) < 3:
            results.append({"idx": idx, "skip": True, "reason": "no_gold_or_too_few"})
            continue

        texts = [flatten_session(s) for s in haystack]

        # Encode the question together with its sessions; row 0 is the question.
        embs = model.encode([question] + texts, normalize_embeddings=True,
                            batch_size=32, show_progress_bar=False)
        q_emb = embs[0]
        sess_embs = embs[1:]

        n_sessions = len(sess_embs)
        K_target = min(5, n_sessions)

        # Cosine baseline (embeddings are normalised, so the dot product is
        # the cosine similarity).
        t0 = time.time()
        cos_scores = q_emb @ sess_embs.T
        cos_ranked = np.argsort(cos_scores)[::-1].tolist()
        cos_time = time.time() - t0

        r = {
            "idx": idx,
            "question": question[:120],
            "n_sessions": n_sessions,
            "n_gold": len(gold_indices),
            "cosine": {
                "r1": float(recall_at_k(cos_ranked, gold_indices, 1)),
                "r5": float(recall_at_k(cos_ranked, gold_indices, 5)),
                "r10": float(recall_at_k(cos_ranked, gold_indices, 10)),
                "ndcg": float(ndcg_at_k(cos_ranked, gold_indices, 10)),
                "time": cos_time,
            }
        }

        # QMG CVaR optimizer — two configs
        if not args.fast:
            qmg_start = time.time()
            try:
                # Add the checkout once instead of growing sys.path by one
                # entry per question.
                if qmg_checkout not in sys.path:
                    sys.path.insert(0, qmg_checkout)
                from quantum_memory_graph.subgraph_optimizer import optimize_subgraph

                # Build adjacency from session embeddings (cosine similarity matrix)
                adj = sess_embs @ sess_embs.T
                np.fill_diagonal(adj, 0.0)

                # Cap candidates at 14 for QAOA to avoid memory OOM
                # (2^14 = 16K complex numbers, 2^40 = 17TB)
                top_indices = cos_ranked[:14]

                for cfg_name, cfg in qmg_configs:
                    cfg_start = time.time()
                    # Fresh arrays per config so one optimizer call cannot
                    # observe in-place changes made by the previous one.
                    top_scores = cos_scores[top_indices]
                    top_adj = adj[np.ix_(top_indices, top_indices)]

                    result = optimize_subgraph(
                        relevance_scores=top_scores,
                        adjacency=top_adj,
                        K=K_target,
                        alpha=cfg["alpha"],
                        beta_conn=cfg["beta_conn"],
                        gamma_cov=cfg["gamma_cov"],
                        grid_size=6,
                        shots=cfg["shots"],
                        p_layers=2,
                    )
                    method = result.get("method", "unknown")

                    # Map capped indices back to original indices
                    selection = [int(top_indices[s]) for s in result.get("selection", [])]

                    # Optimizer picks first, then every other session in
                    # cosine order. Appending the rest in raw index order
                    # would fill ranks K..10 with arbitrary sessions and
                    # distort R@10 / NDCG@10.
                    chosen = set(selection)
                    ranked = selection + [i for i in cos_ranked if i not in chosen]

                    r[cfg_name] = {
                        "r1": float(recall_at_k(ranked, gold_indices, 1)),
                        "r5": float(recall_at_k(ranked, gold_indices, 5)),
                        "r10": float(recall_at_k(ranked, gold_indices, 10)),
                        "ndcg": float(ndcg_at_k(ranked, gold_indices, 10)),
                        "method": method,
                        "n_capped": len(top_indices),
                        "score": float(result.get("score", 0)),
                        "optimal_score": float(result.get("optimal", {}).get("score", 0)),
                        # Time spent on this config only.
                        "time": time.time() - cfg_start,
                    }

            except Exception as e:
                # Best effort: record the failure and keep the cosine result
                # so one bad question does not abort the whole run.
                import traceback
                r["qmg_error"] = f"{type(e).__name__}: {e}"
                r["qmg_traceback"] = traceback.format_exc()

            r["total_qmg_time"] = time.time() - qmg_start

        results.append(r)

        # Progress every 5 questions
        if (idx + 1) % 5 == 0:
            elapsed = time.time() - T_START
            cos_done = [rr for rr in results if not rr.get("skip") and "cosine" in rr]
            if cos_done:
                cos_r5_avg = np.mean([rr["cosine"]["r5"] for rr in cos_done]) * 100
                print(f"[{idx+1}/{n_questions}] {elapsed:.0f}s cos_r5={cos_r5_avg:.1f}%", flush=True)

    # Summary
    effective = [rec for rec in results if not rec.get("skip")]

    cos_items = [rec for rec in effective if "cosine" in rec]
    print("\n" + "="*60, flush=True)
    print(f"LONGMEMEVAL — {datetime.now(timezone.utc).isoformat()}", flush=True)
    print(f"Questions: {len(effective)} effective ({len(results)-len(effective)} skipped)", flush=True)

    if cos_items:
        cos_r1 = np.mean([rec["cosine"]["r1"] for rec in cos_items])*100
        cos_r5 = np.mean([rec["cosine"]["r5"] for rec in cos_items])*100
        cos_r10 = np.mean([rec["cosine"]["r10"] for rec in cos_items])*100
        cos_ndcg = np.mean([rec["cosine"]["ndcg"] for rec in cos_items])
        print(f"\nCOSINE BASELINE:", flush=True)
        print(f" R@1: {cos_r1:.1f}%", flush=True)
        print(f" R@5: {cos_r5:.1f}%", flush=True)
        print(f" R@10: {cos_r10:.1f}%", flush=True)
        print(f" NDCG: {cos_ndcg:.4f}", flush=True)

    for cfg_name in ["default", "retrieval"]:
        items = [rec for rec in effective if cfg_name in rec]
        if items:
            r1 = np.mean([rec[cfg_name]["r1"] for rec in items])*100
            r5 = np.mean([rec[cfg_name]["r5"] for rec in items])*100
            r10 = np.mean([rec[cfg_name]["r10"] for rec in items])*100
            ndcg = np.mean([rec[cfg_name]["ndcg"] for rec in items])
            # R@5 values grouped by the method the optimizer reported.
            methods = {}
            for rec in items:
                m = rec[cfg_name].get("method", "?")
                methods.setdefault(m, []).append(rec[cfg_name]["r5"])
            avg_time = np.mean([rec[cfg_name]["time"] for rec in items])
            print(f"\nQMG {cfg_name.upper()}:", flush=True)
            print(f" R@1: {r1:.1f}%", flush=True)
            print(f" R@5: {r5:.1f}%", flush=True)
            print(f" R@10: {r10:.1f}%", flush=True)
            print(f" NDCG: {ndcg:.4f}", flush=True)
            print(f" Avg time: {avg_time:.1f}s", flush=True)
            for m, vals in sorted(methods.items()):
                print(f" {m}: {len(vals)}x R@5={np.mean(vals)*100:.1f}%", flush=True)

    total_t = time.time() - T_START
    print(f"\nTotal: {total_t:.0f}s ({total_t/60:.1f} min)", flush=True)
    print("="*60, flush=True)

    # Make sure the output directory exists so a long run is not lost at the
    # final write.
    os.makedirs(RESULTS_DIR, exist_ok=True)

    with open(RESULTS_FILE, "w", encoding="utf-8") as f:
        json.dump({"timestamp": datetime.now(timezone.utc).isoformat(),
                   "n_total": len(data), "results": results},
                  f, indent=2, default=str)
    print(f"\nSaved to {RESULTS_FILE}", flush=True)

    with open(CSV_FILE, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["idx","n","ngold","cr1","cr5","cr10","cndcg",
                    "dr1","dr5","dr10","dndcg","dmethod",
                    "rr1","rr5","rr10","rndcg","rmethod"])
        for rec in results:
            if rec.get("skip"):
                continue
            default_res = rec.get("default", {})
            retrieval_res = rec.get("retrieval", {})
            w.writerow([
                rec["idx"], rec["n_sessions"], rec["n_gold"],
                rec["cosine"]["r1"], rec["cosine"]["r5"], rec["cosine"]["r10"], rec["cosine"]["ndcg"],
                default_res.get("r1"), default_res.get("r5"),
                default_res.get("r10"), default_res.get("ndcg"),
                default_res.get("method"),
                retrieval_res.get("r1"), retrieval_res.get("r5"),
                retrieval_res.get("r10"), retrieval_res.get("ndcg"),
                retrieval_res.get("method"),
            ])
    print(f"CSV saved to {CSV_FILE}", flush=True)
270
+
271
+ if __name__ == "__main__":
272
+ main()
@@ -0,0 +1,355 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ LongMemEval 500 — Two-Stage Pipeline Benchmark.
4
+
5
+ Stage 1: gte-large cosine similarity -> candidate ranking
6
+ Stage 2: QAOA+CVaR subgraph refinement on top candidates
7
+
8
+ Measures: pure cosine vs cosine+QAOA refinement vs greedy subgraph
9
+
10
+ DK 🦍
11
+ """
12
+ import json, time, math, sys, os, argparse, csv
13
+ from datetime import datetime, timezone
14
+ import numpy as np
15
+
16
# Input dataset and output locations. The defaults are the paths this
# benchmark was written against; the environment variables allow running it
# on a machine that does not share that directory layout.
DATA_PATH = os.environ.get(
    "QMG_LONGMEMEVAL_DATA",
    "/home/dt/projects-shared/LongMemEval/data/longmemeval_s_cleaned.json",
)
RESULTS_DIR = os.environ.get("QMG_RESULTS_DIR", "/home/dt/qmg-v1/benchmarks")
RESULTS_FILE = os.path.join(RESULTS_DIR, "longmemeval_staged_results.json")
CSV_FILE = os.path.join(RESULTS_DIR, "longmemeval_staged_results.csv")

# Wall-clock reference taken at import time; main() reports elapsed time
# relative to it.
T_START = time.time()
22
+
23
def flatten_session(session):
    """Turn a haystack session into one newline-joined string.

    Strings pass through untouched. For a list, each dict turn is rendered as
    "<role>: <content>" (using "text" when "content" is absent, and the
    dict's string form when both are absent) and every other turn is
    converted with ``str``. Values that are neither a string nor a list are
    converted with ``str``.
    """
    if isinstance(session, str):
        return session
    if not isinstance(session, list):
        return str(session)

    lines = []
    for turn in session:
        if not isinstance(turn, dict):
            lines.append(str(turn))
            continue
        role = turn.get('role', '')
        body = turn.get('content', turn.get('text', str(turn)))
        lines.append("%s: %s" % (role, body))
    return "\n".join(lines)
34
+
35
def load_data(path, limit=None):
    """Load the benchmark question list from a JSON file.

    The file may contain the list directly, or a dict that wraps it under one
    of the keys "data", "questions", "items" or "results" (checked in that
    order).

    Args:
        path: Location of the JSON file.
        limit: If truthy, keep only the first ``limit`` entries.

    Returns:
        The list of question records.

    Raises:
        ValueError: If no list of questions can be found in the file.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, dict):
        for key in ("data", "questions", "items", "results"):
            if key in data:
                data = data[key]
                break

    # Fail here with a clear message instead of handing callers a dict (or a
    # scalar) that only breaks later when it is sliced or iterated.
    if not isinstance(data, list):
        raise ValueError(
            "%s: expected a JSON list of questions, or a dict wrapping one "
            "under 'data', 'questions', 'items' or 'results'" % path
        )

    if limit:
        data = data[:limit]
    return data
42
+
43
def recall_at_k(ranked, gold, K):
    """Score 1.0 when the top K of ``ranked`` contains at least one gold index.

    The result is a hit indicator, not the fraction of gold items retrieved.
    An empty gold collection scores 1.0.
    """
    wanted = set(gold)
    if len(wanted) == 0:
        return 1.0
    overlap = set(ranked[:K]).intersection(wanted)
    if overlap:
        return 1.0
    return 0.0
47
+
48
def ndcg_at_k(ranked, gold, K):
    """Compute binary-relevance NDCG over the first K ranked entries.

    Args:
        ranked: Ranked sequence of candidate indices, best first.
        gold: Collection of relevant indices.
        K: Cut-off rank.

    Returns:
        DCG of the top K divided by the ideal DCG for ``min(len(gold), K)``
        relevant items. Returns 1.0 when ``gold`` is empty and 0.0 when the
        ideal DCG is zero.
    """
    gold_set = set(gold)
    if not gold_set:
        return 1.0

    # Credit each gold index once. Without this, a ranking that repeats a
    # gold index collects gain at every occurrence and the score can
    # exceed 1.0.
    credited = set()
    dcg = 0.0
    for pos, idx in enumerate(ranked[:K]):
        if idx in gold_set and idx not in credited:
            credited.add(idx)
            dcg += 1.0 / math.log2(pos + 2)

    idcg = sum(1.0 / math.log2(i + 2) for i in range(min(len(gold_set), K)))
    return dcg / idcg if idcg > 0 else 0.0
54
+
55
+
56
def main():
    """Run the two-stage LongMemEval benchmark and save the results.

    Stage 1 ranks each question's haystack sessions by gte-large cosine
    similarity. Stage 2 passes the top ``--max-candidates`` sessions to
    ``optimize_subgraph`` and re-ranks with its selection first, followed by
    the remaining sessions in cosine order. A summary is printed and results
    are written to RESULTS_FILE (JSON) and CSV_FILE.

    Command-line flags:
        --limit N           evaluate only the first N questions
        --force             ignore --limit and evaluate every question
        --max-candidates N  size of the stage-2 candidate pool (default 14)
        --top-k K           number of sessions the optimizer should select
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument("--force", action="store_true", help="Run full 500")
    parser.add_argument("--max-candidates", type=int, default=14, help="QAOA candidate pool size")
    parser.add_argument("--top-k", type=int, default=5, help="Target selection K")
    args = parser.parse_args()

    data = load_data(DATA_PATH)
    print("Loaded %d questions" % len(data), flush=True)

    # --force takes precedence over --limit.
    limit = None if args.force else args.limit
    if limit:
        data = data[:limit]

    from sentence_transformers import SentenceTransformer
    import torch
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Loading gte-large on %s..." % device, flush=True)
    model = SentenceTransformer("thenlper/gte-large", device=device)
    dim = model.get_sentence_embedding_dimension()
    print("Model loaded, dim=%d" % dim, flush=True)

    results = []
    n_questions = len(data)
    max_candidates = args.max_candidates
    top_k = args.top_k
    qmg_checkout = "/home/dt/qmg-v1"

    # Methods to compare.
    # NOTE(review): "greedy_subgraph" sets neither "shots" nor "p_layers", so
    # the call below falls back to 4096 / 2 and passes exactly the same
    # arguments as "qaoa_cvar". Nothing here selects a greedy algorithm —
    # confirm how optimize_subgraph is meant to be switched to greedy mode.
    stage2_methods = [
        ("qaoa_cvar", {"alpha": 0.4, "beta_conn": 0.35, "gamma_cov": 0.25, "shots": 4096, "p_layers": 2}),
        ("greedy_subgraph", {"alpha": 0.4, "beta_conn": 0.35, "gamma_cov": 0.25}),
    ]

    # Trackers (per-question R@5 comparison of stage 2 against pure cosine)
    count_qaoa_won = 0
    count_cosine_won = 0
    count_tied = 0
    count_qaoa_runs = 0

    for idx, item in enumerate(data):
        question = item.get("question", item.get("query", ""))
        haystack = item.get("haystack_sessions", item.get("sessions", item.get("corpus", [])))
        haystack_ids = item.get("haystack_session_ids", item.get("session_ids", []))
        answer_ids = item.get("answer_session_ids", item.get("answer_ids", []))

        # Positions of the gold sessions inside the haystack; gold ids that
        # are not present in the haystack are ignored.
        gold_indices = [haystack_ids.index(g) for g in answer_ids if g in haystack_ids]

        if not gold_indices or len(haystack) < 3:
            results.append({"idx": idx, "skip": True, "reason": "no_gold_or_too_few"})
            continue

        texts = [flatten_session(s) for s in haystack]

        # Encode the question together with its sessions; row 0 is the question.
        embs = model.encode([question] + texts, normalize_embeddings=True,
                            batch_size=32, show_progress_bar=False)
        q_emb = embs[0]
        sess_embs = embs[1:]

        n_sessions = len(sess_embs)

        # --- Stage 1: Cosine ---
        t0 = time.time()
        cos_scores = q_emb @ sess_embs.T
        cos_ranking = np.argsort(cos_scores)[::-1].tolist()
        cos_time = time.time() - t0

        r = {
            "idx": idx,
            "question": question[:120],
            "n_sessions": n_sessions,
            "n_gold": len(gold_indices),
            "cosine": {
                "r1": float(recall_at_k(cos_ranking, gold_indices, 1)),
                "r5": float(recall_at_k(cos_ranking, gold_indices, 5)),
                "r10": float(recall_at_k(cos_ranking, gold_indices, 10)),
                "ndcg": float(ndcg_at_k(cos_ranking, gold_indices, 10)),
                "time": cos_time,
            },
        }

        # --- Stage 2: QAOA+CVaR refinement on top candidates ---
        try:
            # Add the checkout once instead of growing sys.path by one entry
            # per question.
            if qmg_checkout not in sys.path:
                sys.path.insert(0, qmg_checkout)
            from quantum_memory_graph.subgraph_optimizer import optimize_subgraph

            # Take candidates from cosine top-N
            top_indices = cos_ranking[:max_candidates]
            top_scores = cos_scores[top_indices]

            # Build adjacency from top-candidate embeddings
            top_embs = sess_embs[top_indices]
            adj = top_embs @ top_embs.T
            np.fill_diagonal(adj, 0.0)

            # Never ask for more selections than there are candidates.
            k_select = min(top_k, len(top_indices))

            for method_name, cfg in stage2_methods:
                method_start = time.time()
                result = optimize_subgraph(
                    relevance_scores=top_scores,
                    adjacency=adj,
                    K=k_select,
                    alpha=cfg["alpha"],
                    beta_conn=cfg["beta_conn"],
                    gamma_cov=cfg["gamma_cov"],
                    grid_size=6,
                    shots=cfg.get("shots", 4096),
                    p_layers=cfg.get("p_layers", 2),
                )
                opt_method = result.get("method", "unknown")

                # Map capped indices back
                selection = [top_indices[s] for s in result.get("selection", [])]

                # Build ranked list: QAOA picks first, then remaining in cosine order
                chosen = set(selection)
                ranked = selection + [i for i in cos_ranking if i not in chosen]

                r[method_name] = {
                    "r1": float(recall_at_k(ranked, gold_indices, 1)),
                    "r5": float(recall_at_k(ranked, gold_indices, 5)),
                    "r10": float(recall_at_k(ranked, gold_indices, 10)),
                    "ndcg": float(ndcg_at_k(ranked, gold_indices, 10)),
                    "method": opt_method,
                    "n_capped": len(top_indices),
                    "score": float(result.get("score", 0)),
                    "optimal_score": float(result.get("optimal", {}).get("score", 0)),
                    # Time spent on this method only.
                    "time": time.time() - method_start,
                }

            # Determine winner between QAOA and cosine
            q_r5 = r.get("qaoa_cvar", {}).get("r5", 0)
            c_r5 = r["cosine"]["r5"]
            if q_r5 > c_r5:
                r["stage2_winner"] = "qaoa_cvar"
                count_qaoa_won += 1
            elif c_r5 > q_r5:
                r["stage2_winner"] = "cosine"
                count_cosine_won += 1
            else:
                r["stage2_winner"] = "tie"
                count_tied += 1

            if r.get("qaoa_cvar", {}).get("method") == "qaoa":
                count_qaoa_runs += 1

        except Exception as e:
            # Best effort: record the failure and keep the stage-1 result so
            # one bad question does not abort the whole run.
            import traceback
            r["stage2_error"] = "%s: %s" % (type(e).__name__, e)
            r["stage2_traceback"] = traceback.format_exc()

        results.append(r)

        # Progress
        if (idx + 1) % 5 == 0:
            elapsed = time.time() - T_START
            effective = [rr for rr in results if not rr.get("skip")]
            if effective:
                c_r5_avg = np.mean([rr["cosine"]["r5"] for rr in effective]) * 100
                q_vals = [rr["qaoa_cvar"]["r5"] for rr in effective if "qaoa_cvar" in rr]
                # No stage-2 result yet: report NaN without taking the mean
                # of an empty list.
                q_r5_avg = np.mean(q_vals) * 100 if q_vals else float("nan")
                q_wins = sum(1 for rr in effective if rr.get("stage2_winner") == "qaoa_cvar")
                print("[%d/%d] %.0fs | cos_r5=%.1f%% | qaoa_r5=%.1f%% | qaoa_wins=%d" % (
                    idx+1, n_questions, elapsed, c_r5_avg, q_r5_avg, q_wins), flush=True)

    # Summary
    effective = [rec for rec in results if not rec.get("skip")]
    n_eff = len(effective)

    def averaged(items, key):
        """Mean r1/r5/r10 (as percentages) and NDCG of ``key`` over ``items``."""
        return {
            "r1": np.mean([rec[key]["r1"] for rec in items]) * 100,
            "r5": np.mean([rec[key]["r5"] for rec in items]) * 100,
            "r10": np.mean([rec[key]["r10"] for rec in items]) * 100,
            "ndcg": np.mean([rec[key]["ndcg"] for rec in items]),
        }

    def share(count):
        """Percentage of effective questions; 0.0 when there are none."""
        return count / n_eff * 100 if n_eff else 0.0

    print("\n" + "=" * 80, flush=True)
    print("LONGMEMEVAL TWO-STAGE — %s" % datetime.now(timezone.utc).isoformat(), flush=True)
    print("Questions: %d effective (%d skipped)" % (n_eff, n_questions - n_eff), flush=True)
    print("Max candidates: %d, Target K: %d" % (max_candidates, top_k), flush=True)
    print()

    # Stage 1: Pure cosine
    cos = {}
    cos_items = [rec for rec in effective if "cosine" in rec]
    if cos_items:
        cos = averaged(cos_items, "cosine")
        print("--- STAGE 1: COSINE BASELINE ---")
        print(" R@1: %.1f%%" % cos["r1"])
        print(" R@5: %.1f%%" % cos["r5"])
        print(" R@10: %.1f%%" % cos["r10"])
        print(" NDCG: %.4f" % cos["ndcg"])
        print()

    # Stage 2: QAOA+CVaR refinement
    qaoa = {}
    qaoa_items = [rec for rec in effective if "qaoa_cvar" in rec]
    if qaoa_items:
        qaoa = averaged(qaoa_items, "qaoa_cvar")
        print("--- STAGE 2: COSINE + QAOA REFINEMENT ---")
        print(" R@1: %.1f%%" % qaoa["r1"])
        print(" R@5: %.1f%%" % qaoa["r5"])
        print(" R@10: %.1f%%" % qaoa["r10"])
        print(" NDCG: %.4f" % qaoa["ndcg"])
        print()

    # Greedy subgraph baseline
    greedy = {}
    greedy_items = [rec for rec in effective if "greedy_subgraph" in rec]
    if greedy_items:
        greedy = averaged(greedy_items, "greedy_subgraph")
        print("--- BASELINE: COSINE + GREEDY SUBGRAPH ---")
        print(" R@1: %.1f%%" % greedy["r1"])
        print(" R@5: %.1f%%" % greedy["r5"])
        print(" R@10: %.1f%%" % greedy["r10"])
        print(" NDCG: %.4f" % greedy["ndcg"])
        print()

    # Head-to-head: QAOA vs Cosine
    print("--- HEAD-TO-HEAD (QAOA refinement vs pure cosine) ---")
    print(" Questions where QAOA refinement WINS: %d (%.1f%%)" % (count_qaoa_won, share(count_qaoa_won)))
    print(" Questions where cosine alone WINS: %d (%.1f%%)" % (count_cosine_won, share(count_cosine_won)))
    print(" Ties: %d (%.1f%%)" % (count_tied, share(count_tied)))
    print(" QAOA optimizer ran (%d/%d)" % (count_qaoa_runs, n_eff))
    print()

    # Delta vs baseline
    if qaoa_items and cos_items:
        print("--- DELTA (stage2 - stage1) ---")
        print(" R@1: %+.1f%%" % (qaoa["r1"] - cos["r1"]))
        print(" R@5: %+.1f%%" % (qaoa["r5"] - cos["r5"]))
        print(" R@10: %+.1f%%" % (qaoa["r10"] - cos["r10"]))
        print()

    total_t = time.time() - T_START
    print("Total: %.0fs (%.1f min)" % (total_t, total_t/60), flush=True)
    print("=" * 80, flush=True)

    # Make sure the output directory exists so a long run is not lost at the
    # final write.
    os.makedirs(RESULTS_DIR, exist_ok=True)

    # Save JSON
    with open(RESULTS_FILE, "w", encoding="utf-8") as f:
        json.dump({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "n_total": len(data),
            "config": {"max_candidates": max_candidates, "top_k": top_k},
            "cosine": cos,
            "qaoa_cvar": qaoa,
            "greedy_subgraph": greedy,
            "count_qaoa_won": count_qaoa_won,
            "count_cosine_won": count_cosine_won,
            "count_tied": count_tied,
            "count_qaoa_runs": count_qaoa_runs,
            "results": results,
        }, f, indent=2, default=str)
    print("Saved JSON to %s" % RESULTS_FILE, flush=True)

    def cell(metrics, key):
        """CSV cell for ``key``; empty string when the metric is missing."""
        return metrics.get(key, "") if metrics else ""

    # Save CSV
    with open(CSV_FILE, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow([
            "idx","n","ngold",
            "cr1","cr5","cr10","cndcg",
            "qr1","qr5","qr10","qndcg","qmethod",
            "gr1","gr5","gr10","gndcg","gmethod",
            "winner"
        ])
        for rec in results:
            if rec.get("skip"):
                continue
            cosine_res = rec.get("cosine")
            qaoa_res = rec.get("qaoa_cvar")
            greedy_res = rec.get("greedy_subgraph")
            w.writerow([
                rec["idx"], rec["n_sessions"], rec["n_gold"],
                cell(cosine_res, "r1"), cell(cosine_res, "r5"),
                cell(cosine_res, "r10"), cell(cosine_res, "ndcg"),
                cell(qaoa_res, "r1"), cell(qaoa_res, "r5"),
                cell(qaoa_res, "r10"), cell(qaoa_res, "ndcg"),
                cell(qaoa_res, "method"),
                cell(greedy_res, "r1"), cell(greedy_res, "r5"),
                cell(greedy_res, "r10"), cell(greedy_res, "ndcg"),
                cell(greedy_res, "method"),
                rec.get("stage2_winner", "?"),
            ])
    print("Saved CSV to %s" % CSV_FILE, flush=True)
352
+
353
+
354
+ if __name__ == "__main__":
355
+ main()
@@ -7,7 +7,7 @@ then QAOA to find the optimal subgraph for any query.
7
7
  Copyright 2026 Coinkong (Chef's Attraction). MIT License.
8
8
  """
9
9
 
10
- __version__ = "0.4.0"
10
+ __version__ = "1.2.0"
11
11
 
12
12
  from .graph import MemoryGraph
13
13
  from .subgraph_optimizer import optimize_subgraph
@@ -1,15 +1,25 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quantum-memory-graph
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Summary: Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
5
5
  Home-page: https://github.com/Dustin-a11y/quantum-memory-graph
6
6
  Author: Coinkong (Chef's Attraction)
7
7
  License: MIT
8
+ Project-URL: Source Code, https://github.com/Dustin-a11y/quantum-memory-graph
9
+ Project-URL: Issue Tracker, https://github.com/Dustin-a11y/quantum-memory-graph/issues
10
+ Project-URL: Benchmark Results, https://github.com/Dustin-a11y/quantum-memory-graph/tree/main/benchmarks
11
+ Project-URL: LongMemEval Submission, https://github.com/xiaowu0162/LongMemEval/issues
8
12
  Keywords: quantum,memory,knowledge-graph,agents,qaoa,ai
9
13
  Classifier: Development Status :: 4 - Beta
10
14
  Classifier: Intended Audience :: Developers
11
15
  Classifier: License :: OSI Approved :: MIT License
12
16
  Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
23
  Requires-Python: >=3.9
14
24
  Description-Content-Type: text/markdown
15
25
  License-File: LICENSE
@@ -37,9 +47,9 @@ Every memory system treats memories as independent documents — search, rank, s
37
47
 
38
48
  Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
39
49
 
40
- ## Benchmark: MemCombine
50
+ ## Benchmark: MemCombine (Internal — Memory Combination)
41
51
 
42
- We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
52
+ MemCombine tests what no existing benchmark measures — **memory combination quality**, where QAOA graph selection finds coherent subsets that embedding similarity misses.
43
53
 
44
54
  | Method | Coverage | Evidence Recall | F1 | Perfect |
45
55
  |--------|----------|----------------|----|---------|
@@ -48,6 +58,13 @@ We created MemCombine to test what no existing benchmark measures — **memory c
48
58
  | **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
49
59
 
50
60
  When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
61
+
62
+ > **How to read the LongMemEval table below:** The R@5/R@10 numbers are driven by QMG's chunked
63
+ > embedding retrieval pipeline (Stage 1: gte-large, 500-char chunks, mean-of-top-3
64
+ > scoring). QAOA (Stage 2) refines the top-14 candidates for relationship-aware
65
+ > selection — its advantage shows up in MemCombine (combination quality) rather
66
+ > than raw recall rank. The pipeline as a whole achieves #1 on LongMemEval.
67
+
51
68
  ## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
52
69
 
53
70
  Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) for long-term memory in AI agents:
@@ -57,7 +74,7 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
57
74
  | OMEGA (prev SOTA) | — | 89.2% | 94.1% | 87.5% |
58
75
  | Mastra OM | — | 91.0% | 95.2% | 89.1% |
59
76
  | **QMG v1.1 (published #1)** | — | **95.8%** | **98.85%** | **93.2%** |
60
- | **QMG v1.2 (official, this repo)** 🏆 | **90.6%** | **98.6%** | **99.4%** | **0.9426** |
77
+ | **QMG v1.2 chunked retrieval pipeline** 🏆 | **90.6%** | **98.6%** | **99.4%** | **94.26%** |
61
78
 
62
79
  **Benchmark run:** 500 questions, chunked gte-large embeddings (500-char blocks, 100-char overlap, mean-of-top-3 session scoring). Verified on DGX Spark GB10 (CUDA, ~53 min).
63
80
 
@@ -65,7 +82,6 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
65
82
 
66
83
  **See:** `benchmarks/run_longmemeval_chunked_staged.py` for the exact benchmark code, `benchmarks/longmemeval_chunked_staged_results.json` for full per-question results.
67
84
 
68
-
69
85
  ## Install
70
86
 
71
87
  ```bash
@@ -18,6 +18,8 @@ benchmarks/run_final.py
18
18
  benchmarks/run_full_benchmark.py
19
19
  benchmarks/run_full_benchmark_v2.py
20
20
  benchmarks/run_longmemeval_chunked_staged.py
21
+ benchmarks/run_longmemeval_cvar_v2.py
22
+ benchmarks/run_longmemeval_staged.py
21
23
  quantum_memory_graph/__init__.py
22
24
  quantum_memory_graph/__main__.py
23
25
  quantum_memory_graph/api.py
@@ -1,18 +1,29 @@
1
1
  [metadata]
2
2
  name = quantum-memory-graph
3
- version = 1.2.0
3
+ version = 1.2.1
4
4
  description = Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
5
5
  long_description = file: README.md
6
6
  long_description_content_type = text/markdown
7
7
  author = Coinkong (Chef's Attraction)
8
8
  license = MIT
9
9
  url = https://github.com/Dustin-a11y/quantum-memory-graph
10
+ project_urls =
11
+ Source Code = https://github.com/Dustin-a11y/quantum-memory-graph
12
+ Issue Tracker = https://github.com/Dustin-a11y/quantum-memory-graph/issues
13
+ Benchmark Results = https://github.com/Dustin-a11y/quantum-memory-graph/tree/main/benchmarks
14
+ LongMemEval Submission = https://github.com/xiaowu0162/LongMemEval/issues
10
15
  keywords = quantum, memory, knowledge-graph, agents, qaoa, ai
11
16
  classifiers =
12
17
  Development Status :: 4 - Beta
13
18
  Intended Audience :: Developers
14
19
  License :: OSI Approved :: MIT License
15
20
  Programming Language :: Python :: 3
21
+ Programming Language :: Python :: 3.9
22
+ Programming Language :: Python :: 3.10
23
+ Programming Language :: Python :: 3.11
24
+ Programming Language :: Python :: 3.12
25
+ Programming Language :: Python :: 3.13
26
+ Topic :: Scientific/Engineering :: Artificial Intelligence
16
27
 
17
28
  [options]
18
29
  packages = find: