quantum-memory-graph 1.2.0__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {quantum_memory_graph-1.2.0/quantum_memory_graph.egg-info → quantum_memory_graph-1.2.2}/PKG-INFO +14 -24
  2. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/README.md +3 -22
  3. quantum_memory_graph-1.2.2/benchmarks/run_longmemeval_cvar_v2.py +272 -0
  4. quantum_memory_graph-1.2.2/benchmarks/run_longmemeval_staged.py +355 -0
  5. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/__init__.py +1 -1
  6. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/pipeline.py +81 -14
  7. quantum_memory_graph-1.2.2/quantum_memory_graph/synergy_reranker.py +133 -0
  8. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2/quantum_memory_graph.egg-info}/PKG-INFO +14 -24
  9. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/SOURCES.txt +3 -1
  10. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/requires.txt +0 -1
  11. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/setup.cfg +12 -2
  12. quantum_memory_graph-1.2.0/benchmarks/memcombine.py +0 -236
  13. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/LICENSE +0 -0
  14. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/__init__.py +0 -0
  15. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/data_collector.py +0 -0
  16. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/fast_longmemeval.py +0 -0
  17. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/generate_scenarios.py +0 -0
  18. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench.py +0 -0
  19. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v2.py +0 -0
  20. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v3.py +0 -0
  21. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v4.py +0 -0
  22. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v5.py +0 -0
  23. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v6.py +0 -0
  24. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v7.py +0 -0
  25. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_final.py +0 -0
  26. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_full_benchmark.py +0 -0
  27. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_full_benchmark_v2.py +0 -0
  28. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_longmemeval_chunked_staged.py +0 -0
  29. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/pyproject.toml +0 -0
  30. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/__main__.py +0 -0
  31. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/api.py +0 -0
  32. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/graph.py +0 -0
  33. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/pce_optimizer.py +0 -0
  34. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/recency.py +0 -0
  35. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/subgraph_optimizer.py +0 -0
  36. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/dependency_links.txt +0 -0
  37. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/top_level.txt +0 -0
  38. {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/tests/test_full_pipeline.py +0 -0
@@ -1,19 +1,28 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quantum-memory-graph
3
- Version: 1.2.0
3
+ Version: 1.2.2
4
4
  Summary: Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
5
5
  Home-page: https://github.com/Dustin-a11y/quantum-memory-graph
6
6
  Author: Coinkong (Chef's Attraction)
7
7
  License: MIT
8
+ Project-URL: Source Code, https://github.com/Dustin-a11y/quantum-memory-graph
9
+ Project-URL: Issue Tracker, https://github.com/Dustin-a11y/quantum-memory-graph/issues
10
+ Project-URL: Benchmark Results, https://github.com/Dustin-a11y/quantum-memory-graph/tree/main/benchmarks
11
+ Project-URL: LongMemEval Submission, https://github.com/xiaowu0162/LongMemEval/issues
8
12
  Keywords: quantum,memory,knowledge-graph,agents,qaoa,ai
9
13
  Classifier: Development Status :: 4 - Beta
10
14
  Classifier: Intended Audience :: Developers
11
15
  Classifier: License :: OSI Approved :: MIT License
12
16
  Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
23
  Requires-Python: >=3.9
14
24
  Description-Content-Type: text/markdown
15
25
  License-File: LICENSE
16
- Requires-Dist: quantum-agent-memory>=0.1.0
17
26
  Requires-Dist: sentence-transformers>=2.2.0
18
27
  Requires-Dist: networkx>=3.0
19
28
  Requires-Dist: numpy>=1.24.0
@@ -35,29 +44,16 @@ Dynamic: license-file
35
44
 
36
45
  Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
37
46
 
38
- Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
39
-
40
- ## Benchmark: MemCombine
41
-
42
- We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
43
-
44
- | Method | Coverage | Evidence Recall | F1 | Perfect |
45
- |--------|----------|----------------|----|---------|
46
- | Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
47
- | **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
48
- | **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
49
-
50
- When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
51
47
  ## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
52
48
 
53
- Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) for long-term memory in AI agents:
49
+ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813); see the [verified submission](https://github.com/xiaowu0162/LongMemEval/issues/46).
54
50
 
55
51
  | Method | R@1 | R@5 | R@10 | NDCG@10 |
56
52
  |--------|:---:|:---:|:----:|:-------:|
57
53
  | OMEGA (prev SOTA) | — | 89.2% | 94.1% | 87.5% |
58
54
  | Mastra OM | — | 91.0% | 95.2% | 89.1% |
59
55
  | **QMG v1.1 (published #1)** | — | **95.8%** | **98.85%** | **93.2%** |
60
- | **QMG v1.2 (official, this repo)** 🏆 | **90.6%** | **98.6%** | **99.4%** | **0.9426** |
56
+ | **QMG v1.2 chunked retrieval pipeline** 🏆 | **90.6%** | **98.6%** | **99.4%** | **94.26%** |
61
57
 
62
58
  **Benchmark run:** 500 questions, chunked gte-large embeddings (500-char blocks, 100-char overlap, mean-of-top-3 session scoring). Verified on DGX Spark GB10 (CUDA, ~53 min).
63
59
 
@@ -65,7 +61,6 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
65
61
 
66
62
  **See:** `benchmarks/run_longmemeval_chunked_staged.py` for the exact benchmark code, `benchmarks/longmemeval_chunked_staged_results.json` for full per-question results.
67
63
 
68
-
69
64
  ## Install
70
65
 
71
66
  ```bash
@@ -191,10 +186,7 @@ result = recall(
191
186
  )
192
187
  ```
193
188
 
194
- ### Run MemCombine Benchmark
195
-
196
189
  ```python
197
- from benchmarks.memcombine import run_benchmark
198
190
 
199
191
  def my_recall(memories, query, K):
200
192
  # Your recall implementation
@@ -227,8 +219,6 @@ Validated on `ibm_fez` and `ibm_kingston` backends.
227
219
 
228
220
  MIT License — Copyright 2026 Coinkong (Chef's Attraction)
229
221
 
230
-
231
222
  ## Links
232
223
 
233
- - [quantum-agent-memory](https://github.com/Dustin-a11y/quantum-agent-memory) — The QAOA optimization engine
234
- - [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
224
+ - [GitHub](https://github.com/Dustin-a11y/quantum-memory-graph) — Source code and benchmarks
@@ -4,29 +4,16 @@
4
4
 
5
5
  Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
6
6
 
7
- Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
8
-
9
- ## Benchmark: MemCombine
10
-
11
- We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
12
-
13
- | Method | Coverage | Evidence Recall | F1 | Perfect |
14
- |--------|----------|----------------|----|---------|
15
- | Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
16
- | **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
17
- | **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
18
-
19
- When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
20
7
  ## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
21
8
 
22
- Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) for long-term memory in AI agents:
9
+ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813); see the [verified submission](https://github.com/xiaowu0162/LongMemEval/issues/46).
23
10
 
24
11
  | Method | R@1 | R@5 | R@10 | NDCG@10 |
25
12
  |--------|:---:|:---:|:----:|:-------:|
26
13
  | OMEGA (prev SOTA) | — | 89.2% | 94.1% | 87.5% |
27
14
  | Mastra OM | — | 91.0% | 95.2% | 89.1% |
28
15
  | **QMG v1.1 (published #1)** | — | **95.8%** | **98.85%** | **93.2%** |
29
- | **QMG v1.2 (official, this repo)** 🏆 | **90.6%** | **98.6%** | **99.4%** | **0.9426** |
16
+ | **QMG v1.2 chunked retrieval pipeline** 🏆 | **90.6%** | **98.6%** | **99.4%** | **94.26%** |
30
17
 
31
18
  **Benchmark run:** 500 questions, chunked gte-large embeddings (500-char blocks, 100-char overlap, mean-of-top-3 session scoring). Verified on DGX Spark GB10 (CUDA, ~53 min).
32
19
 
@@ -34,7 +21,6 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
34
21
 
35
22
  **See:** `benchmarks/run_longmemeval_chunked_staged.py` for the exact benchmark code, `benchmarks/longmemeval_chunked_staged_results.json` for full per-question results.
36
23
 
37
-
38
24
  ## Install
39
25
 
40
26
  ```bash
@@ -160,10 +146,7 @@ result = recall(
160
146
  )
161
147
  ```
162
148
 
163
- ### Run MemCombine Benchmark
164
-
165
149
  ```python
166
- from benchmarks.memcombine import run_benchmark
167
150
 
168
151
  def my_recall(memories, query, K):
169
152
  # Your recall implementation
@@ -196,8 +179,6 @@ Validated on `ibm_fez` and `ibm_kingston` backends.
196
179
 
197
180
  MIT License — Copyright 2026 Coinkong (Chef's Attraction)
198
181
 
199
-
200
182
  ## Links
201
183
 
202
- - [quantum-agent-memory](https://github.com/Dustin-a11y/quantum-agent-memory) — The QAOA optimization engine
203
- - [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
184
+ - [GitHub](https://github.com/Dustin-a11y/quantum-memory-graph) — Source code and benchmarks
@@ -0,0 +1,272 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ LongMemEval 500-question Benchmark — QMG CVaR subgraph optimizer.
4
+
5
+ Routes each question through the QMG subgraph optimizer on Spark.
6
+ Measures recall@K against gold answer sessions.
7
+
8
+ Usage:
9
+ python3 -u run_longmemeval_cvar_v2.py --limit 5 # Quick test
10
+ python3 -u run_longmemeval_cvar_v2.py --force # Full 500
11
+ python3 -u run_longmemeval_cvar_v2.py --fast # Skip QMG, cosine only
12
+
13
+ Output: JSON results + CSV saved to benchmarks/ directory.
14
+ """
15
+ import json, time, math, sys, os, argparse, csv
16
+ from datetime import datetime, timezone
17
+ import numpy as np
18
+
19
# Input dataset and output locations.
# The defaults are the original hard-coded paths; each can be overridden via an
# environment variable so the benchmark can run outside the author's machine.
DATA_PATH = os.environ.get(
    "QMG_LONGMEMEVAL_DATA",
    "/home/dt/projects-shared/LongMemEval/data/longmemeval_s_cleaned.json",
)
RESULTS_DIR = os.environ.get("QMG_RESULTS_DIR", "/home/dt/qmg-v1/benchmarks")
RESULTS_FILE = os.path.join(RESULTS_DIR, "longmemeval_cvar_results.json")
CSV_FILE = os.path.join(RESULTS_DIR, "longmemeval_cvar_results.csv")

# Captured at import time; main() reports elapsed time relative to this.
T_START = time.time()
25
+
26
def flatten_session(session):
    """Render one haystack session as a single string.

    Args:
        session: A string (returned unchanged), a list of turns, or any other
            object (returned as ``str(session)``).

    Returns:
        For a list, one line per turn joined with newlines. A dict turn is
        rendered as ``"<role>: <body>"`` where ``role`` defaults to ``""`` and
        the body is the ``content`` value, else the ``text`` value, else the
        string form of the whole dict. A non-dict turn is rendered with
        ``str()``.
    """
    if isinstance(session, str):
        return session
    if not isinstance(session, list):
        return str(session)

    lines = []
    for turn in session:
        if not isinstance(turn, dict):
            lines.append(str(turn))
            continue
        # Resolve the body lazily: stringifying the whole turn dict is only
        # needed when neither "content" nor "text" is present.
        if "content" in turn:
            body = turn["content"]
        elif "text" in turn:
            body = turn["text"]
        else:
            body = str(turn)
        lines.append(f"{turn.get('role', '')}: {body}")
    return "\n".join(lines)
37
+
38
def load_data(path, limit=None):
    """Load the benchmark question list from a JSON file.

    The file may contain a top-level list, or a dict that wraps the list under
    one of the keys ``data``, ``questions``, ``items`` or ``results`` (checked
    in that order; the first key present wins).

    Args:
        path: Path to the JSON file.
        limit: If truthy, keep only the first ``limit`` entries. ``None`` and
            ``0`` both mean "no limit".

    Returns:
        The (possibly truncated) loaded data.

    Raises:
        ValueError: If the top-level value is a dict containing none of the
            recognized wrapper keys.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        wrapper_keys = ("data", "questions", "items", "results")
        for key in wrapper_keys:
            if key in data:
                data = data[key]
                break
        else:
            # Fail here with a clear message instead of returning a dict that
            # breaks later when it is sliced or iterated as a question list.
            raise ValueError(
                f"{path}: expected a JSON list or an object with one of "
                f"{list(wrapper_keys)}"
            )

    if limit:
        data = data[:limit]
    return data
45
+
46
def recall_at_k(ranked, gold, K):
    """Return 1.0 if any gold item appears in the first K ranked items.

    This is a hit indicator rather than a fractional recall: a single gold
    item within the top K scores 1.0. An empty ``gold`` also scores 1.0.
    """
    targets = set(gold)
    if len(targets) == 0:
        return 1.0
    top_k = set(ranked[:K])
    if top_k.isdisjoint(targets):
        return 0.0
    return 1.0
50
+
51
def ndcg_at_k(ranked, gold, K):
    """Compute binary-relevance NDCG over the first K ranked items.

    Each gold item found at zero-based position ``p`` contributes
    ``1 / log2(p + 2)``. The ideal DCG places ``min(len(gold), K)`` gold items
    at the top. An empty ``gold`` scores 1.0; a zero ideal DCG scores 0.0.
    """
    relevant = set(gold)
    if not relevant:
        return 1.0

    def discount(position):
        return 1.0 / math.log2(position + 2)

    hit_positions = [
        position
        for position, item in enumerate(ranked[:K])
        if item in relevant
    ]
    dcg = sum(discount(position) for position in hit_positions)

    ideal_count = min(len(relevant), K)
    idcg = sum(discount(position) for position in range(ideal_count))

    if idcg > 0:
        return dcg / idcg
    return 0.0
57
+
58
def main():
    """Run the LongMemEval benchmark and write JSON and CSV results.

    Command-line flags:
        --limit N  Evaluate only the first N questions.
        --fast     Skip the QMG optimizer and report the cosine baseline only.
        --force    Ignore --limit and evaluate every loaded question.

    For each question the haystack sessions are embedded with gte-large and
    ranked by cosine similarity to the question. Unless --fast is given, the
    top cosine candidates are also passed to ``optimize_subgraph`` under two
    weight configurations ("default" and "retrieval"). R@1/R@5/R@10 and
    NDCG@10 are recorded per question, a summary is printed, and the results
    are saved to RESULTS_FILE and CSV_FILE.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument("--fast", action="store_true", help="Skip QMG, cosine only")
    parser.add_argument("--force", action="store_true", help="Run full 500")
    args = parser.parse_args()

    data = load_data(DATA_PATH)
    print(f"Loaded {len(data)} questions", flush=True)

    limit = None if args.force else args.limit
    if limit:
        data = data[:limit]

    from sentence_transformers import SentenceTransformer
    import torch
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading gte-large on {device}...", flush=True)
    model = SentenceTransformer("thenlper/gte-large", device=device)
    dim = model.get_sentence_embedding_dimension()
    print(f"Model loaded, dim={dim}", flush=True)

    # Location of the quantum_memory_graph checkout used for the optimizer.
    qmg_root = "/home/dt/qmg-v1"
    # Cap candidates at 14 for QAOA to avoid memory OOM
    # (2^14 = 16K complex numbers, 2^40 = 17TB)
    max_candidates = 14
    qmg_configs = [
        ("default", {"alpha": 0.4, "beta_conn": 0.35, "gamma_cov": 0.25, "shots": 4096}),
        ("retrieval", {"alpha": 1.0, "beta_conn": 0.0, "gamma_cov": 0.0, "shots": 4096}),
    ]

    results = []
    n_questions = len(data)

    for idx, item in enumerate(data):
        question = item.get("question", item.get("query", ""))
        haystack = item.get("haystack_sessions", item.get("sessions", item.get("corpus", [])))
        haystack_ids = item.get("haystack_session_ids", item.get("session_ids", []))
        answer_ids = item.get("answer_session_ids", item.get("answer_ids", []))

        # Translate gold session ids into positions within the haystack;
        # ids that are not present in the haystack are dropped.
        gold_indices = []
        for g in answer_ids:
            try:
                gold_indices.append(haystack_ids.index(g))
            except ValueError:
                pass

        if not gold_indices or len(haystack) < 3:
            results.append({"idx": idx, "skip": True, "reason": "no_gold_or_too_few"})
            continue

        texts = [flatten_session(s) for s in haystack]

        # Encode the question together with the sessions in one batch call.
        t0 = time.time()
        embs = model.encode([question] + texts, normalize_embeddings=True,
                            batch_size=32, show_progress_bar=False)
        q_emb = embs[0]
        sess_embs = embs[1:]
        encode_time = time.time() - t0

        n_sessions = len(sess_embs)
        K_target = min(5, n_sessions)

        # Cosine baseline (embeddings are normalized, so dot product = cosine).
        t0 = time.time()
        cos_scores = q_emb @ sess_embs.T
        cos_order = np.argsort(cos_scores)[::-1]
        cos_ranked = cos_order.tolist()
        cos_time = time.time() - t0

        r = {
            "idx": idx,
            "question": question[:120],
            "n_sessions": n_sessions,
            "n_gold": len(gold_indices),
            # Previously computed but never recorded.
            "encode_time": encode_time,
            "cosine": {
                "r1": float(recall_at_k(cos_ranked, gold_indices, 1)),
                "r5": float(recall_at_k(cos_ranked, gold_indices, 5)),
                "r10": float(recall_at_k(cos_ranked, gold_indices, 10)),
                "ndcg": float(ndcg_at_k(cos_ranked, gold_indices, 10)),
                "time": cos_time,
            },
        }

        # QMG CVaR optimizer — two configs
        if not args.fast:
            t0 = time.time()
            try:
                # Insert the checkout path once, not once per question.
                if qmg_root not in sys.path:
                    sys.path.insert(0, qmg_root)
                from quantum_memory_graph.subgraph_optimizer import optimize_subgraph

                # Build adjacency from session embeddings (cosine similarity matrix)
                adj = sess_embs @ sess_embs.T
                np.fill_diagonal(adj, 0.0)

                # The candidate set is the same for every config.
                top_indices = cos_order[:max_candidates]

                for cfg_name, cfg in qmg_configs:
                    # Time each config on its own; a shared start time would
                    # fold the first config's runtime into the second's.
                    cfg_t0 = time.time()

                    # Fresh copies per config, in case the optimizer mutates
                    # its inputs.
                    top_scores = cos_scores[top_indices]
                    top_adj = adj[np.ix_(top_indices, top_indices)]

                    result = optimize_subgraph(
                        relevance_scores=top_scores,
                        adjacency=top_adj,
                        K=K_target,
                        alpha=cfg["alpha"],
                        beta_conn=cfg["beta_conn"],
                        gamma_cov=cfg["gamma_cov"],
                        grid_size=6,
                        shots=cfg["shots"],
                        p_layers=2,
                    )
                    selection_raw = result.get("selection", [])
                    method = result.get("method", "unknown")

                    # Map capped indices back to original indices
                    selection = [int(top_indices[s]) for s in selection_raw]

                    # Selected sessions first, then the remaining sessions.
                    # NOTE(review): the remainder is appended in haystack index
                    # order, not cosine order, so positions after the selection
                    # are effectively arbitrary for R@10 / NDCG@10. Confirm
                    # whether a cosine-ordered tail is intended.
                    sel_set = set(selection)
                    ranked = list(selection)
                    for i in range(n_sessions):
                        if len(ranked) >= n_sessions:
                            break
                        if i not in sel_set:
                            ranked.append(i)

                    r[cfg_name] = {
                        "r1": float(recall_at_k(ranked, gold_indices, 1)),
                        "r5": float(recall_at_k(ranked, gold_indices, 5)),
                        "r10": float(recall_at_k(ranked, gold_indices, 10)),
                        "ndcg": float(ndcg_at_k(ranked, gold_indices, 10)),
                        "method": method,
                        "n_capped": len(top_indices),
                        "score": float(result.get("score", 0)),
                        "optimal_score": float(result.get("optimal", {}).get("score", 0)),
                        "time": time.time() - cfg_t0,
                    }

            except Exception as e:
                # Best effort: keep the cosine result for this question and
                # record why the optimizer stage failed.
                import traceback
                r["qmg_error"] = f"{type(e).__name__}: {e}"
                r["qmg_traceback"] = traceback.format_exc()

            # Wall time for the whole optimizer stage (all configs).
            r["total_qmg_time"] = time.time() - t0

        results.append(r)

        # Progress every 5 questions
        if (idx + 1) % 5 == 0:
            elapsed = time.time() - T_START
            cos_done = [rr for rr in results if not rr.get("skip") and "cosine" in rr]
            if cos_done:
                cos_r5_avg = np.mean([rr["cosine"]["r5"] for rr in cos_done]) * 100
                print(f"[{idx+1}/{n_questions}] {elapsed:.0f}s cos_r5={cos_r5_avg:.1f}%", flush=True)

    # Summary
    effective = [r for r in results if not r.get("skip")]

    cos_items = [r for r in effective if "cosine" in r]
    print("\n" + "=" * 60, flush=True)
    print(f"LONGMEMEVAL — {datetime.now(timezone.utc).isoformat()}", flush=True)
    print(f"Questions: {len(effective)} effective ({len(results)-len(effective)} skipped)", flush=True)

    if cos_items:
        cos_r1 = np.mean([r["cosine"]["r1"] for r in cos_items]) * 100
        cos_r5 = np.mean([r["cosine"]["r5"] for r in cos_items]) * 100
        cos_r10 = np.mean([r["cosine"]["r10"] for r in cos_items]) * 100
        cos_ndcg = np.mean([r["cosine"]["ndcg"] for r in cos_items])
        print("\nCOSINE BASELINE:", flush=True)
        print(f" R@1: {cos_r1:.1f}%", flush=True)
        print(f" R@5: {cos_r5:.1f}%", flush=True)
        print(f" R@10: {cos_r10:.1f}%", flush=True)
        print(f" NDCG: {cos_ndcg:.4f}", flush=True)

    for cfg_name in ["default", "retrieval"]:
        items = [r for r in effective if cfg_name in r]
        if not items:
            continue
        r1 = np.mean([r[cfg_name]["r1"] for r in items]) * 100
        r5 = np.mean([r[cfg_name]["r5"] for r in items]) * 100
        r10 = np.mean([r[cfg_name]["r10"] for r in items]) * 100
        ndcg = np.mean([r[cfg_name]["ndcg"] for r in items])
        # Group R@5 by the solver method the optimizer reported.
        methods = {}
        for r in items:
            m = r[cfg_name].get("method", "?")
            methods.setdefault(m, []).append(r[cfg_name]["r5"])
        avg_time = np.mean([r[cfg_name]["time"] for r in items])
        print(f"\nQMG {cfg_name.upper()}:", flush=True)
        print(f" R@1: {r1:.1f}%", flush=True)
        print(f" R@5: {r5:.1f}%", flush=True)
        print(f" R@10: {r10:.1f}%", flush=True)
        print(f" NDCG: {ndcg:.4f}", flush=True)
        print(f" Avg time: {avg_time:.1f}s", flush=True)
        for m, vals in sorted(methods.items()):
            print(f" {m}: {len(vals)}x R@5={np.mean(vals)*100:.1f}%", flush=True)

    total_t = time.time() - T_START
    print(f"\nTotal: {total_t:.0f}s ({total_t/60:.1f} min)", flush=True)
    print("=" * 60, flush=True)

    # Create the output directory first so a long run is not lost to a
    # missing path at write time.
    os.makedirs(RESULTS_DIR, exist_ok=True)

    with open(RESULTS_FILE, "w", encoding="utf-8") as f:
        json.dump(
            {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "n_total": len(data),
                "results": results,
            },
            f,
            indent=2,
            default=str,
        )
    print(f"\nSaved to {RESULTS_FILE}", flush=True)

    with open(CSV_FILE, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["idx", "n", "ngold", "cr1", "cr5", "cr10", "cndcg",
                    "dr1", "dr5", "dr10", "dndcg", "dmethod",
                    "rr1", "rr5", "rr10", "rndcg", "rmethod"])
        for r in results:
            if r.get("skip"):
                continue
            cosine = r["cosine"]
            # A config is absent when --fast was used or the optimizer failed;
            # its columns are then written empty.
            default = r.get("default", {})
            retrieval = r.get("retrieval", {})
            w.writerow([
                r["idx"], r["n_sessions"], r["n_gold"],
                cosine["r1"], cosine["r5"], cosine["r10"], cosine["ndcg"],
                default.get("r1"), default.get("r5"),
                default.get("r10"), default.get("ndcg"),
                default.get("method"),
                retrieval.get("r1"), retrieval.get("r5"),
                retrieval.get("r10"), retrieval.get("ndcg"),
                retrieval.get("method"),
            ])
    print(f"CSV saved to {CSV_FILE}", flush=True)


if __name__ == "__main__":
    main()