quantum-memory-graph 1.2.0__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantum_memory_graph-1.2.0/quantum_memory_graph.egg-info → quantum_memory_graph-1.2.2}/PKG-INFO +14 -24
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/README.md +3 -22
- quantum_memory_graph-1.2.2/benchmarks/run_longmemeval_cvar_v2.py +272 -0
- quantum_memory_graph-1.2.2/benchmarks/run_longmemeval_staged.py +355 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/__init__.py +1 -1
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/pipeline.py +81 -14
- quantum_memory_graph-1.2.2/quantum_memory_graph/synergy_reranker.py +133 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2/quantum_memory_graph.egg-info}/PKG-INFO +14 -24
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/SOURCES.txt +3 -1
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/requires.txt +0 -1
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/setup.cfg +12 -2
- quantum_memory_graph-1.2.0/benchmarks/memcombine.py +0 -236
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/LICENSE +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/__init__.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/data_collector.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/fast_longmemeval.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/generate_scenarios.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v2.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v3.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v4.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v5.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v6.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/longmemeval_bench_v7.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_final.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_full_benchmark.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_full_benchmark_v2.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/benchmarks/run_longmemeval_chunked_staged.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/pyproject.toml +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/__main__.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/api.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/graph.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/pce_optimizer.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/recency.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph/subgraph_optimizer.py +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/dependency_links.txt +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/quantum_memory_graph.egg-info/top_level.txt +0 -0
- {quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/tests/test_full_pipeline.py +0 -0
{quantum_memory_graph-1.2.0/quantum_memory_graph.egg-info → quantum_memory_graph-1.2.2}/PKG-INFO
RENAMED
|
@@ -1,19 +1,28 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: quantum-memory-graph
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.2.2
|
|
4
4
|
Summary: Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
|
|
5
5
|
Home-page: https://github.com/Dustin-a11y/quantum-memory-graph
|
|
6
6
|
Author: Coinkong (Chef's Attraction)
|
|
7
7
|
License: MIT
|
|
8
|
+
Project-URL: Source Code, https://github.com/Dustin-a11y/quantum-memory-graph
|
|
9
|
+
Project-URL: Issue Tracker, https://github.com/Dustin-a11y/quantum-memory-graph/issues
|
|
10
|
+
Project-URL: Benchmark Results, https://github.com/Dustin-a11y/quantum-memory-graph/tree/main/benchmarks
|
|
11
|
+
Project-URL: LongMemEval Submission, https://github.com/xiaowu0162/LongMemEval/issues
|
|
8
12
|
Keywords: quantum,memory,knowledge-graph,agents,qaoa,ai
|
|
9
13
|
Classifier: Development Status :: 4 - Beta
|
|
10
14
|
Classifier: Intended Audience :: Developers
|
|
11
15
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
23
|
Requires-Python: >=3.9
|
|
14
24
|
Description-Content-Type: text/markdown
|
|
15
25
|
License-File: LICENSE
|
|
16
|
-
Requires-Dist: quantum-agent-memory>=0.1.0
|
|
17
26
|
Requires-Dist: sentence-transformers>=2.2.0
|
|
18
27
|
Requires-Dist: networkx>=3.0
|
|
19
28
|
Requires-Dist: numpy>=1.24.0
|
|
@@ -35,29 +44,16 @@ Dynamic: license-file
|
|
|
35
44
|
|
|
36
45
|
Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
|
|
37
46
|
|
|
38
|
-
Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
|
|
39
|
-
|
|
40
|
-
## Benchmark: MemCombine
|
|
41
|
-
|
|
42
|
-
We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
|
|
43
|
-
|
|
44
|
-
| Method | Coverage | Evidence Recall | F1 | Perfect |
|
|
45
|
-
|--------|----------|----------------|----|---------|
|
|
46
|
-
| Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
|
|
47
|
-
| **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
|
|
48
|
-
| **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
|
|
49
|
-
|
|
50
|
-
When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
|
|
51
47
|
## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
|
|
52
48
|
|
|
53
|
-
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
|
|
49
|
+
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) — [verified submission](https://github.com/xiaowu0162/LongMemEval/issues/46).
|
|
54
50
|
|
|
55
51
|
| Method | R@1 | R@5 | R@10 | NDCG@10 |
|
|
56
52
|
|--------|:---:|:---:|:----:|:-------:|
|
|
57
53
|
| OMEGA (prev SOTA) | — | 89.2% | 94.1% | 87.5% |
|
|
58
54
|
| Mastra OM | — | 91.0% | 95.2% | 89.1% |
|
|
59
55
|
| **QMG v1.1 (published #1)** | — | **95.8%** | **98.85%** | **93.2%** |
|
|
60
|
-
| **QMG v1.2
|
|
56
|
+
| **QMG v1.2 — chunked retrieval pipeline** 🏆 | **90.6%** | **98.6%** | **99.4%** | **94.26%** |
|
|
61
57
|
|
|
62
58
|
**Benchmark run:** 500 questions, chunked gte-large embeddings (500-char blocks, 100-char overlap, mean-of-top-3 session scoring). Verified on DGX Spark GB10 (CUDA, ~53 min).
|
|
63
59
|
|
|
@@ -65,7 +61,6 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
|
|
|
65
61
|
|
|
66
62
|
**See:** `benchmarks/run_longmemeval_chunked_staged.py` for the exact benchmark code, `benchmarks/longmemeval_chunked_staged_results.json` for full per-question results.
|
|
67
63
|
|
|
68
|
-
|
|
69
64
|
## Install
|
|
70
65
|
|
|
71
66
|
```bash
|
|
@@ -191,10 +186,7 @@ result = recall(
|
|
|
191
186
|
)
|
|
192
187
|
```
|
|
193
188
|
|
|
194
|
-
### Run MemCombine Benchmark
|
|
195
|
-
|
|
196
189
|
```python
|
|
197
|
-
from benchmarks.memcombine import run_benchmark
|
|
198
190
|
|
|
199
191
|
def my_recall(memories, query, K):
|
|
200
192
|
# Your recall implementation
|
|
@@ -227,8 +219,6 @@ Validated on `ibm_fez` and `ibm_kingston` backends.
|
|
|
227
219
|
|
|
228
220
|
MIT License — Copyright 2026 Coinkong (Chef's Attraction)
|
|
229
221
|
|
|
230
|
-
|
|
231
222
|
## Links
|
|
232
223
|
|
|
233
|
-
- [
|
|
234
|
-
- [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
|
|
224
|
+
- [GitHub](https://github.com/Dustin-a11y/quantum-memory-graph) — Source code and benchmarks
|
|
{quantum_memory_graph-1.2.0 → quantum_memory_graph-1.2.2}/README.md
RENAMED
@@ -4,29 +4,16 @@
|
|
|
4
4
|
|
|
5
5
|
Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
|
|
6
6
|
|
|
7
|
-
Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
|
|
8
|
-
|
|
9
|
-
## Benchmark: MemCombine
|
|
10
|
-
|
|
11
|
-
We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
|
|
12
|
-
|
|
13
|
-
| Method | Coverage | Evidence Recall | F1 | Perfect |
|
|
14
|
-
|--------|----------|----------------|----|---------|
|
|
15
|
-
| Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
|
|
16
|
-
| **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
|
|
17
|
-
| **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
|
|
18
|
-
|
|
19
|
-
When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
|
|
20
7
|
## 🏆 #1 on LongMemEval (ICLR 2025 Benchmark)
|
|
21
8
|
|
|
22
|
-
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
|
|
9
|
+
Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813) — [verified submission](https://github.com/xiaowu0162/LongMemEval/issues/46).
|
|
23
10
|
|
|
24
11
|
| Method | R@1 | R@5 | R@10 | NDCG@10 |
|
|
25
12
|
|--------|:---:|:---:|:----:|:-------:|
|
|
26
13
|
| OMEGA (prev SOTA) | — | 89.2% | 94.1% | 87.5% |
|
|
27
14
|
| Mastra OM | — | 91.0% | 95.2% | 89.1% |
|
|
28
15
|
| **QMG v1.1 (published #1)** | — | **95.8%** | **98.85%** | **93.2%** |
|
|
29
|
-
| **QMG v1.2
|
|
16
|
+
| **QMG v1.2 — chunked retrieval pipeline** 🏆 | **90.6%** | **98.6%** | **99.4%** | **94.26%** |
|
|
30
17
|
|
|
31
18
|
**Benchmark run:** 500 questions, chunked gte-large embeddings (500-char blocks, 100-char overlap, mean-of-top-3 session scoring). Verified on DGX Spark GB10 (CUDA, ~53 min).
|
|
32
19
|
|
|
@@ -34,7 +21,6 @@ Tested on the official [LongMemEval benchmark](https://arxiv.org/abs/2410.10813)
|
|
|
34
21
|
|
|
35
22
|
**See:** `benchmarks/run_longmemeval_chunked_staged.py` for the exact benchmark code, `benchmarks/longmemeval_chunked_staged_results.json` for full per-question results.
|
|
36
23
|
|
|
37
|
-
|
|
38
24
|
## Install
|
|
39
25
|
|
|
40
26
|
```bash
|
|
@@ -160,10 +146,7 @@ result = recall(
|
|
|
160
146
|
)
|
|
161
147
|
```
|
|
162
148
|
|
|
163
|
-
### Run MemCombine Benchmark
|
|
164
|
-
|
|
165
149
|
```python
|
|
166
|
-
from benchmarks.memcombine import run_benchmark
|
|
167
150
|
|
|
168
151
|
def my_recall(memories, query, K):
|
|
169
152
|
# Your recall implementation
|
|
@@ -196,8 +179,6 @@ Validated on `ibm_fez` and `ibm_kingston` backends.
|
|
|
196
179
|
|
|
197
180
|
MIT License — Copyright 2026 Coinkong (Chef's Attraction)
|
|
198
181
|
|
|
199
|
-
|
|
200
182
|
## Links
|
|
201
183
|
|
|
202
|
-
- [
|
|
203
|
-
- [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
|
|
184
|
+
- [GitHub](https://github.com/Dustin-a11y/quantum-memory-graph) — Source code and benchmarks
|
|
quantum_memory_graph-1.2.2/benchmarks/run_longmemeval_cvar_v2.py
ADDED
@@ -0,0 +1,272 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
LongMemEval 500-question Benchmark — QMG CVaR subgraph optimizer.
|
|
4
|
+
|
|
5
|
+
Routes each question through the QMG subgraph optimizer on Spark.
|
|
6
|
+
Measures recall@K against gold answer sessions.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python3 -u run_longmemeval_cvar.py --limit 5 # Quick test
|
|
10
|
+
python3 -u run_longmemeval_cvar.py --force # Full 500
|
|
11
|
+
python3 -u run_longmemeval_cvar.py --fast # Skip QMG, cosine only
|
|
12
|
+
|
|
13
|
+
Output: JSON results + CSV saved to benchmarks/ directory.
|
|
14
|
+
"""
|
|
15
|
+
import json, time, math, sys, os, argparse, csv
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
DATA_PATH = "/home/dt/projects-shared/LongMemEval/data/longmemeval_s_cleaned.json"
|
|
20
|
+
RESULTS_DIR = "/home/dt/qmg-v1/benchmarks"
|
|
21
|
+
RESULTS_FILE = os.path.join(RESULTS_DIR, "longmemeval_cvar_results.json")
|
|
22
|
+
CSV_FILE = os.path.join(RESULTS_DIR, "longmemeval_cvar_results.csv")
|
|
23
|
+
|
|
24
|
+
T_START = time.time()
|
|
25
|
+
|
|
26
|
+
def flatten_session(session):
|
|
27
|
+
if isinstance(session, str): return session
|
|
28
|
+
if isinstance(session, list):
|
|
29
|
+
parts = []
|
|
30
|
+
for turn in session:
|
|
31
|
+
if isinstance(turn, dict):
|
|
32
|
+
parts.append(f"{turn.get('role','')}: {turn.get('content', turn.get('text', str(turn)))}")
|
|
33
|
+
else:
|
|
34
|
+
parts.append(str(turn))
|
|
35
|
+
return "\n".join(parts)
|
|
36
|
+
return str(session)
|
|
37
|
+
|
|
38
|
+
def load_data(path, limit=None):
|
|
39
|
+
with open(path) as f: data = json.load(f)
|
|
40
|
+
if not isinstance(data, list):
|
|
41
|
+
for k in ["data","questions","items","results"]:
|
|
42
|
+
if k in data: data = data[k]; break
|
|
43
|
+
if limit: data = data[:limit]
|
|
44
|
+
return data
|
|
45
|
+
|
|
46
|
+
def recall_at_k(ranked, gold, K):
|
|
47
|
+
gold_set = set(gold)
|
|
48
|
+
if not gold_set: return 1.0
|
|
49
|
+
return 1.0 if set(ranked[:K]) & gold_set else 0.0
|
|
50
|
+
|
|
51
|
+
def ndcg_at_k(ranked, gold, K):
|
|
52
|
+
gold_set = set(gold)
|
|
53
|
+
if not gold_set: return 1.0
|
|
54
|
+
dcg = sum(1.0/math.log2(i+2) for i,idx in enumerate(ranked[:K]) if idx in gold_set)
|
|
55
|
+
idcg = sum(1.0/math.log2(i+2) for i in range(min(len(gold_set), K)))
|
|
56
|
+
return dcg/idcg if idcg>0 else 0.0
|
|
57
|
+
|
|
58
|
+
def main():
|
|
59
|
+
parser = argparse.ArgumentParser()
|
|
60
|
+
parser.add_argument("--limit", type=int, default=None)
|
|
61
|
+
parser.add_argument("--fast", action="store_true", help="Skip QMG, cosine only")
|
|
62
|
+
parser.add_argument("--force", action="store_true", help="Run full 500")
|
|
63
|
+
args = parser.parse_args()
|
|
64
|
+
|
|
65
|
+
data = load_data(DATA_PATH)
|
|
66
|
+
print(f"Loaded {len(data)} questions", flush=True)
|
|
67
|
+
|
|
68
|
+
limit = args.limit
|
|
69
|
+
if args.force: limit = None
|
|
70
|
+
if limit: data = data[:limit]
|
|
71
|
+
|
|
72
|
+
from sentence_transformers import SentenceTransformer
|
|
73
|
+
import torch
|
|
74
|
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
75
|
+
print(f"Loading gte-large on {device}...", flush=True)
|
|
76
|
+
model = SentenceTransformer("thenlper/gte-large", device=device)
|
|
77
|
+
dim = model.get_sentence_embedding_dimension()
|
|
78
|
+
print(f"Model loaded, dim={dim}", flush=True)
|
|
79
|
+
|
|
80
|
+
results = []
|
|
81
|
+
n_questions = len(data)
|
|
82
|
+
|
|
83
|
+
for idx, item in enumerate(data):
|
|
84
|
+
question = item.get("question", item.get("query", ""))
|
|
85
|
+
haystack = item.get("haystack_sessions", item.get("sessions", item.get("corpus", [])))
|
|
86
|
+
haystack_ids = item.get("haystack_session_ids", item.get("session_ids", []))
|
|
87
|
+
answer_ids = item.get("answer_session_ids", item.get("answer_ids", []))
|
|
88
|
+
|
|
89
|
+
gold_indices = []
|
|
90
|
+
for g in answer_ids:
|
|
91
|
+
try: gold_indices.append(haystack_ids.index(g))
|
|
92
|
+
except ValueError: pass
|
|
93
|
+
|
|
94
|
+
if not gold_indices or len(haystack) < 3:
|
|
95
|
+
results.append({"idx": idx, "skip": True, "reason": "no_gold_or_too_few"})
|
|
96
|
+
continue
|
|
97
|
+
|
|
98
|
+
texts = [flatten_session(s) for s in haystack]
|
|
99
|
+
|
|
100
|
+
# Encode
|
|
101
|
+
t0 = time.time()
|
|
102
|
+
all_texts = [question] + texts
|
|
103
|
+
embs = model.encode(all_texts, normalize_embeddings=True, batch_size=32, show_progress_bar=False)
|
|
104
|
+
q_emb = embs[0]
|
|
105
|
+
sess_embs = embs[1:]
|
|
106
|
+
encode_time = time.time() - t0
|
|
107
|
+
|
|
108
|
+
n_sessions = len(sess_embs)
|
|
109
|
+
K_target = min(5, n_sessions)
|
|
110
|
+
|
|
111
|
+
# Cosine baseline
|
|
112
|
+
t0 = time.time()
|
|
113
|
+
cos_scores = q_emb @ sess_embs.T
|
|
114
|
+
cos_ranked = np.argsort(cos_scores)[::-1].tolist()
|
|
115
|
+
cos_time = time.time() - t0
|
|
116
|
+
|
|
117
|
+
r = {
|
|
118
|
+
"idx": idx,
|
|
119
|
+
"question": question[:120],
|
|
120
|
+
"n_sessions": n_sessions,
|
|
121
|
+
"n_gold": len(gold_indices),
|
|
122
|
+
"cosine": {
|
|
123
|
+
"r1": float(recall_at_k(cos_ranked, gold_indices, 1)),
|
|
124
|
+
"r5": float(recall_at_k(cos_ranked, gold_indices, 5)),
|
|
125
|
+
"r10": float(recall_at_k(cos_ranked, gold_indices, 10)),
|
|
126
|
+
"ndcg": float(ndcg_at_k(cos_ranked, gold_indices, 10)),
|
|
127
|
+
"time": cos_time,
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
# QMG CVaR optimizer — two configs
|
|
132
|
+
if not args.fast:
|
|
133
|
+
t0 = time.time()
|
|
134
|
+
try:
|
|
135
|
+
sys.path.insert(0, "/home/dt/qmg-v1")
|
|
136
|
+
from quantum_memory_graph.subgraph_optimizer import optimize_subgraph
|
|
137
|
+
|
|
138
|
+
# Build adjacency from session embeddings (cosine similarity matrix)
|
|
139
|
+
adj = sess_embs @ sess_embs.T
|
|
140
|
+
np.fill_diagonal(adj, 0.0)
|
|
141
|
+
|
|
142
|
+
for cfg_name, cfg in [
|
|
143
|
+
("default", {"alpha": 0.4, "beta_conn": 0.35, "gamma_cov": 0.25, "shots": 4096}),
|
|
144
|
+
("retrieval", {"alpha": 1.0, "beta_conn": 0.0, "gamma_cov": 0.0, "shots": 4096}),
|
|
145
|
+
]:
|
|
146
|
+
# Cap candidates at 14 for QAOA to avoid memory OOM
|
|
147
|
+
# (2^14 = 16K complex numbers, 2^40 = 17TB)
|
|
148
|
+
top_indices = np.argsort(cos_scores)[::-1][:14]
|
|
149
|
+
top_scores = cos_scores[top_indices]
|
|
150
|
+
top_adj = adj[np.ix_(top_indices, top_indices)]
|
|
151
|
+
|
|
152
|
+
result = optimize_subgraph(
|
|
153
|
+
relevance_scores=top_scores,
|
|
154
|
+
adjacency=top_adj,
|
|
155
|
+
K=K_target,
|
|
156
|
+
alpha=cfg["alpha"],
|
|
157
|
+
beta_conn=cfg["beta_conn"],
|
|
158
|
+
gamma_cov=cfg["gamma_cov"],
|
|
159
|
+
grid_size=6,
|
|
160
|
+
shots=cfg["shots"],
|
|
161
|
+
p_layers=2,
|
|
162
|
+
)
|
|
163
|
+
selection_raw = result.get("selection", [])
|
|
164
|
+
method = result.get("method", "unknown")
|
|
165
|
+
|
|
166
|
+
# Map capped indices back to original indices
|
|
167
|
+
selection = [top_indices[s] for s in selection_raw]
|
|
168
|
+
|
|
169
|
+
sel_set = set(selection)
|
|
170
|
+
ranked = list(selection)
|
|
171
|
+
for i in range(n_sessions):
|
|
172
|
+
if len(ranked) >= n_sessions: break
|
|
173
|
+
if i not in sel_set: ranked.append(i)
|
|
174
|
+
|
|
175
|
+
r[cfg_name] = {
|
|
176
|
+
"r1": float(recall_at_k(ranked, gold_indices, 1)),
|
|
177
|
+
"r5": float(recall_at_k(ranked, gold_indices, 5)),
|
|
178
|
+
"r10": float(recall_at_k(ranked, gold_indices, 10)),
|
|
179
|
+
"ndcg": float(ndcg_at_k(ranked, gold_indices, 10)),
|
|
180
|
+
"method": method,
|
|
181
|
+
"n_capped": len(top_indices),
|
|
182
|
+
"score": float(result.get("score", 0)),
|
|
183
|
+
"optimal_score": float(result.get("optimal", {}).get("score", 0)),
|
|
184
|
+
"time": time.time() - t0,
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
except Exception as e:
|
|
188
|
+
import traceback
|
|
189
|
+
r["qmg_error"] = f"{type(e).__name__}: {e}"
|
|
190
|
+
r["qmg_traceback"] = traceback.format_exc()
|
|
191
|
+
|
|
192
|
+
r["total_qmg_time"] = time.time() - t0
|
|
193
|
+
|
|
194
|
+
results.append(r)
|
|
195
|
+
|
|
196
|
+
# Progress every 5 questions
|
|
197
|
+
if (idx+1) % 5 == 0:
|
|
198
|
+
elapsed = time.time() - T_START
|
|
199
|
+
effective = [rr for rr in results if not rr.get("skip")]
|
|
200
|
+
cos_done = [rr for rr in effective if "cosine" in rr]
|
|
201
|
+
if cos_done:
|
|
202
|
+
cos_r5_avg = np.mean([rr["cosine"]["r5"] for rr in cos_done]) * 100
|
|
203
|
+
print(f"[{idx+1}/{n_questions}] {elapsed:.0f}s cos_r5={cos_r5_avg:.1f}%", flush=True)
|
|
204
|
+
|
|
205
|
+
# Summary
|
|
206
|
+
effective = [r for r in results if not r.get("skip")]
|
|
207
|
+
|
|
208
|
+
cos_items = [r for r in effective if "cosine" in r]
|
|
209
|
+
print("\n" + "="*60, flush=True)
|
|
210
|
+
print(f"LONGMEMEVAL — {datetime.now(timezone.utc).isoformat()}", flush=True)
|
|
211
|
+
print(f"Questions: {len(effective)} effective ({len(results)-len(effective)} skipped)", flush=True)
|
|
212
|
+
|
|
213
|
+
if cos_items:
|
|
214
|
+
cos_r1 = np.mean([r["cosine"]["r1"] for r in cos_items])*100
|
|
215
|
+
cos_r5 = np.mean([r["cosine"]["r5"] for r in cos_items])*100
|
|
216
|
+
cos_r10 = np.mean([r["cosine"]["r10"] for r in cos_items])*100
|
|
217
|
+
cos_ndcg = np.mean([r["cosine"]["ndcg"] for r in cos_items])
|
|
218
|
+
print(f"\nCOSINE BASELINE:", flush=True)
|
|
219
|
+
print(f" R@1: {cos_r1:.1f}%", flush=True)
|
|
220
|
+
print(f" R@5: {cos_r5:.1f}%", flush=True)
|
|
221
|
+
print(f" R@10: {cos_r10:.1f}%", flush=True)
|
|
222
|
+
print(f" NDCG: {cos_ndcg:.4f}", flush=True)
|
|
223
|
+
|
|
224
|
+
for cfg_name in ["default", "retrieval"]:
|
|
225
|
+
items = [r for r in effective if cfg_name in r]
|
|
226
|
+
if items:
|
|
227
|
+
r1 = np.mean([r[cfg_name]["r1"] for r in items])*100
|
|
228
|
+
r5 = np.mean([r[cfg_name]["r5"] for r in items])*100
|
|
229
|
+
r10 = np.mean([r[cfg_name]["r10"] for r in items])*100
|
|
230
|
+
ndcg = np.mean([r[cfg_name]["ndcg"] for r in items])
|
|
231
|
+
methods = {}
|
|
232
|
+
for r in items:
|
|
233
|
+
m = r[cfg_name].get("method", "?")
|
|
234
|
+
methods.setdefault(m, []).append(r[cfg_name]["r5"])
|
|
235
|
+
avg_time = np.mean([r[cfg_name]["time"] for r in items])
|
|
236
|
+
print(f"\nQMG {cfg_name.upper()}:", flush=True)
|
|
237
|
+
print(f" R@1: {r1:.1f}%", flush=True)
|
|
238
|
+
print(f" R@5: {r5:.1f}%", flush=True)
|
|
239
|
+
print(f" R@10: {r10:.1f}%", flush=True)
|
|
240
|
+
print(f" NDCG: {ndcg:.4f}", flush=True)
|
|
241
|
+
print(f" Avg time: {avg_time:.1f}s", flush=True)
|
|
242
|
+
for m, vals in sorted(methods.items()):
|
|
243
|
+
print(f" {m}: {len(vals)}x R@5={np.mean(vals)*100:.1f}%", flush=True)
|
|
244
|
+
|
|
245
|
+
total_t = time.time() - T_START
|
|
246
|
+
print(f"\nTotal: {total_t:.0f}s ({total_t/60:.1f} min)", flush=True)
|
|
247
|
+
print("="*60, flush=True)
|
|
248
|
+
|
|
249
|
+
with open(RESULTS_FILE, "w") as f: json.dump({"timestamp": datetime.now(timezone.utc).isoformat(), "n_total": len(data), "results": results}, f, indent=2, default=str)
|
|
250
|
+
print(f"\nSaved to {RESULTS_FILE}", flush=True)
|
|
251
|
+
|
|
252
|
+
with open(CSV_FILE, "w", newline="") as f:
|
|
253
|
+
w = csv.writer(f)
|
|
254
|
+
w.writerow(["idx","n","ngold","cr1","cr5","cr10","cndcg",
|
|
255
|
+
"dr1","dr5","dr10","dndcg","dmethod",
|
|
256
|
+
"rr1","rr5","rr10","rndcg","rmethod"])
|
|
257
|
+
for r in results:
|
|
258
|
+
if r.get("skip"): continue
|
|
259
|
+
w.writerow([
|
|
260
|
+
r["idx"], r["n_sessions"], r["n_gold"],
|
|
261
|
+
r["cosine"]["r1"], r["cosine"]["r5"], r["cosine"]["r10"], r["cosine"]["ndcg"],
|
|
262
|
+
r.get("default", {}).get("r1"), r.get("default", {}).get("r5"),
|
|
263
|
+
r.get("default", {}).get("r10"), r.get("default", {}).get("ndcg"),
|
|
264
|
+
r.get("default", {}).get("method"),
|
|
265
|
+
r.get("retrieval", {}).get("r1"), r.get("retrieval", {}).get("r5"),
|
|
266
|
+
r.get("retrieval", {}).get("r10"), r.get("retrieval", {}).get("ndcg"),
|
|
267
|
+
r.get("retrieval", {}).get("method"),
|
|
268
|
+
])
|
|
269
|
+
print(f"CSV saved to {CSV_FILE}", flush=True)
|
|
270
|
+
|
|
271
|
+
if __name__ == "__main__":
|
|
272
|
+
main()
|