quantum-memory-graph 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Coinkong (Chef's Attraction)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.1
2
+ Name: quantum-memory-graph
3
+ Version: 0.1.0
4
+ Summary: Quantum-optimized knowledge graph memory for AI agents. Relationship-aware subgraph selection via QAOA.
5
+ Home-page: https://github.com/Dustin-a11y/quantum-memory-graph
6
+ Author: Coinkong (Chef's Attraction)
7
+ License: MIT
8
+ Keywords: quantum,memory,knowledge-graph,agents,qaoa,ai
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Requires-Python: >=3.9
14
+ Description-Content-Type: text/markdown
15
+ Provides-Extra: api
16
+ Provides-Extra: ibm
17
+ Provides-Extra: dev
18
+ License-File: LICENSE
19
+
20
+ # Quantum Memory Graph ⚛️🧠
21
+
22
+ **Relationship-aware memory for AI agents. Knowledge graphs + quantum-optimized subgraph selection.**
23
+
24
+ Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
25
+
26
+ Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
27
+
28
+ ## Benchmark: MemCombine
29
+
30
+ We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
31
+
32
+ | Method | Coverage | Evidence Recall | F1 | Perfect |
33
+ |--------|----------|----------------|----|---------|
34
+ | Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
35
+ | **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
36
+ | **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
37
+
38
+ When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
39
+
40
+ ## Install
41
+
42
+ ```bash
43
+ pip install quantum-memory-graph
44
+ ```
45
+
46
+ ## Quick Start
47
+
48
+ ```python
49
+ from quantum_memory_graph import store, recall
50
+
51
+ # Store memories — automatically builds knowledge graph
52
+ store("Project Alpha uses React frontend with TypeScript.")
53
+ store("Project Alpha backend is FastAPI with PostgreSQL.")
54
+ store("FastAPI connects to PostgreSQL via SQLAlchemy ORM.")
55
+ store("React components use Material UI for styling.")
56
+ store("Team had pizza for lunch. Pepperoni was great.")
57
+
58
+ # Recall — graph traversal + QAOA finds the optimal combination
59
+ result = recall("What is Project Alpha's full tech stack?", K=4)
60
+
61
+ for memory in result["memories"]:
62
+ print(f" {memory['text']}")
63
+ print(f" Connected to {len(memory['connections'])} other selected memories")
64
+ ```
65
+
66
+ Output: Returns React, FastAPI, PostgreSQL, and SQLAlchemy memories — connected, complete, no noise. The pizza memory is excluded because it has no graph connections to the tech stack cluster.
67
+
68
+ ## How It Works
69
+
70
+ ```
71
+ Query: "What's the tech stack?"
72
+
73
+
74
+ ┌─────────────────────┐
75
+ │ 1. Graph Search │ Embedding similarity + multi-hop traversal
76
+ │ Find neighbors │ Discovers memories connected to relevant ones
77
+ └────────┬────────────┘
78
+ │ 14 candidates
79
+
80
+ ┌─────────────────────┐
81
+ │ 2. Subgraph Data │ Extract adjacency matrix + relevance scores
82
+ │ Build problem │ Encode relationships as optimization weights
83
+ └────────┬────────────┘
84
+ │ NP-hard selection
85
+
86
+ ┌─────────────────────┐
87
+ │ 3. QAOA Optimize │ Quantum approximate optimization
88
+ │ Find best K │ Maximizes: relevance + connectivity + coverage
89
+ └────────┬────────────┘
90
+ │ K memories
91
+
92
+ ┌─────────────────────┐
93
+ │ 4. Return with │ Each memory includes its connections
94
+ │ relationships │ to other selected memories
95
+ └─────────────────────┘
96
+ ```
97
+
98
+ ### Why Quantum?
99
+
100
+ Optimal subgraph selection is NP-hard. Given N candidate memories, finding the best K that jointly maximize relevance, connectivity, AND coverage has exponential worst-case classical complexity. QAOA produces approximate solutions in polynomial time, and on our MemCombine benchmark this graph-aware selection outperforms greedy heuristics — making subgraph selection a natural fit for quantum optimization, though no general quantum speedup for NP-hard problems has been proven.
101
+
102
+ ## Architecture
103
+
104
+ ### Three Layers
105
+
106
+ 1. **Knowledge Graph** (`graph.py`) — Memories are nodes. Relationships are weighted edges based on:
107
+ - Semantic similarity (embedding cosine distance)
108
+ - Entity co-occurrence (shared people, projects, concepts)
109
+ - Temporal proximity (memories close in time)
110
+ - Source proximity (same conversation/document)
111
+
112
+ 2. **Subgraph Optimizer** (`subgraph_optimizer.py`) — QAOA circuit that maximizes:
113
+ - α × relevance (individual memory scores)
114
+ - β × connectivity (edge weights within selected subgraph)
115
+ - γ × coverage (topic diversity across selection)
116
+
117
+ 3. **Pipeline** (`pipeline.py`) — Unified `store()` and `recall()` interface.
118
+
119
+ ### Optional: MemPalace Integration
120
+
121
+ Use [MemPalace](https://github.com/milla-jovovich/mempalace) (MIT, by @bensig) as the storage/retrieval backend for 96.6% base retrieval quality:
122
+
123
+ ```python
124
+ from quantum_memory_graph.mempalace_bridge import store_memory, recall_memories
125
+
126
+ # MemPalace stores verbatim → ChromaDB retrieves candidates → QAOA selects optimal subgraph
127
+ result = recall_memories("What happened in the meeting?", K=5, use_qaoa=True)
128
+ ```
129
+
130
+ ## API Server
131
+
132
+ ```bash
133
+ pip install quantum-memory-graph[api]
134
+ python -m quantum_memory_graph.api
135
+ ```
136
+
137
+ Endpoints:
138
+ - `POST /store` — Store a memory
139
+ - `POST /recall` — Graph + QAOA recall
140
+ - `POST /store-batch` — Batch store
141
+ - `GET /stats` — Graph statistics
142
+ - `GET /` — Health check
143
+
144
+ ## Advanced Usage
145
+
146
+ ### Custom Graph
147
+
148
+ ```python
149
+ from quantum_memory_graph import MemoryGraph, recall
150
+ from quantum_memory_graph.pipeline import set_graph
151
+
152
+ # Tune similarity threshold for edge creation
153
+ graph = MemoryGraph(similarity_threshold=0.25)
154
+ set_graph(graph)
155
+
156
+ # Store and recall as normal
157
+ ```
158
+
159
+ ### Tune QAOA Parameters
160
+
161
+ ```python
162
+ result = recall(
163
+ "query",
164
+ K=5,
165
+ alpha=0.4, # Relevance weight
166
+ beta_conn=0.35, # Connectivity weight
167
+ gamma_cov=0.25, # Coverage/diversity weight
168
+ hops=3, # Graph traversal depth
169
+ top_seeds=7, # Initial seed nodes
170
+ max_candidates=14, # Max qubits for QAOA
171
+ )
172
+ ```
173
+
174
+ ### Run MemCombine Benchmark
175
+
176
+ ```python
177
+ from benchmarks.memcombine import run_benchmark
178
+
179
+ def my_recall(memories, query, K):
180
+ # Your recall implementation
181
+ return selected_indices # List[int]
182
+
183
+ results = run_benchmark(my_recall, K=5)
184
+ print(f"Coverage: {results['avg_coverage']*100:.1f}%")
185
+ ```
186
+
187
+ ## IBM Quantum Hardware
188
+
189
+ For production workloads, run QAOA on real quantum hardware:
190
+
191
+ ```bash
192
+ pip install quantum-memory-graph[ibm]
193
+ export IBM_QUANTUM_TOKEN=your_token
194
+ ```
195
+
196
+ Validated on `ibm_fez` and `ibm_kingston` backends.
197
+
198
+ ## Requirements
199
+
200
+ - Python ≥ 3.9
201
+ - sentence-transformers
202
+ - networkx
203
+ - qiskit + qiskit-aer
204
+ - numpy
205
+
206
+ ## License
207
+
208
+ MIT License — Copyright 2026 Coinkong (Chef's Attraction)
209
+
210
+ Built with [MemPalace](https://github.com/milla-jovovich/mempalace) by @bensig (MIT License). See [THIRD-PARTY-LICENSES](THIRD-PARTY-LICENSES).
211
+
212
+ ## Links
213
+
214
+ - [quantum-agent-memory](https://github.com/Dustin-a11y/quantum-agent-memory) — The QAOA optimization engine
215
+ - [MemPalace](https://github.com/milla-jovovich/mempalace) — Storage and retrieval backend
216
+ - [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
@@ -0,0 +1,197 @@
1
+ # Quantum Memory Graph ⚛️🧠
2
+
3
+ **Relationship-aware memory for AI agents. Knowledge graphs + quantum-optimized subgraph selection.**
4
+
5
+ Every memory system treats memories as independent documents — search, rank, stuff into context. But memories aren't independent. They have *relationships*. "The team chose React" becomes 10x more useful paired with "because of ecosystem maturity" and "FastAPI handles the backend."
6
+
7
+ Quantum Memory Graph maps these relationships, then uses QAOA to find the optimal *combination* of memories — not just the most relevant individuals, but the best connected subgraph that gives your agent maximum context.
8
+
9
+ ## Benchmark: MemCombine
10
+
11
+ We created MemCombine to test what no existing benchmark measures — **memory combination quality**.
12
+
13
+ | Method | Coverage | Evidence Recall | F1 | Perfect |
14
+ |--------|----------|----------------|----|---------|
15
+ | Embedding Top-K | 69.9% | 65.6% | 68.1% | 1/5 |
16
+ | **Graph + QAOA** | **96.7%** | **91.0%** | **92.6%** | **4/5** |
17
+ | **Advantage** | **+26.8%** | **+25.4%** | **+24.5%** | |
18
+
19
+ When the task is "find memories that work *together*," graph-aware quantum selection crushes pure similarity search.
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pip install quantum-memory-graph
25
+ ```
26
+
27
+ ## Quick Start
28
+
29
+ ```python
30
+ from quantum_memory_graph import store, recall
31
+
32
+ # Store memories — automatically builds knowledge graph
33
+ store("Project Alpha uses React frontend with TypeScript.")
34
+ store("Project Alpha backend is FastAPI with PostgreSQL.")
35
+ store("FastAPI connects to PostgreSQL via SQLAlchemy ORM.")
36
+ store("React components use Material UI for styling.")
37
+ store("Team had pizza for lunch. Pepperoni was great.")
38
+
39
+ # Recall — graph traversal + QAOA finds the optimal combination
40
+ result = recall("What is Project Alpha's full tech stack?", K=4)
41
+
42
+ for memory in result["memories"]:
43
+ print(f" {memory['text']}")
44
+ print(f" Connected to {len(memory['connections'])} other selected memories")
45
+ ```
46
+
47
+ Output: Returns React, FastAPI, PostgreSQL, and SQLAlchemy memories — connected, complete, no noise. The pizza memory is excluded because it has no graph connections to the tech stack cluster.
48
+
49
+ ## How It Works
50
+
51
+ ```
52
+ Query: "What's the tech stack?"
53
+
54
+
55
+ ┌─────────────────────┐
56
+ │ 1. Graph Search │ Embedding similarity + multi-hop traversal
57
+ │ Find neighbors │ Discovers memories connected to relevant ones
58
+ └────────┬────────────┘
59
+ │ 14 candidates
60
+
61
+ ┌─────────────────────┐
62
+ │ 2. Subgraph Data │ Extract adjacency matrix + relevance scores
63
+ │ Build problem │ Encode relationships as optimization weights
64
+ └────────┬────────────┘
65
+ │ NP-hard selection
66
+
67
+ ┌─────────────────────┐
68
+ │ 3. QAOA Optimize │ Quantum approximate optimization
69
+ │ Find best K │ Maximizes: relevance + connectivity + coverage
70
+ └────────┬────────────┘
71
+ │ K memories
72
+
73
+ ┌─────────────────────┐
74
+ │ 4. Return with │ Each memory includes its connections
75
+ │ relationships │ to other selected memories
76
+ └─────────────────────┘
77
+ ```
78
+
79
+ ### Why Quantum?
80
+
81
+ Optimal subgraph selection is NP-hard. Given N candidate memories, finding the best K that jointly maximize relevance, connectivity, AND coverage has exponential worst-case classical complexity. QAOA produces approximate solutions in polynomial time, and on our MemCombine benchmark this graph-aware selection outperforms greedy heuristics — making subgraph selection a natural fit for quantum optimization, though no general quantum speedup for NP-hard problems has been proven.
82
+
83
+ ## Architecture
84
+
85
+ ### Three Layers
86
+
87
+ 1. **Knowledge Graph** (`graph.py`) — Memories are nodes. Relationships are weighted edges based on:
88
+ - Semantic similarity (embedding cosine distance)
89
+ - Entity co-occurrence (shared people, projects, concepts)
90
+ - Temporal proximity (memories close in time)
91
+ - Source proximity (same conversation/document)
92
+
93
+ 2. **Subgraph Optimizer** (`subgraph_optimizer.py`) — QAOA circuit that maximizes:
94
+ - α × relevance (individual memory scores)
95
+ - β × connectivity (edge weights within selected subgraph)
96
+ - γ × coverage (topic diversity across selection)
97
+
98
+ 3. **Pipeline** (`pipeline.py`) — Unified `store()` and `recall()` interface.
99
+
100
+ ### Optional: MemPalace Integration
101
+
102
+ Use [MemPalace](https://github.com/milla-jovovich/mempalace) (MIT, by @bensig) as the storage/retrieval backend for 96.6% base retrieval quality:
103
+
104
+ ```python
105
+ from quantum_memory_graph.mempalace_bridge import store_memory, recall_memories
106
+
107
+ # MemPalace stores verbatim → ChromaDB retrieves candidates → QAOA selects optimal subgraph
108
+ result = recall_memories("What happened in the meeting?", K=5, use_qaoa=True)
109
+ ```
110
+
111
+ ## API Server
112
+
113
+ ```bash
114
+ pip install quantum-memory-graph[api]
115
+ python -m quantum_memory_graph.api
116
+ ```
117
+
118
+ Endpoints:
119
+ - `POST /store` — Store a memory
120
+ - `POST /recall` — Graph + QAOA recall
121
+ - `POST /store-batch` — Batch store
122
+ - `GET /stats` — Graph statistics
123
+ - `GET /` — Health check
124
+
125
+ ## Advanced Usage
126
+
127
+ ### Custom Graph
128
+
129
+ ```python
130
+ from quantum_memory_graph import MemoryGraph, recall
131
+ from quantum_memory_graph.pipeline import set_graph
132
+
133
+ # Tune similarity threshold for edge creation
134
+ graph = MemoryGraph(similarity_threshold=0.25)
135
+ set_graph(graph)
136
+
137
+ # Store and recall as normal
138
+ ```
139
+
140
+ ### Tune QAOA Parameters
141
+
142
+ ```python
143
+ result = recall(
144
+ "query",
145
+ K=5,
146
+ alpha=0.4, # Relevance weight
147
+ beta_conn=0.35, # Connectivity weight
148
+ gamma_cov=0.25, # Coverage/diversity weight
149
+ hops=3, # Graph traversal depth
150
+ top_seeds=7, # Initial seed nodes
151
+ max_candidates=14, # Max qubits for QAOA
152
+ )
153
+ ```
154
+
155
+ ### Run MemCombine Benchmark
156
+
157
+ ```python
158
+ from benchmarks.memcombine import run_benchmark
159
+
160
+ def my_recall(memories, query, K):
161
+ # Your recall implementation
162
+ return selected_indices # List[int]
163
+
164
+ results = run_benchmark(my_recall, K=5)
165
+ print(f"Coverage: {results['avg_coverage']*100:.1f}%")
166
+ ```
167
+
168
+ ## IBM Quantum Hardware
169
+
170
+ For production workloads, run QAOA on real quantum hardware:
171
+
172
+ ```bash
173
+ pip install quantum-memory-graph[ibm]
174
+ export IBM_QUANTUM_TOKEN=your_token
175
+ ```
176
+
177
+ Validated on `ibm_fez` and `ibm_kingston` backends.
178
+
179
+ ## Requirements
180
+
181
+ - Python ≥ 3.9
182
+ - sentence-transformers
183
+ - networkx
184
+ - qiskit + qiskit-aer
185
+ - numpy
186
+
187
+ ## License
188
+
189
+ MIT License — Copyright 2026 Coinkong (Chef's Attraction)
190
+
191
+ Built with [MemPalace](https://github.com/milla-jovovich/mempalace) by @bensig (MIT License). See [THIRD-PARTY-LICENSES](THIRD-PARTY-LICENSES).
192
+
193
+ ## Links
194
+
195
+ - [quantum-agent-memory](https://github.com/Dustin-a11y/quantum-agent-memory) — The QAOA optimization engine
196
+ - [MemPalace](https://github.com/milla-jovovich/mempalace) — Storage and retrieval backend
197
+ - [MemCombine Benchmark](benchmarks/memcombine.py) — Test memory combination quality
@@ -0,0 +1 @@
1
+ """MemCombine benchmark suite."""
@@ -0,0 +1,236 @@
1
+ """
2
+ MemCombine Benchmark — Tests memory COMBINATION quality.
3
+
4
+ Unlike LongMemEval (needle-in-haystack retrieval), MemCombine tests whether
5
+ selected memories work TOGETHER to answer complex questions.
6
+
7
+ Questions require synthesizing information from multiple memories:
8
+ - "What was the decision AND its reasoning AND its outcome?"
9
+ - "How do project X and project Y relate?"
10
+ - "What changed between meeting A and meeting B?"
11
+
12
+ Metrics:
13
+ - Combination Score: Do selected memories cover all required facets?
14
+ - Synergy Score: Do memories reference/build on each other?
15
+ - Completeness: Can the question be fully answered from selected memories?
16
+
17
+ Copyright 2026 Coinkong (Chef's Attraction). MIT License.
18
+ """
19
+
20
import json
from dataclasses import dataclass, field
from typing import Dict, List, Optional

import numpy as np
24
+
25
+
26
@dataclass
class MemCombineQuestion:
    """A single benchmark question that can only be answered by combining
    several related memories.

    The fields mirror the keys of the plain-dict scenarios used by the
    benchmark runner; this record is an attribute-style alternative to
    passing raw dicts around.
    """
    # Unique scenario identifier, e.g. "synthesis_1".
    id: str
    # Natural-language question posed to the recall function.
    question: str
    # Question category: synthesis, temporal, causal, or multi-entity.
    category: str
    # All available candidate memories (dicts carrying the memory payload).
    memories: List[Dict]
    # Ids of the memories that actually contain evidence for the answer.
    evidence_ids: List[int]
    # Names of the information facets required for a complete answer.
    facets: List[str]
    # Maps each facet name to the id of the memory that provides it.
    facet_memory_map: Dict[str, int]
36
+
37
+
38
# Built-in benchmark scenarios.
#
# Each scenario is a dict with the following keys:
#   id               — unique scenario name
#   question         — the question posed to the recall function
#   category         — synthesis | temporal | causal | multi_entity
#   memories         — 10 candidate memories ({"id": int, "text": str});
#                      a few are deliberate distractors (lunch, ping pong, ...)
#   evidence_ids     — ids of the memories that contain actual evidence
#   facets           — information facets a complete answer must cover
#   facet_memory_map — facet name -> id of the memory providing that facet
SCENARIOS = [
    {
        "id": "synthesis_1",
        "question": "What technology stack was chosen for the project and why was each component selected?",
        "category": "synthesis",
        "memories": [
            {"id": 0, "text": "Team meeting: Decided to use React for the frontend. Sarah argued it has the best ecosystem for our use case."},
            {"id": 1, "text": "Architecture review: PostgreSQL chosen for the database. Need JSONB support for flexible schemas."},
            {"id": 2, "text": "Sprint planning: Set up CI/CD pipeline using GitHub Actions. Two-week sprint cycles."},
            {"id": 3, "text": "Team lunch at the Italian place. Good pasta. Bob told a funny joke about recursion."},
            {"id": 4, "text": "Backend discussion: FastAPI selected over Django. Need async support for real-time features."},
            {"id": 5, "text": "Deployment strategy: Going with Docker + Kubernetes on AWS. Auto-scaling is critical for launch."},
            {"id": 6, "text": "Budget review: Cloud costs estimated at $2000/month. Within budget allocation."},
            {"id": 7, "text": "Coffee chat about the new office layout. Open floor plan vs cubicles debate."},
            {"id": 8, "text": "Performance testing results: FastAPI handles 10K concurrent connections. Meets our requirements."},
            {"id": 9, "text": "Security audit: Need to add rate limiting and input validation before launch."},
        ],
        "evidence_ids": [0, 1, 4, 5],
        "facets": ["frontend_choice", "frontend_reason", "backend_choice", "backend_reason", "database_choice", "database_reason", "deployment_choice"],
        "facet_memory_map": {"frontend_choice": 0, "frontend_reason": 0, "backend_choice": 4, "backend_reason": 4, "database_choice": 1, "database_reason": 1, "deployment_choice": 5},
    },
    {
        "id": "temporal_1",
        "question": "How did the team's stance on remote work change over the three months?",
        "category": "temporal",
        "memories": [
            {"id": 0, "text": "January all-hands: CEO announced mandatory return to office 5 days a week starting February."},
            {"id": 1, "text": "Q4 revenue report showed 15% growth. Celebrated with team dinner."},
            {"id": 2, "text": "February survey results: 73% of employees reported decreased satisfaction with RTO policy."},
            {"id": 3, "text": "New coffee machine installed in the break room. Everyone loves it."},
            {"id": 4, "text": "February town hall: HR presented data showing 20% increase in turnover since RTO mandate."},
            {"id": 5, "text": "March policy update: CEO reversed course. Now hybrid 3 days in office, 2 remote. Cited retention data."},
            {"id": 6, "text": "IT upgraded the conference room AV equipment for better hybrid meetings."},
            {"id": 7, "text": "Quarterly OKR review. Team hit 4 of 5 objectives."},
            {"id": 8, "text": "March satisfaction survey: Employee satisfaction recovered to 85% after hybrid policy."},
            {"id": 9, "text": "Parking garage construction causing noise complaints from third floor."},
        ],
        "evidence_ids": [0, 2, 4, 5, 8],
        "facets": ["initial_policy", "employee_reaction", "turnover_impact", "policy_change", "final_outcome"],
        "facet_memory_map": {"initial_policy": 0, "employee_reaction": 2, "turnover_impact": 4, "policy_change": 5, "final_outcome": 8},
    },
    {
        "id": "causal_1",
        "question": "What caused the production outage, what was done to fix it, and what prevention measures were taken?",
        "category": "causal",
        "memories": [
            {"id": 0, "text": "Monday 2am alert: Production database hit 100% disk usage. All writes failing."},
            {"id": 1, "text": "Sprint retrospective: Team agreed to improve code review process."},
            {"id": 2, "text": "Root cause analysis: Logging table grew 500GB in 2 weeks due to debug logging left on after feature deploy."},
            {"id": 3, "text": "Incident response: DevOps team purged old log entries and increased disk from 1TB to 2TB."},
            {"id": 4, "text": "New hire orientation for three junior developers. HR handled logistics."},
            {"id": 5, "text": "Post-mortem action item 1: Implement log rotation with 30-day retention policy."},
            {"id": 6, "text": "Post-mortem action item 2: Add disk usage alerts at 70%, 80%, 90% thresholds."},
            {"id": 7, "text": "Post-mortem action item 3: Require removing debug logging before merging to main."},
            {"id": 8, "text": "Team building event at the escape room. Marketing team won."},
            {"id": 9, "text": "Client demo went well. They want to proceed with Phase 2."},
        ],
        "evidence_ids": [0, 2, 3, 5, 6, 7],
        "facets": ["what_happened", "root_cause", "immediate_fix", "prevention_1", "prevention_2", "prevention_3"],
        "facet_memory_map": {"what_happened": 0, "root_cause": 2, "immediate_fix": 3, "prevention_1": 5, "prevention_2": 6, "prevention_3": 7},
    },
    {
        "id": "multi_entity_1",
        "question": "What are each team member's roles and how do their responsibilities interact?",
        "category": "multi_entity",
        "memories": [
            {"id": 0, "text": "Alice leads frontend development. She works closely with Bob on API contracts."},
            {"id": 1, "text": "Company picnic was fun. Great weather this year."},
            {"id": 2, "text": "Bob owns the backend services. He designs APIs that Alice's frontend consumes."},
            {"id": 3, "text": "Carol manages the infrastructure. She provisions the servers Bob's services run on."},
            {"id": 4, "text": "New ping pong table in the break room. Tournament next Friday."},
            {"id": 5, "text": "Dave handles QA. He writes integration tests that cover Alice's UI and Bob's APIs."},
            {"id": 6, "text": "Eve is the project manager. She coordinates between Alice, Bob, Carol, and Dave."},
            {"id": 7, "text": "Office plants are dying. Need to assign someone to water them."},
            {"id": 8, "text": "Alice and Carol paired on improving the CI/CD pipeline. Reduced deploy time by 40%."},
            {"id": 9, "text": "Dave found a critical bug in Bob's API. Bob fixed it same day."},
        ],
        "evidence_ids": [0, 2, 3, 5, 6, 8, 9],
        "facets": ["alice_role", "bob_role", "carol_role", "dave_role", "eve_role", "alice_bob_interaction", "bob_carol_interaction", "dave_integration"],
        "facet_memory_map": {"alice_role": 0, "bob_role": 2, "carol_role": 3, "dave_role": 5, "eve_role": 6, "alice_bob_interaction": 0, "bob_carol_interaction": 3, "dave_integration": 5},
    },
    {
        "id": "synthesis_2",
        "question": "What is the complete customer onboarding process from signup to first value?",
        "category": "synthesis",
        "memories": [
            {"id": 0, "text": "Step 1: Customer signs up via website. Auto-creates account and sends welcome email."},
            {"id": 1, "text": "Marketing team redesigned the landing page. Conversion rate up 12%."},
            {"id": 2, "text": "Step 2: Customer success rep schedules onboarding call within 24 hours of signup."},
            {"id": 3, "text": "Step 3: During onboarding call, rep helps customer import their data and configure integrations."},
            {"id": 4, "text": "Sales team hit quarterly target. Pizza party celebration."},
            {"id": 5, "text": "Step 4: Customer gets access to interactive tutorial. Must complete 3 core modules."},
            {"id": 6, "text": "Step 5: After tutorial completion, customer success checks in at day 7 and day 30."},
            {"id": 7, "text": "Office AC broken again. Facilities contacted."},
            {"id": 8, "text": "Churn analysis: Customers who complete onboarding tutorial have 3x higher retention."},
            {"id": 9, "text": "Support ticket about login issues. Resolved — was a password reset problem."},
        ],
        "evidence_ids": [0, 2, 3, 5, 6],
        "facets": ["signup", "scheduling", "data_import", "tutorial", "followup"],
        "facet_memory_map": {"signup": 0, "scheduling": 2, "data_import": 3, "tutorial": 5, "followup": 6},
    },
]
141
+
142
+
143
def evaluate_combination(selected_ids: List[int], scenario: Dict) -> Dict:
    """Score how well the selected memories jointly answer a scenario.

    Args:
        selected_ids: Ids of the memories picked by the recall function.
        scenario: Scenario dict providing "evidence_ids", "facets" and
            "facet_memory_map".

    Returns:
        Dict with facet coverage, evidence recall/precision/F1, the noise
        count (selected non-evidence memories), and the covered/missing
        facet lists.
    """
    chosen = set(selected_ids)
    evidence = set(scenario["evidence_ids"])
    facets = scenario["facets"]
    facet_map = scenario["facet_memory_map"]

    # A facet counts as covered when its source memory was selected.
    covered = [f for f in facets if facet_map[f] in chosen]
    missing = [f for f in facets if facet_map[f] not in chosen]
    coverage = len(covered) / len(facets) if facets else 0

    # Standard retrieval metrics over the evidence set.
    hits = chosen & evidence
    evidence_recall = len(hits) / len(evidence) if evidence else 0
    precision = len(hits) / len(chosen) if chosen else 0

    # Harmonic mean; guard the degenerate all-zero case.
    denom = precision + evidence_recall
    f1 = (2 * precision * evidence_recall / denom) if denom > 0 else 0

    return {
        "coverage": coverage,
        "evidence_recall": evidence_recall,
        "precision": precision,
        "noise": len(chosen - evidence),
        "covered_facets": covered,
        "missing_facets": missing,
        "f1": f1,
    }
183
+
184
+
185
def run_benchmark(recall_fn, K: int = 5, scenarios: Optional[List[Dict]] = None) -> Dict:
    """
    Run MemCombine benchmark against a recall function.

    Args:
        recall_fn: Function(memories, query, K) -> List[int] (selected
            indices into the memory-text list).
        K: Number of memories to select.
        scenarios: Custom scenario dicts (uses built-in SCENARIOS if None).

    Returns:
        Benchmark results with per-scenario and aggregate scores.

    Raises:
        ValueError: If ``scenarios`` is an empty list — the aggregate
            averages would be undefined.
    """
    if scenarios is None:
        scenarios = SCENARIOS
    if not scenarios:
        # Fail fast with a clear message instead of a ZeroDivisionError
        # when the averages are computed below.
        raise ValueError("run_benchmark requires at least one scenario")

    results = []
    total_coverage = 0
    total_recall = 0
    total_f1 = 0
    perfect = 0

    for scenario in scenarios:
        # Expose only the raw text to the recall function, mirroring what a
        # real memory system would see.
        memory_texts = [m["text"] for m in scenario["memories"]]
        selected = recall_fn(memory_texts, scenario["question"], K)

        eval_result = evaluate_combination(selected, scenario)

        results.append({
            "id": scenario["id"],
            "category": scenario["category"],
            "selected": selected,
            **eval_result,
        })

        total_coverage += eval_result["coverage"]
        total_recall += eval_result["evidence_recall"]
        total_f1 += eval_result["f1"]
        if eval_result["coverage"] == 1.0:
            perfect += 1

    n = len(scenarios)
    return {
        "benchmark": "MemCombine",
        "n_scenarios": n,
        "K": K,
        "avg_coverage": total_coverage / n,
        "avg_evidence_recall": total_recall / n,
        "avg_f1": total_f1 / n,
        "perfect_coverage": perfect,
        "perfect_coverage_pct": perfect / n * 100,
        "per_scenario": results,
    }
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"