hivememory 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hivememory-0.1.0/LICENSE +21 -0
- hivememory-0.1.0/PKG-INFO +168 -0
- hivememory-0.1.0/README.md +148 -0
- hivememory-0.1.0/hivememory/__init__.py +13 -0
- hivememory-0.1.0/hivememory/artifact.py +102 -0
- hivememory-0.1.0/hivememory/conflicts.py +137 -0
- hivememory-0.1.0/hivememory/core.py +308 -0
- hivememory-0.1.0/hivememory/provenance.py +97 -0
- hivememory-0.1.0/hivememory/store.py +217 -0
- hivememory-0.1.0/hivememory/wiki.py +188 -0
- hivememory-0.1.0/hivememory.egg-info/PKG-INFO +168 -0
- hivememory-0.1.0/hivememory.egg-info/SOURCES.txt +20 -0
- hivememory-0.1.0/hivememory.egg-info/dependency_links.txt +1 -0
- hivememory-0.1.0/hivememory.egg-info/requires.txt +11 -0
- hivememory-0.1.0/hivememory.egg-info/top_level.txt +1 -0
- hivememory-0.1.0/pyproject.toml +29 -0
- hivememory-0.1.0/setup.cfg +4 -0
- hivememory-0.1.0/setup.py +3 -0
- hivememory-0.1.0/tests/test_artifact.py +130 -0
- hivememory-0.1.0/tests/test_conflicts.py +156 -0
- hivememory-0.1.0/tests/test_provenance.py +183 -0
- hivememory-0.1.0/tests/test_store.py +177 -0
hivememory-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 hivememory contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hivememory
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared reasoning memory layer for multi-agent systems
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: faiss-cpu>=1.7.4
|
|
10
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
11
|
+
Requires-Dist: openai>=1.0.0
|
|
12
|
+
Requires-Dist: anthropic>=0.39.0
|
|
13
|
+
Requires-Dist: numpy>=1.24.0
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
16
|
+
Requires-Dist: matplotlib>=3.7.0; extra == "dev"
|
|
17
|
+
Requires-Dist: networkx>=3.0; extra == "dev"
|
|
18
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# hivememory
|
|
22
|
+
|
|
23
|
+
Shared reasoning memory for multi-agent systems.
|
|
24
|
+
|
|
25
|
+
When multiple AI agents research the same problem independently, they waste tokens re-deriving the same knowledge and produce contradictory conclusions no one catches. hivememory gives agents a shared memory layer where they store structured reasoning artifacts, reuse each other's work, and surface contradictions automatically.
|
|
26
|
+
|
|
27
|
+
[Project page (coming soon)](#)
|
|
28
|
+
|
|
29
|
+
## Results
|
|
30
|
+
|
|
31
|
+
Benchmark: 3 agents research "Competitive Landscape of AI Code Editors in 2026" using gpt-4o-mini, with and without shared memory. Each agent researches 3 sub-topics. In the shared configuration, agents query hivememory before each LLM call — when prior findings exist, the agent receives a focused prompt that avoids redundant research.
|
|
32
|
+
|
|
33
|
+
| Metric | Baseline (no shared memory) | hivememory |
|
|
34
|
+
|---|---|---|
|
|
35
|
+
| Total tokens consumed | 11,896 | 9,810 (-17.5%) |
|
|
36
|
+
| Memory-augmented queries | 0 / 9 | 5 / 9 |
|
|
37
|
+
| Output quality (LLM-as-judge, avg 3 runs) | 8.8 | 9.0 |
|
|
38
|
+
| Contradiction-free score | 9.0 | 9.3 |
|
|
39
|
+
| Reuse rate | 0% | 56% |
|
|
40
|
+
| Wall clock time | 113.5s | 101.9s |
|
|
41
|
+
|
|
42
|
+
Token savings come from agents 2 and 3 receiving memory context that produces shorter, non-redundant LLM responses. Quality is equal or slightly better because memory-augmented agents build on verified findings rather than re-deriving from scratch.
|
|
43
|
+
|
|
44
|
+

|
|
45
|
+
*Agents 2 and 3 use fewer tokens when prior findings are available in memory.*
|
|
46
|
+
|
|
47
|
+

|
|
48
|
+
|
|
49
|
+

|
|
50
|
+
*LLM-as-judge scores across 4 dimensions, averaged over 3 evaluation runs.*
|
|
51
|
+
|
|
52
|
+
## Architecture
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
agent-1 ──┐ ┌── conflict detection
|
|
56
|
+
agent-2 ──┼── hivememory API ────────┼── embedding search (FAISS)
|
|
57
|
+
agent-3 ──┘ write / query / └── provenance DAG
|
|
58
|
+
resolve / export
|
|
59
|
+
│
|
|
60
|
+
┌─────┴─────┐
|
|
61
|
+
│ sqlite │
|
|
62
|
+
│ + FAISS │
|
|
63
|
+
│ index │
|
|
64
|
+
└───────────┘
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+

|
|
68
|
+
*How artifacts flow between agents. Agent 1 writes findings; agents 2 and 3 query memory, reuse relevant work, and focus on gaps.*
|
|
69
|
+
|
|
70
|
+

|
|
71
|
+
*Dependency graph of artifacts. Colors indicate source agent. Edges show "built on" relationships.*
|
|
72
|
+
|
|
73
|
+
## Quickstart
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install hivememory
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from hivememory import HiveMemory, Evidence
|
|
81
|
+
|
|
82
|
+
hive = HiveMemory()
|
|
83
|
+
|
|
84
|
+
# store a finding
|
|
85
|
+
art = hive.write(
|
|
86
|
+
claim="Voice AI market projected to reach $50B by 2028",
|
|
87
|
+
evidence=[Evidence(source="industry report", content="35% CAGR", reliability=0.9)],
|
|
88
|
+
confidence=0.85,
|
|
89
|
+
agent_id="researcher-1",
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# query shared memory before doing new research
|
|
93
|
+
existing = hive.query("voice AI market size", top_k=3)
|
|
94
|
+
|
|
95
|
+
# check for contradictions
|
|
96
|
+
open_conflicts = hive.get_conflicts()
|
|
97
|
+
|
|
98
|
+
# resolve
|
|
99
|
+
if open_conflicts:
|
|
100
|
+
hive.resolve_conflict(open_conflicts[0].id, winner_id=art.id,
|
|
101
|
+
reason="stronger evidence", resolved_by="supervisor")
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## How it works
|
|
105
|
+
|
|
106
|
+
### Reasoning artifacts
|
|
107
|
+
|
|
108
|
+
Agents store structured claims with evidence, confidence scores, and provenance links — not raw text. Each artifact records who produced it, what evidence supports it, and which prior artifacts it builds on. This structure makes artifacts queryable, comparable, and auditable.
|
|
109
|
+
|
|
110
|
+
### Conflict detection
|
|
111
|
+
|
|
112
|
+
When a new artifact is stored, hivememory computes its embedding and searches FAISS for similar existing claims. If two artifacts are semantically close but have divergent confidence scores, a conflict is flagged. This first stage can be followed by an LLM contradiction check (OpenAI or Anthropic) for higher-precision detection.
|
|
113
|
+
|
|
114
|
+
### Provenance tracking
|
|
115
|
+
|
|
116
|
+
Every artifact records its dependencies as a list of artifact IDs, forming a directed acyclic graph. This DAG answers "which agent's work did this conclusion build on?" and enables cascading invalidation — if an upstream artifact is superseded, downstream consumers can be notified.
|
|
117
|
+
|
|
118
|
+
## Repo structure
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
hivememory/
|
|
122
|
+
__init__.py # public API exports
|
|
123
|
+
artifact.py # ReasoningArtifact, Evidence, Conflict dataclasses
|
|
124
|
+
core.py # HiveMemory main class (FAISS + sqlite)
|
|
125
|
+
store.py # low-level persistence layer
|
|
126
|
+
conflicts.py # ConflictDetector with LLM client support
|
|
127
|
+
provenance.py # ProvenanceTracker DAG
|
|
128
|
+
wiki.py # WikiExporter — markdown knowledge base export
|
|
129
|
+
examples/
|
|
130
|
+
basic_usage.py # store, query, conflict detect, resolve, export
|
|
131
|
+
research_task.py # 3-agent research demo with full pipeline
|
|
132
|
+
benchmarks/
|
|
133
|
+
real_benchmark.py # real LLM benchmark (gpt-4o-mini)
|
|
134
|
+
generate_charts.py # generate all charts from results.json
|
|
135
|
+
results.json # raw benchmark data
|
|
136
|
+
results_summary.md # human-readable summary
|
|
137
|
+
tests/
|
|
138
|
+
test_artifact.py # artifact serialization and ID generation
|
|
139
|
+
test_store.py # persistence layer tests
|
|
140
|
+
test_conflicts.py # conflict detection tests
|
|
141
|
+
test_provenance.py # provenance DAG tests
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Examples
|
|
145
|
+
|
|
146
|
+
- `python examples/basic_usage.py` — store artifacts, query memory, detect and resolve conflicts, export a wiki. Good first run to verify installation.
|
|
147
|
+
- `python examples/research_task.py` — three agents research AI code editors, sharing findings through hivememory. Shows artifact reuse, conflict detection, provenance tracking, and wiki export end-to-end.
|
|
148
|
+
|
|
149
|
+

|
|
150
|
+
*Where tokens go: baseline is all original research. hivememory splits tokens between original research, focused (memory-augmented) queries, and extraction.*
|
|
151
|
+
|
|
152
|
+
## Setup
|
|
153
|
+
|
|
154
|
+
- Python 3.10+
|
|
155
|
+
- `pip install hivememory`
|
|
156
|
+
- Set `OPENAI_API_KEY` for LLM-based conflict detection (optional — embedding-based detection works without it)
|
|
157
|
+
- Run `python examples/basic_usage.py` to verify
|
|
158
|
+
|
|
159
|
+
## Related work
|
|
160
|
+
|
|
161
|
+
- Yu et al., "Multi-Agent Memory from a Computer Architecture Perspective: Visions and Challenges Ahead," Architecture 2.0 Workshop (UCSD/CMU), March 2026. Frames multi-agent memory as a systems problem and proposes structured memory hierarchies over flat context passing.
|
|
162
|
+
- Karpathy, "LLM Knowledge Bases" (blog post, 2025). Demonstrates single-agent knowledge accumulation with structured retrieval. hivememory extends this pattern to multi-agent systems, adding conflict detection and provenance tracking across agents.
|
|
163
|
+
|
|
164
|
+
Single-agent knowledge bases work. hivememory makes them multi-agent.
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
MIT License
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# hivememory
|
|
2
|
+
|
|
3
|
+
Shared reasoning memory for multi-agent systems.
|
|
4
|
+
|
|
5
|
+
When multiple AI agents research the same problem independently, they waste tokens re-deriving the same knowledge and produce contradictory conclusions no one catches. hivememory gives agents a shared memory layer where they store structured reasoning artifacts, reuse each other's work, and surface contradictions automatically.
|
|
6
|
+
|
|
7
|
+
[Project page (coming soon)](#)
|
|
8
|
+
|
|
9
|
+
## Results
|
|
10
|
+
|
|
11
|
+
Benchmark: 3 agents research "Competitive Landscape of AI Code Editors in 2026" using gpt-4o-mini, with and without shared memory. Each agent researches 3 sub-topics. In the shared configuration, agents query hivememory before each LLM call — when prior findings exist, the agent receives a focused prompt that avoids redundant research.
|
|
12
|
+
|
|
13
|
+
| Metric | Baseline (no shared memory) | hivememory |
|
|
14
|
+
|---|---|---|
|
|
15
|
+
| Total tokens consumed | 11,896 | 9,810 (-17.5%) |
|
|
16
|
+
| Memory-augmented queries | 0 / 9 | 5 / 9 |
|
|
17
|
+
| Output quality (LLM-as-judge, avg 3 runs) | 8.8 | 9.0 |
|
|
18
|
+
| Contradiction-free score | 9.0 | 9.3 |
|
|
19
|
+
| Reuse rate | 0% | 56% |
|
|
20
|
+
| Wall clock time | 113.5s | 101.9s |
|
|
21
|
+
|
|
22
|
+
Token savings come from agents 2 and 3 receiving memory context that produces shorter, non-redundant LLM responses. Quality is equal or slightly better because memory-augmented agents build on verified findings rather than re-deriving from scratch.
|
|
23
|
+
|
|
24
|
+

|
|
25
|
+
*Agents 2 and 3 use fewer tokens when prior findings are available in memory.*
|
|
26
|
+
|
|
27
|
+

|
|
28
|
+
|
|
29
|
+

|
|
30
|
+
*LLM-as-judge scores across 4 dimensions, averaged over 3 evaluation runs.*
|
|
31
|
+
|
|
32
|
+
## Architecture
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
agent-1 ──┐ ┌── conflict detection
|
|
36
|
+
agent-2 ──┼── hivememory API ────────┼── embedding search (FAISS)
|
|
37
|
+
agent-3 ──┘ write / query / └── provenance DAG
|
|
38
|
+
resolve / export
|
|
39
|
+
│
|
|
40
|
+
┌─────┴─────┐
|
|
41
|
+
│ sqlite │
|
|
42
|
+
│ + FAISS │
|
|
43
|
+
│ index │
|
|
44
|
+
└───────────┘
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+

|
|
48
|
+
*How artifacts flow between agents. Agent 1 writes findings; agents 2 and 3 query memory, reuse relevant work, and focus on gaps.*
|
|
49
|
+
|
|
50
|
+

|
|
51
|
+
*Dependency graph of artifacts. Colors indicate source agent. Edges show "built on" relationships.*
|
|
52
|
+
|
|
53
|
+
## Quickstart
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install hivememory
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from hivememory import HiveMemory, Evidence
|
|
61
|
+
|
|
62
|
+
hive = HiveMemory()
|
|
63
|
+
|
|
64
|
+
# store a finding
|
|
65
|
+
art = hive.write(
|
|
66
|
+
claim="Voice AI market projected to reach $50B by 2028",
|
|
67
|
+
evidence=[Evidence(source="industry report", content="35% CAGR", reliability=0.9)],
|
|
68
|
+
confidence=0.85,
|
|
69
|
+
agent_id="researcher-1",
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# query shared memory before doing new research
|
|
73
|
+
existing = hive.query("voice AI market size", top_k=3)
|
|
74
|
+
|
|
75
|
+
# check for contradictions
|
|
76
|
+
open_conflicts = hive.get_conflicts()
|
|
77
|
+
|
|
78
|
+
# resolve
|
|
79
|
+
if open_conflicts:
|
|
80
|
+
hive.resolve_conflict(open_conflicts[0].id, winner_id=art.id,
|
|
81
|
+
reason="stronger evidence", resolved_by="supervisor")
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## How it works
|
|
85
|
+
|
|
86
|
+
### Reasoning artifacts
|
|
87
|
+
|
|
88
|
+
Agents store structured claims with evidence, confidence scores, and provenance links — not raw text. Each artifact records who produced it, what evidence supports it, and which prior artifacts it builds on. This structure makes artifacts queryable, comparable, and auditable.
|
|
89
|
+
|
|
90
|
+
### Conflict detection
|
|
91
|
+
|
|
92
|
+
When a new artifact is stored, hivememory computes its embedding and searches FAISS for similar existing claims. If two artifacts are semantically close but have divergent confidence scores, a conflict is flagged. This first stage can be followed by an LLM contradiction check (OpenAI or Anthropic) for higher-precision detection.
|
|
93
|
+
|
|
94
|
+
### Provenance tracking
|
|
95
|
+
|
|
96
|
+
Every artifact records its dependencies as a list of artifact IDs, forming a directed acyclic graph. This DAG answers "which agent's work did this conclusion build on?" and enables cascading invalidation — if an upstream artifact is superseded, downstream consumers can be notified.
|
|
97
|
+
|
|
98
|
+
## Repo structure
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
hivememory/
|
|
102
|
+
__init__.py # public API exports
|
|
103
|
+
artifact.py # ReasoningArtifact, Evidence, Conflict dataclasses
|
|
104
|
+
core.py # HiveMemory main class (FAISS + sqlite)
|
|
105
|
+
store.py # low-level persistence layer
|
|
106
|
+
conflicts.py # ConflictDetector with LLM client support
|
|
107
|
+
provenance.py # ProvenanceTracker DAG
|
|
108
|
+
wiki.py # WikiExporter — markdown knowledge base export
|
|
109
|
+
examples/
|
|
110
|
+
basic_usage.py # store, query, conflict detect, resolve, export
|
|
111
|
+
research_task.py # 3-agent research demo with full pipeline
|
|
112
|
+
benchmarks/
|
|
113
|
+
real_benchmark.py # real LLM benchmark (gpt-4o-mini)
|
|
114
|
+
generate_charts.py # generate all charts from results.json
|
|
115
|
+
results.json # raw benchmark data
|
|
116
|
+
results_summary.md # human-readable summary
|
|
117
|
+
tests/
|
|
118
|
+
test_artifact.py # artifact serialization and ID generation
|
|
119
|
+
test_store.py # persistence layer tests
|
|
120
|
+
test_conflicts.py # conflict detection tests
|
|
121
|
+
test_provenance.py # provenance DAG tests
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Examples
|
|
125
|
+
|
|
126
|
+
- `python examples/basic_usage.py` — store artifacts, query memory, detect and resolve conflicts, export a wiki. Good first run to verify installation.
|
|
127
|
+
- `python examples/research_task.py` — three agents research AI code editors, sharing findings through hivememory. Shows artifact reuse, conflict detection, provenance tracking, and wiki export end-to-end.
|
|
128
|
+
|
|
129
|
+

|
|
130
|
+
*Where tokens go: baseline is all original research. hivememory splits tokens between original research, focused (memory-augmented) queries, and extraction.*
|
|
131
|
+
|
|
132
|
+
## Setup
|
|
133
|
+
|
|
134
|
+
- Python 3.10+
|
|
135
|
+
- `pip install hivememory`
|
|
136
|
+
- Set `OPENAI_API_KEY` for LLM-based conflict detection (optional — embedding-based detection works without it)
|
|
137
|
+
- Run `python examples/basic_usage.py` to verify
|
|
138
|
+
|
|
139
|
+
## Related work
|
|
140
|
+
|
|
141
|
+
- Yu et al., "Multi-Agent Memory from a Computer Architecture Perspective: Visions and Challenges Ahead," Architecture 2.0 Workshop (UCSD/CMU), March 2026. Frames multi-agent memory as a systems problem and proposes structured memory hierarchies over flat context passing.
|
|
142
|
+
- Karpathy, "LLM Knowledge Bases" (blog post, 2025). Demonstrates single-agent knowledge accumulation with structured retrieval. hivememory extends this pattern to multi-agent systems, adding conflict detection and provenance tracking across agents.
|
|
143
|
+
|
|
144
|
+
Single-agent knowledge bases work. hivememory makes them multi-agent.
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
MIT License
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from hivememory.artifact import ReasoningArtifact, Evidence, Conflict
|
|
2
|
+
from hivememory.conflicts import ConflictDetector
|
|
3
|
+
from hivememory.core import HiveMemory
|
|
4
|
+
from hivememory.provenance import ProvenanceTracker
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"HiveMemory",
|
|
8
|
+
"ReasoningArtifact",
|
|
9
|
+
"Evidence",
|
|
10
|
+
"Conflict",
|
|
11
|
+
"ConflictDetector",
|
|
12
|
+
"ProvenanceTracker",
|
|
13
|
+
]
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Evidence:
    """One piece of supporting evidence attached to a claim."""

    source: str               # where the evidence came from (e.g. a report name)
    content: str              # the evidence text itself
    reliability: float = 1.0  # reliability weight; presumably in [0, 1] — TODO confirm

    def to_dict(self) -> dict:
        """Serialize to a plain dict suitable for storage."""
        return {
            "source": self.source,
            "content": self.content,
            "reliability": self.reliability,
        }

    @classmethod
    def from_dict(cls, data: dict) -> Evidence:
        """Rebuild an Evidence from a dict produced by :meth:`to_dict`.

        Missing ``reliability`` falls back to the default of 1.0.
        """
        return cls(data["source"], data["content"], data.get("reliability", 1.0))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class ReasoningArtifact:
    """A structured claim produced by an agent, with evidence and provenance."""

    claim: str
    agent_id: str
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    evidence: list[Evidence] = field(default_factory=list)
    confidence: float = 1.0
    dependencies: list[str] = field(default_factory=list)       # ids of artifacts this builds on
    topic_embedding: list[float] = field(default_factory=list)  # claim embedding; may be empty
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    status: str = "active"  # flipped to "contested" by conflict detection

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict (``created_at`` as ISO 8601)."""
        payload = {
            "id": self.id,
            "claim": self.claim,
            "evidence": [item.to_dict() for item in self.evidence],
            "confidence": self.confidence,
            "agent_id": self.agent_id,
            "dependencies": self.dependencies,
            "topic_embedding": self.topic_embedding,
            "created_at": self.created_at.isoformat(),
            "status": self.status,
        }
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> ReasoningArtifact:
        """Rebuild an artifact from a dict produced by :meth:`to_dict`.

        ``id``, ``claim``, ``agent_id`` and ``created_at`` are required;
        everything else falls back to the dataclass defaults.
        """
        evidence_items = [Evidence.from_dict(raw) for raw in data.get("evidence", [])]
        return cls(
            claim=data["claim"],
            agent_id=data["agent_id"],
            id=data["id"],
            evidence=evidence_items,
            confidence=data.get("confidence", 1.0),
            dependencies=data.get("dependencies", []),
            topic_embedding=data.get("topic_embedding", []),
            created_at=datetime.fromisoformat(data["created_at"]),
            status=data.get("status", "active"),
        )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class Conflict:
    """Record of a detected contradiction and, optionally, its resolution."""

    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    artifact_ids: list[str] = field(default_factory=list)  # the conflicting artifacts
    description: str = ""
    resolved: bool = False
    winner_id: Optional[str] = None           # artifact chosen when resolved
    resolution_reason: Optional[str] = None   # human-readable justification
    resolved_by: Optional[str] = None         # id of whoever resolved it

    def to_dict(self) -> dict:
        """Serialize to a plain dict suitable for storage."""
        keys = (
            "id",
            "artifact_ids",
            "description",
            "resolved",
            "winner_id",
            "resolution_reason",
            "resolved_by",
        )
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, data: dict) -> Conflict:
        """Rebuild a Conflict from a dict produced by :meth:`to_dict`.

        ``id`` is required; all other fields fall back to their defaults.
        """
        return cls(
            id=data["id"],
            artifact_ids=data.get("artifact_ids", []),
            description=data.get("description", ""),
            resolved=data.get("resolved", False),
            winner_id=data.get("winner_id"),
            resolution_reason=data.get("resolution_reason"),
            resolved_by=data.get("resolved_by"),
        )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Protocol
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from hivememory.artifact import Conflict, ReasoningArtifact
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LLMClient(Protocol):
    """Structural interface for the LLM backends used by ConflictDetector.

    Implementations are expected to return an upper-cased one-word verdict;
    the concrete clients in this module prompt for exactly one of
    "CONTRADICTS", "SUPPORTS", "UNRELATED", or "REFINES".
    """

    def check_contradiction(self, a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        """Classify the relationship between two artifacts' claims."""
        ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OpenAIConflictClient:
    """LLM contradiction checker backed by the OpenAI chat completions API."""

    def __init__(self, model: str = "gpt-4o-mini"):
        # Imported lazily so the package works without openai installed
        # unless this client is actually constructed.
        import openai

        self.client = openai.OpenAI()
        self.model = model

    def check_contradiction(self, a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        """Ask the model whether two claims contradict.

        Returns the upper-cased model output; the prompt requests exactly one
        of CONTRADICTS, SUPPORTS, UNRELATED, or REFINES, but the raw
        (upper-cased) text is returned as-is.
        """
        a_sources = ", ".join(e.source for e in a.evidence)
        b_sources = ", ".join(e.source for e in b.evidence)
        prompt = (
            "Two research agents produced these findings. "
            "Do they contradict each other?\n\n"
            f"Agent {a.agent_id} claims: {a.claim}\n"
            f"Based on: {a_sources}\n\n"
            f"Agent {b.agent_id} claims: {b.claim}\n"
            f"Based on: {b_sources}\n\n"
            "Respond with exactly one word: "
            "CONTRADICTS, SUPPORTS, UNRELATED, or REFINES"
        )
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,
        )
        # message.content is Optional[str] in the OpenAI SDK; treat None as
        # an empty reply instead of raising AttributeError on .strip().
        content = response.choices[0].message.content or ""
        return content.strip().upper()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class AnthropicConflictClient:
    """LLM contradiction checker backed by the Anthropic Messages API."""

    def __init__(self, model: str = "claude-haiku-4-5-20251001"):
        # Imported lazily so the package works without anthropic installed
        # unless this client is actually constructed.
        import anthropic

        self.client = anthropic.Anthropic()
        self.model = model

    def check_contradiction(self, a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        """Ask the model whether two claims contradict.

        Returns the upper-cased model output; the prompt requests exactly one
        of CONTRADICTS, SUPPORTS, UNRELATED, or REFINES, but the raw
        (upper-cased) text is returned as-is.
        """
        a_sources = ", ".join(e.source for e in a.evidence)
        b_sources = ", ".join(e.source for e in b.evidence)
        prompt = (
            "Two research agents produced these findings. "
            "Do they contradict each other?\n\n"
            f"Agent {a.agent_id} claims: {a.claim}\n"
            f"Based on: {a_sources}\n\n"
            f"Agent {b.agent_id} claims: {b.claim}\n"
            f"Based on: {b_sources}\n\n"
            "Respond with exactly one word: "
            "CONTRADICTS, SUPPORTS, UNRELATED, or REFINES"
        )
        response = self.client.messages.create(
            model=self.model,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}],
        )
        # Guard against an empty content list rather than raising IndexError.
        blocks = response.content
        text = blocks[0].text if blocks else ""
        return text.strip().upper()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _cosine_similarity(a: list[float], b: list[float]) -> float:
|
|
72
|
+
va = np.array(a, dtype=np.float32)
|
|
73
|
+
vb = np.array(b, dtype=np.float32)
|
|
74
|
+
norm_a = np.linalg.norm(va)
|
|
75
|
+
norm_b = np.linalg.norm(vb)
|
|
76
|
+
if norm_a == 0 or norm_b == 0:
|
|
77
|
+
return 0.0
|
|
78
|
+
return float(np.dot(va, vb) / (norm_a * norm_b))
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ConflictDetector:
    """Two-stage contradiction detector: embedding filter, then LLM verdict.

    Stage 1 keeps only existing artifacts whose topic embedding is close to
    the new artifact's (cosine similarity > 0.75). Stage 2 asks the LLM
    client for a verdict on each surviving pair.
    """

    def __init__(
        self,
        store,
        llm_client: Optional[LLMClient] = None,
    ):
        self.store = store
        # Default to the OpenAI-backed client when none is supplied.
        self.llm_client = llm_client or OpenAIConflictClient()

    def detect(
        self,
        new_artifact: ReasoningArtifact,
        existing_artifacts: list[ReasoningArtifact],
    ) -> list[Conflict]:
        """Check ``new_artifact`` against prior artifacts and record conflicts.

        Side effects: on a CONTRADICTS verdict both artifacts are marked
        "contested" and persisted, and the conflict is saved; on a REFINES
        verdict the candidate is added to the new artifact's dependencies.
        Returns the list of newly created conflicts.
        """
        if not new_artifact.topic_embedding:
            return []

        # Stage 1: semantic proximity filter over prior artifacts.
        candidates = [
            other
            for other in existing_artifacts
            if other.topic_embedding
            and other.id != new_artifact.id
            and _cosine_similarity(new_artifact.topic_embedding, other.topic_embedding) > 0.75
        ]

        # Stage 2: one LLM verdict per surviving candidate.
        found: list[Conflict] = []
        for other in candidates:
            verdict = self.llm_client.check_contradiction(new_artifact, other)

            if verdict == "CONTRADICTS":
                clash = Conflict(
                    artifact_ids=[new_artifact.id, other.id],
                    description=(
                        f"LLM detected contradiction between "
                        f"agent {new_artifact.agent_id} and "
                        f"agent {other.agent_id}"
                    ),
                )
                new_artifact.status = "contested"
                other.status = "contested"
                self.store._save_artifact(new_artifact)
                self.store._save_artifact(other)
                self.store._save_conflict(clash)
                found.append(clash)

            elif verdict == "REFINES" and other.id not in new_artifact.dependencies:
                # The new artifact builds on the candidate: record provenance.
                new_artifact.dependencies.append(other.id)
                self.store._save_artifact(new_artifact)

        return found
|