hivememory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 hivememory contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,168 @@
1
+ Metadata-Version: 2.4
2
+ Name: hivememory
3
+ Version: 0.1.0
4
+ Summary: Shared reasoning memory layer for multi-agent systems
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: faiss-cpu>=1.7.4
10
+ Requires-Dist: sentence-transformers>=2.2.0
11
+ Requires-Dist: openai>=1.0.0
12
+ Requires-Dist: anthropic>=0.39.0
13
+ Requires-Dist: numpy>=1.24.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=7.0; extra == "dev"
16
+ Requires-Dist: matplotlib>=3.7.0; extra == "dev"
17
+ Requires-Dist: networkx>=3.0; extra == "dev"
18
+ Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
19
+ Dynamic: license-file
20
+
21
+ # hivememory
22
+
23
+ Shared reasoning memory for multi-agent systems.
24
+
25
+ When multiple AI agents research the same problem independently, they waste tokens re-deriving the same knowledge and produce contradictory conclusions no one catches. hivememory gives agents a shared memory layer where they store structured reasoning artifacts, reuse each other's work, and surface contradictions automatically.
26
+
27
+ [Project page (coming soon)](#)
28
+
29
+ ## Results
30
+
31
+ Benchmark: 3 agents research "Competitive Landscape of AI Code Editors in 2026" using gpt-4o-mini, with and without shared memory. Each agent researches 3 sub-topics. In the shared configuration, agents query hivememory before each LLM call — when prior findings exist, the agent receives a focused prompt that avoids redundant research.
32
+
33
+ | Metric | Baseline (no shared memory) | hivememory |
34
+ |---|---|---|
35
+ | Total tokens consumed | 11,896 | 9,810 (-17.5%) |
36
+ | Memory-augmented queries | 0 / 9 | 5 / 9 |
37
+ | Output quality (LLM-as-judge, avg 3 runs) | 8.8 | 9.0 |
38
+ | Contradiction-free score | 9.0 | 9.3 |
39
+ | Reuse rate | 0% | 56% |
40
+ | Wall clock time | 113.5s | 101.9s |
41
+
42
+ Token savings come from agents 2 and 3 receiving memory context that produces shorter, non-redundant LLM responses. Quality is equal or slightly better because memory-augmented agents build on verified findings rather than re-deriving from scratch.
43
+
44
+ ![Token consumption per agent](screenshots/token_consumption_per_agent.png)
45
+ *Agents 2 and 3 use fewer tokens when prior findings are available in memory.*
46
+
47
+ ![Total token consumption](screenshots/token_consumption_total.png)
48
+
49
+ ![Quality comparison](screenshots/quality_comparison.png)
50
+ *LLM-as-judge scores across 4 dimensions, averaged over 3 evaluation runs.*
51
+
52
+ ## Architecture
53
+
54
+ ```
55
+ agent-1 ──┐ ┌── conflict detection
56
+ agent-2 ──┼── hivememory API ────────┼── embedding search (FAISS)
57
+ agent-3 ──┘ write / query / └── provenance DAG
58
+ resolve / export
59
+
60
+ ┌─────┴─────┐
61
+ │ sqlite │
62
+ │ + FAISS │
63
+ │ index │
64
+ └───────────┘
65
+ ```
66
+
67
+ ![Artifact reuse flow](screenshots/artifact_reuse_flow.png)
68
+ *How artifacts flow between agents. Agent 1 writes findings; agents 2 and 3 query memory, reuse relevant work, and focus on gaps.*
69
+
70
+ ![Provenance DAG](screenshots/provenance_dag.png)
71
+ *Dependency graph of artifacts. Colors indicate source agent. Edges show "built on" relationships.*
72
+
73
+ ## Quickstart
74
+
75
+ ```bash
76
+ pip install hivememory
77
+ ```
78
+
79
+ ```python
80
+ from hivememory import HiveMemory, Evidence
81
+
82
+ hive = HiveMemory()
83
+
84
+ # store a finding
85
+ art = hive.write(
86
+ claim="Voice AI market projected to reach $50B by 2028",
87
+ evidence=[Evidence(source="industry report", content="35% CAGR", reliability=0.9)],
88
+ confidence=0.85,
89
+ agent_id="researcher-1",
90
+ )
91
+
92
+ # query shared memory before doing new research
93
+ existing = hive.query("voice AI market size", top_k=3)
94
+
95
+ # check for contradictions
96
+ open_conflicts = hive.get_conflicts()
97
+
98
+ # resolve
99
+ if open_conflicts:
100
+ hive.resolve_conflict(open_conflicts[0].id, winner_id=art.id,
101
+ reason="stronger evidence", resolved_by="supervisor")
102
+ ```
103
+
104
+ ## How it works
105
+
106
+ ### Reasoning artifacts
107
+
108
+ Agents store structured claims with evidence, confidence scores, and provenance links — not raw text. Each artifact records who produced it, what evidence supports it, and which prior artifacts it builds on. This structure makes artifacts queryable, comparable, and auditable.
109
+
110
+ ### Conflict detection
111
+
112
+ When a new artifact is stored, hivememory computes its embedding and searches FAISS for similar existing claims. If two artifacts are semantically close, the pair is flagged as a candidate conflict, and an LLM contradiction check (OpenAI or Anthropic) then verifies each candidate before a conflict is recorded.
113
+
114
+ ### Provenance tracking
115
+
116
+ Every artifact records its dependencies as a list of artifact IDs, forming a directed acyclic graph. This DAG answers "which agent's work did this conclusion build on?" and enables cascading invalidation — if an upstream artifact is superseded, downstream consumers can be notified.
117
+
118
+ ## Repo structure
119
+
120
+ ```
121
+ hivememory/
122
+ __init__.py # public API exports
123
+ artifact.py # ReasoningArtifact, Evidence, Conflict dataclasses
124
+ core.py # HiveMemory main class (FAISS + sqlite)
125
+ store.py # low-level persistence layer
126
+ conflicts.py # ConflictDetector with LLM client support
127
+ provenance.py # ProvenanceTracker DAG
128
+ wiki.py # WikiExporter — markdown knowledge base export
129
+ examples/
130
+ basic_usage.py # store, query, conflict detect, resolve, export
131
+ research_task.py # 3-agent research demo with full pipeline
132
+ benchmarks/
133
+ real_benchmark.py # real LLM benchmark (gpt-4o-mini)
134
+ generate_charts.py # generate all charts from results.json
135
+ results.json # raw benchmark data
136
+ results_summary.md # human-readable summary
137
+ tests/
138
+ test_artifact.py # artifact serialization and ID generation
139
+ test_store.py # persistence layer tests
140
+ test_conflicts.py # conflict detection tests
141
+ test_provenance.py # provenance DAG tests
142
+ ```
143
+
144
+ ## Examples
145
+
146
+ - `python examples/basic_usage.py` — store artifacts, query memory, detect and resolve conflicts, export a wiki. Good first run to verify installation.
147
+ - `python examples/research_task.py` — three agents research AI code editors, sharing findings through hivememory. Shows artifact reuse, conflict detection, provenance tracking, and wiki export end-to-end.
148
+
149
+ ![Token breakdown](screenshots/token_breakdown_pie.png)
150
+ *Where tokens go: baseline is all original research. hivememory splits tokens between original research, focused (memory-augmented) queries, and extraction.*
151
+
152
+ ## Setup
153
+
154
+ - Python 3.10+
155
+ - `pip install hivememory`
156
+ - Set `OPENAI_API_KEY` for LLM-based conflict detection (optional — embedding-based detection works without it)
157
+ - Run `python examples/basic_usage.py` to verify
158
+
159
+ ## Related work
160
+
161
+ - Yu et al., "Multi-Agent Memory from a Computer Architecture Perspective: Visions and Challenges Ahead," Architecture 2.0 Workshop (UCSD/CMU), March 2026. Frames multi-agent memory as a systems problem and proposes structured memory hierarchies over flat context passing.
162
+ - Karpathy, "LLM Knowledge Bases" (blog post, 2025). Demonstrates single-agent knowledge accumulation with structured retrieval. hivememory extends this pattern to multi-agent systems, adding conflict detection and provenance tracking across agents.
163
+
164
+ Single-agent knowledge bases work. hivememory makes them multi-agent.
165
+
166
+ ---
167
+
168
+ MIT License
@@ -0,0 +1,148 @@
1
+ # hivememory
2
+
3
+ Shared reasoning memory for multi-agent systems.
4
+
5
+ When multiple AI agents research the same problem independently, they waste tokens re-deriving the same knowledge and produce contradictory conclusions no one catches. hivememory gives agents a shared memory layer where they store structured reasoning artifacts, reuse each other's work, and surface contradictions automatically.
6
+
7
+ [Project page (coming soon)](#)
8
+
9
+ ## Results
10
+
11
+ Benchmark: 3 agents research "Competitive Landscape of AI Code Editors in 2026" using gpt-4o-mini, with and without shared memory. Each agent researches 3 sub-topics. In the shared configuration, agents query hivememory before each LLM call — when prior findings exist, the agent receives a focused prompt that avoids redundant research.
12
+
13
+ | Metric | Baseline (no shared memory) | hivememory |
14
+ |---|---|---|
15
+ | Total tokens consumed | 11,896 | 9,810 (-17.5%) |
16
+ | Memory-augmented queries | 0 / 9 | 5 / 9 |
17
+ | Output quality (LLM-as-judge, avg 3 runs) | 8.8 | 9.0 |
18
+ | Contradiction-free score | 9.0 | 9.3 |
19
+ | Reuse rate | 0% | 56% |
20
+ | Wall clock time | 113.5s | 101.9s |
21
+
22
+ Token savings come from agents 2 and 3 receiving memory context that produces shorter, non-redundant LLM responses. Quality is equal or slightly better because memory-augmented agents build on verified findings rather than re-deriving from scratch.
23
+
24
+ ![Token consumption per agent](screenshots/token_consumption_per_agent.png)
25
+ *Agents 2 and 3 use fewer tokens when prior findings are available in memory.*
26
+
27
+ ![Total token consumption](screenshots/token_consumption_total.png)
28
+
29
+ ![Quality comparison](screenshots/quality_comparison.png)
30
+ *LLM-as-judge scores across 4 dimensions, averaged over 3 evaluation runs.*
31
+
32
+ ## Architecture
33
+
34
+ ```
35
+ agent-1 ──┐ ┌── conflict detection
36
+ agent-2 ──┼── hivememory API ────────┼── embedding search (FAISS)
37
+ agent-3 ──┘ write / query / └── provenance DAG
38
+ resolve / export
39
+
40
+ ┌─────┴─────┐
41
+ │ sqlite │
42
+ │ + FAISS │
43
+ │ index │
44
+ └───────────┘
45
+ ```
46
+
47
+ ![Artifact reuse flow](screenshots/artifact_reuse_flow.png)
48
+ *How artifacts flow between agents. Agent 1 writes findings; agents 2 and 3 query memory, reuse relevant work, and focus on gaps.*
49
+
50
+ ![Provenance DAG](screenshots/provenance_dag.png)
51
+ *Dependency graph of artifacts. Colors indicate source agent. Edges show "built on" relationships.*
52
+
53
+ ## Quickstart
54
+
55
+ ```bash
56
+ pip install hivememory
57
+ ```
58
+
59
+ ```python
60
+ from hivememory import HiveMemory, Evidence
61
+
62
+ hive = HiveMemory()
63
+
64
+ # store a finding
65
+ art = hive.write(
66
+ claim="Voice AI market projected to reach $50B by 2028",
67
+ evidence=[Evidence(source="industry report", content="35% CAGR", reliability=0.9)],
68
+ confidence=0.85,
69
+ agent_id="researcher-1",
70
+ )
71
+
72
+ # query shared memory before doing new research
73
+ existing = hive.query("voice AI market size", top_k=3)
74
+
75
+ # check for contradictions
76
+ open_conflicts = hive.get_conflicts()
77
+
78
+ # resolve
79
+ if open_conflicts:
80
+ hive.resolve_conflict(open_conflicts[0].id, winner_id=art.id,
81
+ reason="stronger evidence", resolved_by="supervisor")
82
+ ```
83
+
84
+ ## How it works
85
+
86
+ ### Reasoning artifacts
87
+
88
+ Agents store structured claims with evidence, confidence scores, and provenance links — not raw text. Each artifact records who produced it, what evidence supports it, and which prior artifacts it builds on. This structure makes artifacts queryable, comparable, and auditable.
89
+
90
+ ### Conflict detection
91
+
92
+ When a new artifact is stored, hivememory computes its embedding and searches FAISS for similar existing claims. If two artifacts are semantically close, the pair is flagged as a candidate conflict, and an LLM contradiction check (OpenAI or Anthropic) then verifies each candidate before a conflict is recorded.
93
+
94
+ ### Provenance tracking
95
+
96
+ Every artifact records its dependencies as a list of artifact IDs, forming a directed acyclic graph. This DAG answers "which agent's work did this conclusion build on?" and enables cascading invalidation — if an upstream artifact is superseded, downstream consumers can be notified.
97
+
98
+ ## Repo structure
99
+
100
+ ```
101
+ hivememory/
102
+ __init__.py # public API exports
103
+ artifact.py # ReasoningArtifact, Evidence, Conflict dataclasses
104
+ core.py # HiveMemory main class (FAISS + sqlite)
105
+ store.py # low-level persistence layer
106
+ conflicts.py # ConflictDetector with LLM client support
107
+ provenance.py # ProvenanceTracker DAG
108
+ wiki.py # WikiExporter — markdown knowledge base export
109
+ examples/
110
+ basic_usage.py # store, query, conflict detect, resolve, export
111
+ research_task.py # 3-agent research demo with full pipeline
112
+ benchmarks/
113
+ real_benchmark.py # real LLM benchmark (gpt-4o-mini)
114
+ generate_charts.py # generate all charts from results.json
115
+ results.json # raw benchmark data
116
+ results_summary.md # human-readable summary
117
+ tests/
118
+ test_artifact.py # artifact serialization and ID generation
119
+ test_store.py # persistence layer tests
120
+ test_conflicts.py # conflict detection tests
121
+ test_provenance.py # provenance DAG tests
122
+ ```
123
+
124
+ ## Examples
125
+
126
+ - `python examples/basic_usage.py` — store artifacts, query memory, detect and resolve conflicts, export a wiki. Good first run to verify installation.
127
+ - `python examples/research_task.py` — three agents research AI code editors, sharing findings through hivememory. Shows artifact reuse, conflict detection, provenance tracking, and wiki export end-to-end.
128
+
129
+ ![Token breakdown](screenshots/token_breakdown_pie.png)
130
+ *Where tokens go: baseline is all original research. hivememory splits tokens between original research, focused (memory-augmented) queries, and extraction.*
131
+
132
+ ## Setup
133
+
134
+ - Python 3.10+
135
+ - `pip install hivememory`
136
+ - Set `OPENAI_API_KEY` for LLM-based conflict detection (optional — embedding-based detection works without it)
137
+ - Run `python examples/basic_usage.py` to verify
138
+
139
+ ## Related work
140
+
141
+ - Yu et al., "Multi-Agent Memory from a Computer Architecture Perspective: Visions and Challenges Ahead," Architecture 2.0 Workshop (UCSD/CMU), March 2026. Frames multi-agent memory as a systems problem and proposes structured memory hierarchies over flat context passing.
142
+ - Karpathy, "LLM Knowledge Bases" (blog post, 2025). Demonstrates single-agent knowledge accumulation with structured retrieval. hivememory extends this pattern to multi-agent systems, adding conflict detection and provenance tracking across agents.
143
+
144
+ Single-agent knowledge bases work. hivememory makes them multi-agent.
145
+
146
+ ---
147
+
148
+ MIT License
@@ -0,0 +1,13 @@
1
"""Public API for hivememory: shared reasoning memory for multi-agent systems."""

from hivememory.artifact import ReasoningArtifact, Evidence, Conflict
from hivememory.conflicts import ConflictDetector
from hivememory.core import HiveMemory
from hivememory.provenance import ProvenanceTracker

# Names exported by `from hivememory import *`.
__all__ = [
    "HiveMemory",
    "ReasoningArtifact",
    "Evidence",
    "Conflict",
    "ConflictDetector",
    "ProvenanceTracker",
]
@@ -0,0 +1,102 @@
1
+ from __future__ import annotations
2
+
3
+ import uuid
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timezone
6
+ from typing import Optional
7
+
8
+
9
@dataclass
class Evidence:
    """A single piece of supporting material attached to a claim."""

    source: str  # where the evidence came from (e.g. "industry report")
    content: str  # the evidence text itself
    reliability: float = 1.0  # trust weight in [0, 1]; 1.0 = fully trusted

    def to_dict(self) -> dict:
        """Serialize to a plain, JSON-compatible dict."""
        return {
            "source": self.source,
            "content": self.content,
            "reliability": self.reliability,
        }

    @classmethod
    def from_dict(cls, data: dict) -> Evidence:
        """Rebuild an Evidence from a dict produced by :meth:`to_dict`.

        Missing ``reliability`` falls back to the default of 1.0.
        """
        return cls(
            data["source"],
            data["content"],
            data.get("reliability", 1.0),
        )
29
+
30
+
31
@dataclass
class ReasoningArtifact:
    """A structured claim written by an agent, with evidence and provenance.

    Records who produced the claim, what evidence supports it, which prior
    artifacts it builds on, and its current lifecycle status.
    """

    claim: str
    agent_id: str
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    evidence: list[Evidence] = field(default_factory=list)
    confidence: float = 1.0  # author's confidence in the claim, [0, 1]
    dependencies: list[str] = field(default_factory=list)  # upstream artifact IDs
    topic_embedding: list[float] = field(default_factory=list)  # empty = not embedded
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    status: str = "active"  # e.g. "active" or "contested"

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict; the timestamp becomes ISO-8601."""
        return {
            "id": self.id,
            "claim": self.claim,
            "evidence": [item.to_dict() for item in self.evidence],
            "confidence": self.confidence,
            "agent_id": self.agent_id,
            "dependencies": self.dependencies,
            "topic_embedding": self.topic_embedding,
            "created_at": self.created_at.isoformat(),
            "status": self.status,
        }

    @classmethod
    def from_dict(cls, data: dict) -> ReasoningArtifact:
        """Rebuild an artifact from a dict produced by :meth:`to_dict`.

        ``id``, ``claim``, ``agent_id`` and ``created_at`` are required;
        every other field falls back to its dataclass default.
        """
        return cls(
            claim=data["claim"],
            agent_id=data["agent_id"],
            id=data["id"],
            evidence=[Evidence.from_dict(item) for item in data.get("evidence", [])],
            confidence=data.get("confidence", 1.0),
            dependencies=data.get("dependencies", []),
            topic_embedding=data.get("topic_embedding", []),
            created_at=datetime.fromisoformat(data["created_at"]),
            status=data.get("status", "active"),
        )
69
+
70
+
71
@dataclass
class Conflict:
    """A detected contradiction between artifacts, plus its resolution state."""

    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    artifact_ids: list[str] = field(default_factory=list)  # the contradicting artifacts
    description: str = ""
    resolved: bool = False
    winner_id: Optional[str] = None  # artifact that prevailed, once resolved
    resolution_reason: Optional[str] = None
    resolved_by: Optional[str] = None  # agent or human who resolved it

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict."""
        return {
            "id": self.id,
            "artifact_ids": self.artifact_ids,
            "description": self.description,
            "resolved": self.resolved,
            "winner_id": self.winner_id,
            "resolution_reason": self.resolution_reason,
            "resolved_by": self.resolved_by,
        }

    @classmethod
    def from_dict(cls, data: dict) -> Conflict:
        """Rebuild a Conflict from a dict produced by :meth:`to_dict`.

        Only ``id`` is required; other fields fall back to their defaults.
        """
        optional = {
            "artifact_ids": data.get("artifact_ids", []),
            "description": data.get("description", ""),
            "resolved": data.get("resolved", False),
            "winner_id": data.get("winner_id"),
            "resolution_reason": data.get("resolution_reason"),
            "resolved_by": data.get("resolved_by"),
        }
        return cls(id=data["id"], **optional)
@@ -0,0 +1,137 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Protocol
4
+
5
+ import numpy as np
6
+
7
+ from hivememory.artifact import Conflict, ReasoningArtifact
8
+
9
+
10
class LLMClient(Protocol):
    """Structural interface for the LLM back-end used to verify contradictions.

    Implementations compare two artifacts and return a single upper-cased
    verdict word; the detector acts on "CONTRADICTS" and "REFINES".
    """

    def check_contradiction(self, a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        ...
13
+
14
+
15
class OpenAIConflictClient:
    """Contradiction checker backed by the OpenAI chat completions API.

    ``openai`` is imported lazily so the package remains importable without
    it installed; an ``OPENAI_API_KEY`` must be configured when this client
    is actually constructed and used.
    """

    def __init__(self, model: str = "gpt-4o-mini"):
        # Lazy import: only require the openai package when this client is used.
        import openai

        self.client = openai.OpenAI()
        self.model = model

    @staticmethod
    def _build_prompt(a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        """Render the one-word contradiction-check prompt for two artifacts."""
        a_sources = ", ".join(e.source for e in a.evidence)
        b_sources = ", ".join(e.source for e in b.evidence)
        return (
            "Two research agents produced these findings. "
            "Do they contradict each other?\n\n"
            f"Agent {a.agent_id} claims: {a.claim}\n"
            f"Based on: {a_sources}\n\n"
            f"Agent {b.agent_id} claims: {b.claim}\n"
            f"Based on: {b_sources}\n\n"
            "Respond with exactly one word: "
            "CONTRADICTS, SUPPORTS, UNRELATED, or REFINES"
        )

    def check_contradiction(self, a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        """Ask the model whether two claims contradict.

        Returns the model's upper-cased verdict word, or "" if the response
        carried no text content.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": self._build_prompt(a, b)}],
            max_tokens=10,
        )
        # message.content may be None (e.g. refusals/tool responses); the
        # original `.strip()` would raise AttributeError in that case.
        content = response.choices[0].message.content or ""
        return content.strip().upper()
41
+
42
+
43
class AnthropicConflictClient:
    """Contradiction checker backed by the Anthropic Messages API.

    ``anthropic`` is imported lazily so the package remains importable
    without it installed; an ``ANTHROPIC_API_KEY`` must be configured when
    this client is actually constructed and used.
    """

    def __init__(self, model: str = "claude-haiku-4-5-20251001"):
        # Lazy import: only require the anthropic package when this client is used.
        import anthropic

        self.client = anthropic.Anthropic()
        self.model = model

    @staticmethod
    def _build_prompt(a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        """Render the one-word contradiction-check prompt for two artifacts."""
        a_sources = ", ".join(e.source for e in a.evidence)
        b_sources = ", ".join(e.source for e in b.evidence)
        return (
            "Two research agents produced these findings. "
            "Do they contradict each other?\n\n"
            f"Agent {a.agent_id} claims: {a.claim}\n"
            f"Based on: {a_sources}\n\n"
            f"Agent {b.agent_id} claims: {b.claim}\n"
            f"Based on: {b_sources}\n\n"
            "Respond with exactly one word: "
            "CONTRADICTS, SUPPORTS, UNRELATED, or REFINES"
        )

    def check_contradiction(self, a: ReasoningArtifact, b: ReasoningArtifact) -> str:
        """Ask the model whether two claims contradict.

        Returns the model's upper-cased verdict word, or "" if the response
        carried no content blocks.
        """
        response = self.client.messages.create(
            model=self.model,
            max_tokens=10,
            messages=[{"role": "user", "content": self._build_prompt(a, b)}],
        )
        # content can be an empty list (e.g. the model stops before emitting
        # any text block); the original indexed [0] unconditionally.
        if not response.content:
            return ""
        return response.content[0].text.strip().upper()
69
+
70
+
71
+ def _cosine_similarity(a: list[float], b: list[float]) -> float:
72
+ va = np.array(a, dtype=np.float32)
73
+ vb = np.array(b, dtype=np.float32)
74
+ norm_a = np.linalg.norm(va)
75
+ norm_b = np.linalg.norm(vb)
76
+ if norm_a == 0 or norm_b == 0:
77
+ return 0.0
78
+ return float(np.dot(va, vb) / (norm_a * norm_b))
79
+
80
+
81
+ class ConflictDetector:
82
+ def __init__(
83
+ self,
84
+ store,
85
+ llm_client: Optional[LLMClient] = None,
86
+ ):
87
+ self.store = store
88
+ self.llm_client = llm_client or OpenAIConflictClient()
89
+
90
+ def detect(
91
+ self,
92
+ new_artifact: ReasoningArtifact,
93
+ existing_artifacts: list[ReasoningArtifact],
94
+ ) -> list[Conflict]:
95
+ if not new_artifact.topic_embedding:
96
+ return []
97
+
98
+ # stage 1: cosine similarity filter
99
+ candidates = []
100
+ for existing in existing_artifacts:
101
+ if not existing.topic_embedding:
102
+ continue
103
+ if existing.id == new_artifact.id:
104
+ continue
105
+ sim = _cosine_similarity(
106
+ new_artifact.topic_embedding, existing.topic_embedding
107
+ )
108
+ if sim > 0.75:
109
+ candidates.append(existing)
110
+
111
+ # stage 2: LLM verification
112
+ conflicts = []
113
+ for candidate in candidates:
114
+ verdict = self.llm_client.check_contradiction(new_artifact, candidate)
115
+
116
+ if verdict == "CONTRADICTS":
117
+ conflict = Conflict(
118
+ artifact_ids=[new_artifact.id, candidate.id],
119
+ description=(
120
+ f"LLM detected contradiction between "
121
+ f"agent {new_artifact.agent_id} and "
122
+ f"agent {candidate.agent_id}"
123
+ ),
124
+ )
125
+ new_artifact.status = "contested"
126
+ candidate.status = "contested"
127
+ self.store._save_artifact(new_artifact)
128
+ self.store._save_artifact(candidate)
129
+ self.store._save_conflict(conflict)
130
+ conflicts.append(conflict)
131
+
132
+ elif verdict == "REFINES":
133
+ if candidate.id not in new_artifact.dependencies:
134
+ new_artifact.dependencies.append(candidate.id)
135
+ self.store._save_artifact(new_artifact)
136
+
137
+ return conflicts