argus-debate-ai 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- argus_debate_ai-1.0.0/LICENSE +21 -0
- argus_debate_ai-1.0.0/MANIFEST.in +23 -0
- argus_debate_ai-1.0.0/PKG-INFO +537 -0
- argus_debate_ai-1.0.0/PUBLISHING.md +186 -0
- argus_debate_ai-1.0.0/README.md +475 -0
- argus_debate_ai-1.0.0/argus/__init__.py +182 -0
- argus_debate_ai-1.0.0/argus/agents/__init__.py +56 -0
- argus_debate_ai-1.0.0/argus/agents/base.py +236 -0
- argus_debate_ai-1.0.0/argus/agents/jury.py +360 -0
- argus_debate_ai-1.0.0/argus/agents/moderator.py +410 -0
- argus_debate_ai-1.0.0/argus/agents/refuter.py +331 -0
- argus_debate_ai-1.0.0/argus/agents/specialist.py +369 -0
- argus_debate_ai-1.0.0/argus/cdag/__init__.py +49 -0
- argus_debate_ai-1.0.0/argus/cdag/edges.py +345 -0
- argus_debate_ai-1.0.0/argus/cdag/graph.py +709 -0
- argus_debate_ai-1.0.0/argus/cdag/nodes.py +452 -0
- argus_debate_ai-1.0.0/argus/cdag/propagation.py +484 -0
- argus_debate_ai-1.0.0/argus/cli.py +289 -0
- argus_debate_ai-1.0.0/argus/core/__init__.py +27 -0
- argus_debate_ai-1.0.0/argus/core/config.py +524 -0
- argus_debate_ai-1.0.0/argus/core/llm/__init__.py +48 -0
- argus_debate_ai-1.0.0/argus/core/llm/anthropic.py +335 -0
- argus_debate_ai-1.0.0/argus/core/llm/base.py +528 -0
- argus_debate_ai-1.0.0/argus/core/llm/gemini.py +350 -0
- argus_debate_ai-1.0.0/argus/core/llm/ollama.py +411 -0
- argus_debate_ai-1.0.0/argus/core/llm/openai.py +351 -0
- argus_debate_ai-1.0.0/argus/core/llm/registry.py +250 -0
- argus_debate_ai-1.0.0/argus/core/models.py +583 -0
- argus_debate_ai-1.0.0/argus/decision/__init__.py +55 -0
- argus_debate_ai-1.0.0/argus/decision/bayesian.py +369 -0
- argus_debate_ai-1.0.0/argus/decision/calibration.py +420 -0
- argus_debate_ai-1.0.0/argus/decision/eig.py +354 -0
- argus_debate_ai-1.0.0/argus/decision/planner.py +478 -0
- argus_debate_ai-1.0.0/argus/knowledge/__init__.py +48 -0
- argus_debate_ai-1.0.0/argus/knowledge/chunking.py +384 -0
- argus_debate_ai-1.0.0/argus/knowledge/embeddings.py +239 -0
- argus_debate_ai-1.0.0/argus/knowledge/indexing.py +411 -0
- argus_debate_ai-1.0.0/argus/knowledge/ingestion.py +389 -0
- argus_debate_ai-1.0.0/argus/orchestrator.py +293 -0
- argus_debate_ai-1.0.0/argus/provenance/__init__.py +28 -0
- argus_debate_ai-1.0.0/argus/provenance/integrity.py +250 -0
- argus_debate_ai-1.0.0/argus/provenance/ledger.py +361 -0
- argus_debate_ai-1.0.0/argus/py.typed +1 -0
- argus_debate_ai-1.0.0/argus/retrieval/__init__.py +33 -0
- argus_debate_ai-1.0.0/argus/retrieval/cite_critique.py +209 -0
- argus_debate_ai-1.0.0/argus/retrieval/hybrid.py +298 -0
- argus_debate_ai-1.0.0/argus/retrieval/reranker.py +164 -0
- argus_debate_ai-1.0.0/argus_debate_ai.egg-info/PKG-INFO +537 -0
- argus_debate_ai-1.0.0/argus_debate_ai.egg-info/SOURCES.txt +53 -0
- argus_debate_ai-1.0.0/argus_debate_ai.egg-info/dependency_links.txt +1 -0
- argus_debate_ai-1.0.0/argus_debate_ai.egg-info/entry_points.txt +2 -0
- argus_debate_ai-1.0.0/argus_debate_ai.egg-info/requires.txt +38 -0
- argus_debate_ai-1.0.0/argus_debate_ai.egg-info/top_level.txt +1 -0
- argus_debate_ai-1.0.0/pyproject.toml +131 -0
- argus_debate_ai-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Ankush Pandey, Rishi Ghodawat, Ronit Mehta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Manifest for ARGUS package
|
|
2
|
+
|
|
3
|
+
# Include documentation
|
|
4
|
+
include README.md
|
|
5
|
+
include LICENSE
|
|
6
|
+
include PUBLISHING.md
|
|
7
|
+
|
|
8
|
+
# Include type hints marker
|
|
9
|
+
include argus/py.typed
|
|
10
|
+
|
|
11
|
+
# Exclude the test suite (including any test data) from the sdist
|
|
12
|
+
recursive-exclude tests *
|
|
13
|
+
|
|
14
|
+
# Exclude development files
|
|
15
|
+
exclude .gitignore
|
|
16
|
+
exclude .pre-commit-config.yaml
|
|
17
|
+
recursive-exclude .github *
|
|
18
|
+
|
|
19
|
+
# Exclude build artifacts
|
|
20
|
+
global-exclude *.pyc
|
|
21
|
+
global-exclude *.pyo
|
|
22
|
+
global-exclude __pycache__
|
|
23
|
+
global-exclude *.egg-info
|
|
@@ -0,0 +1,537 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: argus-debate-ai
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: ARGUS: A Debate-Native Multi-Agent AI Architecture for Accelerating Scientific Discovery
|
|
5
|
+
Author: Ankush Pandey, Rishi Ghodawat, Ronit Mehta
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/argus-ai/argus
|
|
8
|
+
Project-URL: Documentation, https://argus-ai.readthedocs.io
|
|
9
|
+
Project-URL: Repository, https://github.com/argus-ai/argus
|
|
10
|
+
Project-URL: Issues, https://github.com/argus-ai/argus/issues
|
|
11
|
+
Keywords: ai,multi-agent,debate,reasoning,scientific-discovery,llm,knowledge-graph,argumentation,bayesian-inference,provenance
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: pydantic<3.0,>=2.0
|
|
27
|
+
Requires-Dist: pydantic-settings<3.0,>=2.0
|
|
28
|
+
Requires-Dist: numpy<2.0,>=1.24
|
|
29
|
+
Requires-Dist: scipy<2.0,>=1.10
|
|
30
|
+
Requires-Dist: networkx<4.0,>=3.0
|
|
31
|
+
Requires-Dist: litellm<2.0,>=1.40
|
|
32
|
+
Requires-Dist: openai<2.0,>=1.30
|
|
33
|
+
Requires-Dist: anthropic<1.0,>=0.25
|
|
34
|
+
Requires-Dist: google-generativeai<1.0,>=0.5
|
|
35
|
+
Requires-Dist: sentence-transformers<3.0,>=2.2
|
|
36
|
+
Requires-Dist: rank-bm25<1.0,>=0.2
|
|
37
|
+
Requires-Dist: faiss-cpu<2.0,>=1.7
|
|
38
|
+
Requires-Dist: pymupdf<2.0,>=1.23
|
|
39
|
+
Requires-Dist: beautifulsoup4<5.0,>=4.12
|
|
40
|
+
Requires-Dist: lxml<6.0,>=4.9
|
|
41
|
+
Requires-Dist: chardet<6.0,>=5.0
|
|
42
|
+
Requires-Dist: click<9.0,>=8.0
|
|
43
|
+
Requires-Dist: rich<14.0,>=13.0
|
|
44
|
+
Requires-Dist: python-dotenv<2.0,>=1.0
|
|
45
|
+
Requires-Dist: httpx<1.0,>=0.25
|
|
46
|
+
Requires-Dist: tenacity<9.0,>=8.2
|
|
47
|
+
Requires-Dist: tiktoken<1.0,>=0.5
|
|
48
|
+
Provides-Extra: dev
|
|
49
|
+
Requires-Dist: pytest<9.0,>=7.4; extra == "dev"
|
|
50
|
+
Requires-Dist: pytest-asyncio<1.0,>=0.21; extra == "dev"
|
|
51
|
+
Requires-Dist: pytest-cov<6.0,>=4.1; extra == "dev"
|
|
52
|
+
Requires-Dist: black<25.0,>=23.0; extra == "dev"
|
|
53
|
+
Requires-Dist: ruff<1.0,>=0.1; extra == "dev"
|
|
54
|
+
Requires-Dist: mypy<2.0,>=1.5; extra == "dev"
|
|
55
|
+
Requires-Dist: pre-commit<4.0,>=3.4; extra == "dev"
|
|
56
|
+
Provides-Extra: ollama
|
|
57
|
+
Requires-Dist: ollama<1.0,>=0.2; extra == "ollama"
|
|
58
|
+
Provides-Extra: all
|
|
59
|
+
Requires-Dist: argus-debate-ai[dev]; extra == "all"
|
|
60
|
+
Requires-Dist: argus-debate-ai[ollama]; extra == "all"
|
|
61
|
+
Dynamic: license-file
|
|
62
|
+
|
|
63
|
+
# ARGUS
|
|
64
|
+
|
|
65
|
+
**Agentic Research & Governance Unified System**
|
|
66
|
+
|
|
67
|
+
*A debate-native, multi-agent AI framework for evidence-based reasoning with structured argumentation, decision-theoretic planning, and full provenance tracking.*
|
|
68
|
+
|
|
69
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
70
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
71
|
+
[PyPI: argus-debate-ai](https://pypi.org/project/argus-debate-ai/)
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Overview
|
|
76
|
+
|
|
77
|
+
ARGUS implements **Research Debate Chain (RDC)** - a novel approach to AI reasoning that structures knowledge evaluation as multi-agent debates. Instead of single-pass inference, ARGUS orchestrates specialist agents that gather evidence, generate rebuttals, and render verdicts through Bayesian aggregation.
|
|
78
|
+
|
|
79
|
+
### Key Innovations
|
|
80
|
+
|
|
81
|
+
- **Conceptual Debate Graph (C-DAG)**: A directed graph structure where propositions, evidence, and rebuttals are nodes with signed edges representing support/attack relationships
|
|
82
|
+
- **Evidence-Directed Debate Orchestration (EDDO)**: Algorithm for managing multi-round debates with stopping criteria
|
|
83
|
+
- **Value of Information Planning**: Decision-theoretic experiment selection using Expected Information Gain
|
|
84
|
+
- **Full Provenance**: PROV-O compatible ledger with hash-chain integrity for audit trails
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## Features
|
|
89
|
+
|
|
90
|
+
### Multi-Agent Debate System
|
|
91
|
+
- **Moderator**: Creates debate agendas, manages rounds, evaluates stopping criteria
|
|
92
|
+
- **Specialist Agents**: Domain-specific evidence gathering with hybrid retrieval
|
|
93
|
+
- **Refuter**: Generates counter-evidence and methodological critiques
|
|
94
|
+
- **Jury**: Aggregates evidence via Bayesian updating, renders verdicts
|
|
95
|
+
|
|
96
|
+
### Conceptual Debate Graph (C-DAG)
|
|
97
|
+
- **Node Types**: Propositions, Evidence, Rebuttals, Findings, Assumptions
|
|
98
|
+
- **Edge Types**: Supports, Attacks, Refines, Rebuts with signed weights
|
|
99
|
+
- **Propagation**: Log-odds Bayesian belief updating across the graph
|
|
100
|
+
- **Visualization**: Export to NetworkX for analysis
|
|
101
|
+
|
|
102
|
+
### Hybrid Retrieval System
|
|
103
|
+
- **BM25 Sparse**: Traditional keyword-based retrieval
|
|
104
|
+
- **FAISS Dense**: Semantic vector search with sentence-transformers
|
|
105
|
+
- **Fusion Methods**: Weighted combination or Reciprocal Rank Fusion (RRF)
|
|
106
|
+
- **Cross-Encoder Reranking**: Neural reranking for precision
|
|
107
|
+
|
|
108
|
+
### Decision-Theoretic Planning
|
|
109
|
+
- **Expected Information Gain (EIG)**: Monte Carlo estimation for experiment value
|
|
110
|
+
- **VoI Planner**: Knapsack-based optimal action selection under budget
|
|
111
|
+
- **Calibration**: Brier score, ECE, temperature scaling for confidence tuning
|
|
112
|
+
|
|
113
|
+
### Provenance & Governance
|
|
114
|
+
- **PROV-O Compatible**: W3C standard provenance model
|
|
115
|
+
- **Hash-Chain Integrity**: SHA-256 linked events for tamper detection
|
|
116
|
+
- **Attestations**: Cryptographic proofs for content integrity
|
|
117
|
+
- **Query API**: Filter events by entity, agent, time range
|
|
118
|
+
|
|
119
|
+
### LLM Provider Support
|
|
120
|
+
|
|
121
|
+
| Provider | Models | Features |
|
|
122
|
+
|----------|--------|----------|
|
|
123
|
+
| **OpenAI** | GPT-4o, GPT-4, GPT-3.5 | Generate, Stream, Embed |
|
|
124
|
+
| **Anthropic** | Claude 3.5, Claude 3 | Generate, Stream |
|
|
125
|
+
| **Google** | Gemini 1.5 Pro/Flash | Generate, Stream, Embed |
|
|
126
|
+
| **Ollama** | Llama, Mistral, Phi | Local deployment |
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Installation
|
|
131
|
+
|
|
132
|
+
### From PyPI
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
pip install argus-debate-ai
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### From Source
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
git clone https://github.com/argus-ai/argus.git
|
|
142
|
+
cd argus
|
|
143
|
+
pip install -e ".[dev]"
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Optional Dependencies
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# For all features including dev tools
|
|
150
|
+
pip install "argus-debate-ai[all]"
|
|
151
|
+
|
|
152
|
+
# For Ollama local LLM support
|
|
153
|
+
pip install "argus-debate-ai[ollama]"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Quick Start
|
|
159
|
+
|
|
160
|
+
### Basic Usage
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
from argus import RDCOrchestrator, get_llm
|
|
164
|
+
|
|
165
|
+
# Initialize with any supported LLM
|
|
166
|
+
llm = get_llm("openai", model="gpt-4o")
|
|
167
|
+
|
|
168
|
+
# Run a debate on a proposition
|
|
169
|
+
orchestrator = RDCOrchestrator(llm=llm, max_rounds=5)
|
|
170
|
+
result = orchestrator.debate(
|
|
171
|
+
"The new treatment reduces symptoms by more than 20%",
|
|
172
|
+
prior=0.5, # Start with 50/50 uncertainty
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
print(f"Verdict: {result.verdict.label}")
|
|
176
|
+
print(f"Posterior: {result.verdict.posterior:.3f}")
|
|
177
|
+
print(f"Evidence: {result.num_evidence} items")
|
|
178
|
+
print(f"Reasoning: {result.verdict.reasoning}")
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Building a Debate Graph Manually
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
from argus import CDAG, Proposition, Evidence, EdgeType
|
|
185
|
+
from argus.cdag.nodes import EvidenceType
|
|
186
|
+
from argus.cdag.propagation import compute_posterior
|
|
187
|
+
|
|
188
|
+
# Create the graph
|
|
189
|
+
graph = CDAG(name="drug_efficacy_debate")
|
|
190
|
+
|
|
191
|
+
# Add the proposition to evaluate
|
|
192
|
+
prop = Proposition(
|
|
193
|
+
text="Drug X is effective for treating condition Y",
|
|
194
|
+
prior=0.5,
|
|
195
|
+
domain="clinical",
|
|
196
|
+
)
|
|
197
|
+
graph.add_proposition(prop)
|
|
198
|
+
|
|
199
|
+
# Add supporting evidence
|
|
200
|
+
trial_evidence = Evidence(
|
|
201
|
+
text="Phase 3 RCT showed 35% symptom reduction (n=500, p<0.001)",
|
|
202
|
+
evidence_type=EvidenceType.EMPIRICAL,
|
|
203
|
+
polarity=1, # Supports
|
|
204
|
+
confidence=0.9,
|
|
205
|
+
relevance=0.95,
|
|
206
|
+
quality=0.85,
|
|
207
|
+
)
|
|
208
|
+
graph.add_evidence(trial_evidence, prop.id, EdgeType.SUPPORTS)
|
|
209
|
+
|
|
210
|
+
# Add challenging evidence
|
|
211
|
+
side_effect = Evidence(
|
|
212
|
+
text="15% of patients experienced adverse events",
|
|
213
|
+
evidence_type=EvidenceType.EMPIRICAL,
|
|
214
|
+
polarity=-1, # Attacks
|
|
215
|
+
confidence=0.8,
|
|
216
|
+
relevance=0.7,
|
|
217
|
+
)
|
|
218
|
+
graph.add_evidence(side_effect, prop.id, EdgeType.ATTACKS)
|
|
219
|
+
|
|
220
|
+
# Compute Bayesian posterior
|
|
221
|
+
posterior = compute_posterior(graph, prop.id)
|
|
222
|
+
print(f"Posterior probability: {posterior:.3f}")
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Document Ingestion & Retrieval
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
from argus import DocumentLoader, Chunker, EmbeddingGenerator
|
|
229
|
+
from argus.retrieval import HybridRetriever
|
|
230
|
+
|
|
231
|
+
# Load documents
|
|
232
|
+
loader = DocumentLoader()
|
|
233
|
+
doc = loader.load("research_paper.pdf")
|
|
234
|
+
|
|
235
|
+
# Chunk with overlap
|
|
236
|
+
chunker = Chunker(chunk_size=512, chunk_overlap=50)
|
|
237
|
+
chunks = chunker.chunk(doc)
|
|
238
|
+
|
|
239
|
+
# Create hybrid retriever
|
|
240
|
+
retriever = HybridRetriever(
|
|
241
|
+
embedding_model="all-MiniLM-L6-v2",
|
|
242
|
+
lambda_param=0.7, # Weight toward dense retrieval
|
|
243
|
+
use_reranker=True,
|
|
244
|
+
)
|
|
245
|
+
retriever.index_chunks(chunks)
|
|
246
|
+
|
|
247
|
+
# Search
|
|
248
|
+
results = retriever.retrieve("treatment efficacy results", top_k=10)
|
|
249
|
+
for r in results:
|
|
250
|
+
print(f"[{r.rank}] Score: {r.score:.3f} - {r.chunk.text[:100]}...")
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### Multi-Agent Debate
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
from argus import get_llm
|
|
257
|
+
from argus.agents import Moderator, Specialist, Refuter, Jury
|
|
258
|
+
from argus import CDAG, Proposition
|
|
259
|
+
|
|
260
|
+
llm = get_llm("anthropic", model="claude-3-5-sonnet-20241022")
|
|
261
|
+
|
|
262
|
+
# Initialize agents
|
|
263
|
+
moderator = Moderator(llm)
|
|
264
|
+
specialist = Specialist(llm, domain="clinical")
|
|
265
|
+
refuter = Refuter(llm)
|
|
266
|
+
jury = Jury(llm)
|
|
267
|
+
|
|
268
|
+
# Create debate
|
|
269
|
+
graph = CDAG()
|
|
270
|
+
prop = Proposition(text="The intervention is cost-effective", prior=0.5)
|
|
271
|
+
graph.add_proposition(prop)
|
|
272
|
+
|
|
273
|
+
# Moderator creates agenda
|
|
274
|
+
agenda = moderator.create_agenda(graph, prop.id)
|
|
275
|
+
|
|
276
|
+
# Specialists gather evidence
|
|
277
|
+
evidence = specialist.gather_evidence(graph, prop.id)
|
|
278
|
+
|
|
279
|
+
# Refuter challenges
|
|
280
|
+
rebuttals = refuter.generate_rebuttals(graph, prop.id)
|
|
281
|
+
|
|
282
|
+
# Jury renders verdict
|
|
283
|
+
verdict = jury.evaluate(graph, prop.id)
|
|
284
|
+
print(f"Verdict: {verdict.label} (posterior={verdict.posterior:.3f})")
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Command Line Interface
|
|
290
|
+
|
|
291
|
+
ARGUS provides a CLI for common operations:
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
# Run a debate
|
|
295
|
+
argus debate "The hypothesis is supported by evidence" --prior 0.5 --rounds 3
|
|
296
|
+
|
|
297
|
+
# Quick evaluation (single LLM call)
|
|
298
|
+
argus evaluate "Climate change increases wildfire frequency"
|
|
299
|
+
|
|
300
|
+
# Ingest documents into index
|
|
301
|
+
argus ingest ./documents --output ./index
|
|
302
|
+
|
|
303
|
+
# Show configuration
|
|
304
|
+
argus config
|
|
305
|
+
|
|
306
|
+
# Specify provider
|
|
307
|
+
argus debate "Query" --provider anthropic --model claude-3-5-sonnet-20241022
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
---
|
|
311
|
+
|
|
312
|
+
## Configuration
|
|
313
|
+
|
|
314
|
+
### Environment Variables
|
|
315
|
+
|
|
316
|
+
```bash
|
|
317
|
+
# LLM API Keys
|
|
318
|
+
export OPENAI_API_KEY="sk-..."
|
|
319
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
320
|
+
export GOOGLE_API_KEY="..."
|
|
321
|
+
|
|
322
|
+
# Default settings
|
|
323
|
+
export ARGUS_DEFAULT_PROVIDER="openai"
|
|
324
|
+
export ARGUS_DEFAULT_MODEL="gpt-4o"
|
|
325
|
+
export ARGUS_TEMPERATURE="0.7"
|
|
326
|
+
export ARGUS_MAX_TOKENS="2048"
|
|
327
|
+
|
|
328
|
+
# Ollama (local)
|
|
329
|
+
export ARGUS_OLLAMA_HOST="http://localhost:11434"
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
### Programmatic Configuration
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
from argus import ArgusConfig, get_config
|
|
336
|
+
|
|
337
|
+
config = ArgusConfig(
|
|
338
|
+
default_provider="anthropic",
|
|
339
|
+
default_model="claude-3-5-sonnet-20241022",
|
|
340
|
+
temperature=0.5,
|
|
341
|
+
max_tokens=4096,
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
# Or get global config
|
|
345
|
+
config = get_config()
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
---
|
|
349
|
+
|
|
350
|
+
## Architecture
|
|
351
|
+
|
|
352
|
+
```
|
|
353
|
+
+-----------------------------------------------------------------------------+
|
|
354
|
+
| ARGUS Architecture |
|
|
355
|
+
+-----------------------------------------------------------------------------+
|
|
356
|
+
| |
|
|
357
|
+
| +---------------+ +---------------+ +---------------+ |
|
|
358
|
+
| | Moderator |--->| Specialists |--->| Refuter | |
|
|
359
|
+
| | (Planner) | | (Evidence) | | (Challenges) | |
|
|
360
|
+
| +-------+-------+ +-------+-------+ +-------+-------+ |
|
|
361
|
+
| | | | |
|
|
362
|
+
| v v v |
|
|
363
|
+
| +---------------------------------------------------------------------+ |
|
|
364
|
+
| | C-DAG (Debate Graph) | |
|
|
365
|
+
| | +--------+ +----------+ +----------+ | |
|
|
366
|
+
| | | Props |---->| Evidence |---->| Rebuttals| | |
|
|
367
|
+
| | +--------+ +----------+ +----------+ | |
|
|
368
|
+
| | ^ | | | |
|
|
369
|
+
| | +--------------+---------------+ | |
|
|
370
|
+
| | Signed Influence Propagation | |
|
|
371
|
+
| +---------------------------------------------------------------------+ |
|
|
372
|
+
| | |
|
|
373
|
+
| v |
|
|
374
|
+
| +---------------------------------------------------------------------+ |
|
|
375
|
+
| | Jury (Verdict) | |
|
|
376
|
+
| | Bayesian Aggregation -> Posterior -> Label | |
|
|
377
|
+
| +---------------------------------------------------------------------+ |
|
|
378
|
+
| |
|
|
379
|
+
| +-----------------+ +-----------------+ +-----------------+ |
|
|
380
|
+
| | Knowledge Layer | | Decision Layer | | Provenance | |
|
|
381
|
+
| | - Ingestion | | - Bayesian | | - PROV-O Ledger | |
|
|
382
|
+
| | - Chunking | | - EIG/VoI | | - Hash Chain | |
|
|
383
|
+
| | - Hybrid Index | | - Calibration | | - Attestations | |
|
|
384
|
+
| +-----------------+ +-----------------+ +-----------------+ |
|
|
385
|
+
| |
|
|
386
|
+
+-----------------------------------------------------------------------------+
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
### Module Overview
|
|
390
|
+
|
|
391
|
+
| Module | Description |
|
|
392
|
+
|--------|-------------|
|
|
393
|
+
| `argus.core` | Configuration, data models, LLM abstractions |
|
|
394
|
+
| `argus.cdag` | Conceptual Debate Graph implementation |
|
|
395
|
+
| `argus.decision` | Bayesian updating, EIG, VoI planning, calibration |
|
|
396
|
+
| `argus.knowledge` | Document ingestion, chunking, embeddings, indexing |
|
|
397
|
+
| `argus.retrieval` | Hybrid retrieval, reranking |
|
|
398
|
+
| `argus.agents` | Moderator, Specialist, Refuter, Jury agents |
|
|
399
|
+
| `argus.provenance` | PROV-O ledger, integrity, attestations |
|
|
400
|
+
| `argus.orchestrator` | RDC orchestration engine |
|
|
401
|
+
|
|
402
|
+
---
|
|
403
|
+
|
|
404
|
+
## Algorithms
|
|
405
|
+
|
|
406
|
+
### Signed Influence Propagation
|
|
407
|
+
|
|
408
|
+
The C-DAG uses log-odds space for numerically stable belief propagation:
|
|
409
|
+
|
|
410
|
+
```
|
|
411
|
+
posterior = sigmoid(log_odds(prior) + sum(signed_weight_i * log(LR_i)))
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
Where:
|
|
415
|
+
- `sigmoid` is the logistic function
|
|
416
|
+
- `LR_i` is the likelihood ratio for evidence i
|
|
417
|
+
- `signed_weight = polarity * confidence * relevance * quality`
|
|
418
|
+
|
|
419
|
+
### Expected Information Gain
|
|
420
|
+
|
|
421
|
+
For experiment planning, ARGUS computes EIG via Monte Carlo:
|
|
422
|
+
|
|
423
|
+
```
|
|
424
|
+
EIG(a) = H(p) - E_y[H(p|y)]
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
Where `H` is entropy and the expectation is over possible outcomes.
|
|
428
|
+
|
|
429
|
+
### Calibration
|
|
430
|
+
|
|
431
|
+
Temperature scaling optimizes:
|
|
432
|
+
|
|
433
|
+
```
|
|
434
|
+
T* = argmin_T sum(-y_i * log(sigmoid(z_i/T)) - (1-y_i) * log(1-sigmoid(z_i/T)))
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
439
|
+
## Testing
|
|
440
|
+
|
|
441
|
+
```bash
|
|
442
|
+
# Run all tests
|
|
443
|
+
pytest
|
|
444
|
+
|
|
445
|
+
# Run with coverage
|
|
446
|
+
pytest --cov=argus --cov-report=html
|
|
447
|
+
|
|
448
|
+
# Run specific test module
|
|
449
|
+
pytest tests/unit/test_cdag.py -v
|
|
450
|
+
|
|
451
|
+
# Run integration tests
|
|
452
|
+
pytest tests/integration/ -v
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
---
|
|
456
|
+
|
|
457
|
+
## Examples
|
|
458
|
+
|
|
459
|
+
### Clinical Evidence Evaluation
|
|
460
|
+
|
|
461
|
+
```python
|
|
462
|
+
from argus import RDCOrchestrator, get_llm
|
|
463
|
+
from argus.retrieval import HybridRetriever
|
|
464
|
+
|
|
465
|
+
# Index clinical literature (clinical_chunks: chunks previously produced by Chunker.chunk)
|
|
466
|
+
retriever = HybridRetriever()
|
|
467
|
+
retriever.index_chunks(clinical_chunks)
|
|
468
|
+
|
|
469
|
+
# Evaluate treatment claim
|
|
470
|
+
orchestrator = RDCOrchestrator(
|
|
471
|
+
llm=get_llm("openai", model="gpt-4o"),
|
|
472
|
+
max_rounds=5,
|
|
473
|
+
)
|
|
474
|
+
|
|
475
|
+
result = orchestrator.debate(
|
|
476
|
+
"Metformin reduces HbA1c by >1% in Type 2 diabetes",
|
|
477
|
+
prior=0.6, # Prior based on existing knowledge
|
|
478
|
+
retriever=retriever,
|
|
479
|
+
domain="clinical",
|
|
480
|
+
)
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
### Research Claim Verification
|
|
484
|
+
|
|
485
|
+
```python
|
|
486
|
+
from argus import CDAG, Proposition, Evidence
|
|
487
|
+
from argus.cdag.propagation import compute_all_posteriors
|
|
488
|
+
|
|
489
|
+
graph = CDAG(name="research_verification")
|
|
490
|
+
|
|
491
|
+
# Main claim
|
|
492
|
+
claim = Proposition(
|
|
493
|
+
text="Neural scaling laws predict emergent capabilities",
|
|
494
|
+
prior=0.5,
|
|
495
|
+
)
|
|
496
|
+
graph.add_proposition(claim)
|
|
497
|
+
|
|
498
|
+
# Add evidence from multiple papers
|
|
499
|
+
# ... (add supporting/attacking evidence)
|
|
500
|
+
|
|
501
|
+
# Compute all posteriors
|
|
502
|
+
posteriors = compute_all_posteriors(graph)
|
|
503
|
+
|
|
504
|
+
for prop_id, posterior in posteriors.items():
|
|
505
|
+
prop = graph.get_proposition(prop_id)
|
|
506
|
+
print(f"{prop.text[:50]}... : {posterior:.3f}")
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
---
|
|
510
|
+
|
|
511
|
+
## Contributing
|
|
512
|
+
|
|
513
|
+
We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
514
|
+
|
|
515
|
+
1. Fork the repository
|
|
516
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
517
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
518
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
519
|
+
5. Open a Pull Request
|
|
520
|
+
|
|
521
|
+
---
|
|
522
|
+
|
|
523
|
+
## License
|
|
524
|
+
|
|
525
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
526
|
+
|
|
527
|
+
---
|
|
528
|
+
|
|
529
|
+
## Acknowledgments
|
|
530
|
+
|
|
531
|
+
- Inspired by debate-native reasoning approaches in AI safety research
|
|
532
|
+
- Built on excellent open-source libraries: Pydantic, NetworkX, FAISS, Sentence-Transformers
|
|
533
|
+
- LLM integrations powered by OpenAI, Anthropic, and Google APIs
|
|
534
|
+
|
|
535
|
+
---
|
|
536
|
+
|
|
537
|
+
**[Documentation](https://argus-ai.readthedocs.io)** | **[PyPI](https://pypi.org/project/argus-debate-ai/)** | **[GitHub](https://github.com/argus-ai/argus)**
|