dodar 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dodar-0.1.0/.gitignore +41 -0
- dodar-0.1.0/PKG-INFO +95 -0
- dodar-0.1.0/README.md +59 -0
- dodar-0.1.0/pyproject.toml +46 -0
- dodar-0.1.0/src/dodar/__init__.py +37 -0
- dodar-0.1.0/src/dodar/core.py +335 -0
- dodar-0.1.0/src/dodar/prompts.py +159 -0
- dodar-0.1.0/src/dodar/runners.py +156 -0
dodar-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
.eggs/
|
|
8
|
+
*.egg
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
|
|
15
|
+
# IDE
|
|
16
|
+
.idea/
|
|
17
|
+
.vscode/
|
|
18
|
+
*.swp
|
|
19
|
+
*.swo
|
|
20
|
+
|
|
21
|
+
# Environment
|
|
22
|
+
.env
|
|
23
|
+
.env.local
|
|
24
|
+
|
|
25
|
+
# Data (runs and scores are generated, not committed)
|
|
26
|
+
backend/data/runs/
|
|
27
|
+
backend/data/scores/
|
|
28
|
+
|
|
29
|
+
# Node
|
|
30
|
+
node_modules/
|
|
31
|
+
frontend/dist/
|
|
32
|
+
frontend/.vite/
|
|
33
|
+
|
|
34
|
+
# OS
|
|
35
|
+
.DS_Store
|
|
36
|
+
Thumbs.db
|
|
37
|
+
|
|
38
|
+
# Testing
|
|
39
|
+
.pytest_cache/
|
|
40
|
+
.coverage
|
|
41
|
+
htmlcov/
|
dodar-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dodar
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: DODAR — Structured reasoning framework for AI agents, adapted from aviation CRM
|
|
5
|
+
Project-URL: Homepage, https://dodar.crox.io
|
|
6
|
+
Project-URL: Documentation, https://dodar.crox.io/framework
|
|
7
|
+
Project-URL: Repository, https://github.com/afieldofdreams/dodar
|
|
8
|
+
Project-URL: Research, https://dodar.crox.io/research
|
|
9
|
+
Author-email: Adam Field <adam@crox.io>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
Keywords: agents,ai,crm,decision-making,dodar,llm,reasoning
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: anthropic>=0.30
|
|
24
|
+
Requires-Dist: openai>=1.30
|
|
25
|
+
Provides-Extra: all
|
|
26
|
+
Requires-Dist: google-genai>=1.0; extra == 'all'
|
|
27
|
+
Requires-Dist: httpx>=0.27; extra == 'all'
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
31
|
+
Provides-Extra: google
|
|
32
|
+
Requires-Dist: google-genai>=1.0; extra == 'google'
|
|
33
|
+
Provides-Extra: ollama
|
|
34
|
+
Requires-Dist: httpx>=0.27; extra == 'ollama'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# DODAR
|
|
38
|
+
|
|
39
|
+
**Structured reasoning framework for AI agents** — adapted from aviation Crew Resource Management.
|
|
40
|
+
|
|
41
|
+
DODAR (Diagnose, Options, Decide, Action, Review) imposes explicit gates at each stage of analysis, preventing the reasoning failures that LLMs share with humans under pressure: premature anchoring, option narrowing, and treating decisions as final.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install dodar
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from dodar import DODAR
|
|
53
|
+
|
|
54
|
+
dodar = DODAR(model="gpt-4.1-mini")
|
|
55
|
+
result = dodar.analyze("Your scenario here...")
|
|
56
|
+
|
|
57
|
+
# Structured access to each reasoning phase
|
|
58
|
+
result.diagnosis.hypotheses # Ranked competing causes
|
|
59
|
+
result.options.alternatives # Distinct paths with trade-offs
|
|
60
|
+
result.decision.recommendation # The call + justification
|
|
61
|
+
result.action.steps # Sequenced implementation plan
|
|
62
|
+
result.review.failure_modes # Self-critique
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Pipeline mode
|
|
66
|
+
|
|
67
|
+
For maximum quality, use the pipeline where each DODAR phase runs as a separate model call:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from dodar import DODAR
|
|
71
|
+
|
|
72
|
+
dodar = DODAR(model="gpt-4.1-mini", mode="pipeline")
|
|
73
|
+
result = dodar.analyze("Your scenario here...")
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Research shows GPT-4.1 Mini + pipeline scores 104% of Claude Opus 4.6 zero-shot quality at 89% lower cost. [Read the whitepaper](https://dodar.crox.io/research).
|
|
77
|
+
|
|
78
|
+
## Supported models
|
|
79
|
+
|
|
80
|
+
| Provider | Models |
|
|
81
|
+
|----------|--------|
|
|
82
|
+
| Anthropic | claude-opus-4-6, claude-sonnet-4-5, claude-haiku-4-5 |
|
|
83
|
+
| OpenAI | gpt-5.4, gpt-4o, gpt-4o-mini, gpt-4.1-mini, gpt-4.1-nano |
|
|
84
|
+
| Google | gemini-2.0-flash (install with `pip install dodar[google]`) |
|
|
85
|
+
| Ollama | Any local model (install with `pip install dodar[ollama]`) |
|
|
86
|
+
|
|
87
|
+
## Links
|
|
88
|
+
|
|
89
|
+
- [Documentation](https://dodar.crox.io/framework)
|
|
90
|
+
- [Research & whitepaper](https://dodar.crox.io/research)
|
|
91
|
+
- [GitHub](https://github.com/afieldofdreams/dodar)
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
MIT
|
dodar-0.1.0/README.md
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# DODAR
|
|
2
|
+
|
|
3
|
+
**Structured reasoning framework for AI agents** — adapted from aviation Crew Resource Management.
|
|
4
|
+
|
|
5
|
+
DODAR (Diagnose, Options, Decide, Action, Review) imposes explicit gates at each stage of analysis, preventing the reasoning failures that LLMs share with humans under pressure: premature anchoring, option narrowing, and treating decisions as final.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install dodar
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from dodar import DODAR
|
|
17
|
+
|
|
18
|
+
dodar = DODAR(model="gpt-4.1-mini")
|
|
19
|
+
result = dodar.analyze("Your scenario here...")
|
|
20
|
+
|
|
21
|
+
# Structured access to each reasoning phase
|
|
22
|
+
result.diagnosis.hypotheses # Ranked competing causes
|
|
23
|
+
result.options.alternatives # Distinct paths with trade-offs
|
|
24
|
+
result.decision.recommendation # The call + justification
|
|
25
|
+
result.action.steps # Sequenced implementation plan
|
|
26
|
+
result.review.failure_modes # Self-critique
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Pipeline mode
|
|
30
|
+
|
|
31
|
+
For maximum quality, use the pipeline where each DODAR phase runs as a separate model call:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from dodar import DODAR
|
|
35
|
+
|
|
36
|
+
dodar = DODAR(model="gpt-4.1-mini", mode="pipeline")
|
|
37
|
+
result = dodar.analyze("Your scenario here...")
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Research shows GPT-4.1 Mini + pipeline scores 104% of Claude Opus 4.6 zero-shot quality at 89% lower cost. [Read the whitepaper](https://dodar.crox.io/research).
|
|
41
|
+
|
|
42
|
+
## Supported models
|
|
43
|
+
|
|
44
|
+
| Provider | Models |
|
|
45
|
+
|----------|--------|
|
|
46
|
+
| Anthropic | claude-opus-4-6, claude-sonnet-4-5, claude-haiku-4-5 |
|
|
47
|
+
| OpenAI | gpt-5.4, gpt-4o, gpt-4o-mini, gpt-4.1-mini, gpt-4.1-nano |
|
|
48
|
+
| Google | gemini-2.0-flash (install with `pip install dodar[google]`) |
|
|
49
|
+
| Ollama | Any local model (install with `pip install dodar[ollama]`) |
|
|
50
|
+
|
|
51
|
+
## Links
|
|
52
|
+
|
|
53
|
+
- [Documentation](https://dodar.crox.io/framework)
|
|
54
|
+
- [Research & whitepaper](https://dodar.crox.io/research)
|
|
55
|
+
- [GitHub](https://github.com/afieldofdreams/dodar)
|
|
56
|
+
|
|
57
|
+
## License
|
|
58
|
+
|
|
59
|
+
MIT
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "dodar"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "DODAR — Structured reasoning framework for AI agents, adapted from aviation CRM"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Adam Field", email = "adam@crox.io" },
|
|
10
|
+
]
|
|
11
|
+
keywords = ["ai", "agents", "reasoning", "llm", "decision-making", "crm", "dodar"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 3 - Alpha",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"anthropic>=0.30",
|
|
26
|
+
"openai>=1.30",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
google = ["google-genai>=1.0"]
|
|
31
|
+
ollama = ["httpx>=0.27"]
|
|
32
|
+
all = ["google-genai>=1.0", "httpx>=0.27"]
|
|
33
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.24"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://dodar.crox.io"
|
|
37
|
+
Documentation = "https://dodar.crox.io/framework"
|
|
38
|
+
Repository = "https://github.com/afieldofdreams/dodar"
|
|
39
|
+
Research = "https://dodar.crox.io/research"
|
|
40
|
+
|
|
41
|
+
[build-system]
|
|
42
|
+
requires = ["hatchling"]
|
|
43
|
+
build-backend = "hatchling.build"
|
|
44
|
+
|
|
45
|
+
[tool.hatch.build.targets.wheel]
|
|
46
|
+
packages = ["src/dodar"]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""DODAR — Structured reasoning framework for AI agents.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
from dodar import DODAR
|
|
5
|
+
|
|
6
|
+
dodar = DODAR(model="gpt-4.1-mini")
|
|
7
|
+
result = dodar.analyze("Your scenario here...")
|
|
8
|
+
|
|
9
|
+
print(result.diagnosis.hypotheses)
|
|
10
|
+
print(result.decision.recommendation)
|
|
11
|
+
print(result.review.failure_modes)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from dodar.core import (
|
|
15
|
+
DODAR,
|
|
16
|
+
DODARResult,
|
|
17
|
+
DiagnosisResult,
|
|
18
|
+
OptionsResult,
|
|
19
|
+
DecisionResult,
|
|
20
|
+
ActionResult,
|
|
21
|
+
ReviewResult,
|
|
22
|
+
)
|
|
23
|
+
from dodar.runners import available_models
|
|
24
|
+
|
|
25
|
+
__version__ = "0.1.0"

# Public API surface: the names re-exported at the package root and picked
# up by `from dodar import *`.
__all__ = [
    "DODAR",
    "DODARResult",
    "DiagnosisResult",
    "OptionsResult",
    "DecisionResult",
    "ActionResult",
    "ReviewResult",
    "available_models",
    "__version__",
]
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"""DODAR core — the main DODAR class and result types."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations

import asyncio
import re
import time

from dodar.prompts import (
    DODAR_SINGLE, ZERO_SHOT, COT,
    PIPELINE_DIAGNOSE, PIPELINE_OPTIONS, PIPELINE_DECIDE,
    PIPELINE_ACTION, PIPELINE_REVIEW,
)
from dodar.runners import run_model, available_models

from dataclasses import dataclass, field
from typing import Literal
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# --------------------------------------------------------------------------- #
|
|
19
|
+
# Result dataclasses
|
|
20
|
+
# --------------------------------------------------------------------------- #
|
|
21
|
+
|
|
22
|
+
@dataclass
class DiagnosisResult:
    """Structured output of the DIAGNOSE phase."""

    raw_text: str = ""  # verbatim model text for this phase
    hypotheses: list[str] = field(default_factory=list)  # bullet/numbered items extracted from the text
    assumptions: list[str] = field(default_factory=list)  # lines mentioning "assumption"/"assuming"
    unknowns: list[str] = field(default_factory=list)  # lines mentioning "unknown"/"missing"
|
|
29
|
+
|
|
30
|
+
@dataclass
class OptionsResult:
    """Structured output of the OPTIONS phase."""

    raw_text: str = ""  # verbatim model text for this phase
    alternatives: list[str] = field(default_factory=list)  # bullet/numbered items extracted from the text
    core_tension: str = ""  # first line mentioning "core tension"/"fundamental trade-off"
    trade_offs: list[str] = field(default_factory=list)  # NOTE: not populated by the built-in parser
|
|
37
|
+
|
|
38
|
+
@dataclass
class DecisionResult:
    """Structured output of the DECIDE phase."""

    raw_text: str = ""  # verbatim model text for this phase
    recommendation: str = ""  # first non-empty paragraph of the phase text
    confidence: str = ""  # last line mentioning "confidence"
    falsifiability: str = ""  # last line mentioning "change my mind"/"falsif..."
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class ActionResult:
    """Structured output of the ACTION phase."""

    raw_text: str = ""  # verbatim model text for this phase
    steps: list[str] = field(default_factory=list)  # bullet/numbered items extracted from the text
    reversible_steps: list[str] = field(default_factory=list)  # steps mentioning reversibility
    irreversible_steps: list[str] = field(default_factory=list)  # steps mentioning irreversibility
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class ReviewResult:
    """Structured output of the REVIEW phase."""

    raw_text: str = ""  # verbatim model text for this phase
    failure_modes: list[str] = field(default_factory=list)  # bullet/numbered items extracted from the text
    assumptions_to_validate: list[str] = field(default_factory=list)  # lines mentioning "assumption"/"validate"
    abort_conditions: list[str] = field(default_factory=list)  # lines mentioning "abandon"/"abort"/"pivot"
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class DODARResult:
    """Complete DODAR analysis result with structured phase access."""

    text: str = ""  # full raw transcript of the analysis
    diagnosis: DiagnosisResult = field(default_factory=DiagnosisResult)
    options: OptionsResult = field(default_factory=OptionsResult)
    decision: DecisionResult = field(default_factory=DecisionResult)
    action: ActionResult = field(default_factory=ActionResult)
    review: ReviewResult = field(default_factory=ReviewResult)
    input_tokens: int = 0  # summed over every model call of this analysis
    output_tokens: int = 0  # summed over every model call of this analysis
    latency_seconds: float = 0.0  # wall-clock duration of the whole analysis
    model: str = ""  # model ID that produced the result
    mode: str = "dodar"  # mode used: "dodar", "pipeline", "zero_shot", or "cot"
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# --------------------------------------------------------------------------- #
|
|
79
|
+
# Parsing
|
|
80
|
+
# --------------------------------------------------------------------------- #
|
|
81
|
+
|
|
82
|
+
def _extract_list_items(text: str) -> list[str]:
|
|
83
|
+
items = []
|
|
84
|
+
for line in text.split("\n"):
|
|
85
|
+
line = line.strip()
|
|
86
|
+
m = re.match(r"^(?:\d+[\.\)]\s*|\-\s+|\*\s+|•\s+)(.+)", line)
|
|
87
|
+
if m:
|
|
88
|
+
items.append(m.group(1).strip())
|
|
89
|
+
return items
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _split_phases(text: str) -> dict[str, str]:
|
|
93
|
+
phases: dict[str, str] = {}
|
|
94
|
+
phase_names = ["DIAGNOSE", "OPTIONS", "DECIDE", "ACTION", "REVIEW"]
|
|
95
|
+
pattern = r"##?\s*(?:Phase\s*\d+\s*[:\-]\s*)?(" + "|".join(phase_names) + r")\b"
|
|
96
|
+
matches = list(re.finditer(pattern, text, re.IGNORECASE))
|
|
97
|
+
for i, match in enumerate(matches):
|
|
98
|
+
name = match.group(1).upper()
|
|
99
|
+
start = match.end()
|
|
100
|
+
end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
|
|
101
|
+
phases[name] = text[start:end].strip()
|
|
102
|
+
return phases
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _parse_phase(text: str, phase: str) -> DiagnosisResult | OptionsResult | DecisionResult | ActionResult | ReviewResult:
    """Parse one phase's raw text into its structured result type.

    Args:
        text: Raw model output for a single phase.
        phase: "DIAGNOSE", "OPTIONS", "DECIDE", or "ACTION"; any other
            value is treated as REVIEW.

    Returns:
        The phase-specific dataclass, with keyword-matched fields filled
        heuristically from the text.
    """
    items = _extract_list_items(text)

    if phase == "DIAGNOSE":
        r = DiagnosisResult(raw_text=text, hypotheses=items[:10])
        for line in text.split("\n"):
            ll = line.lower()
            if "assumption" in ll or "assuming" in ll:
                r.assumptions.append(line.strip().lstrip("-*• "))
            if "unknown" in ll or "missing" in ll:
                r.unknowns.append(line.strip().lstrip("-*• "))
        return r

    if phase == "OPTIONS":
        r = OptionsResult(raw_text=text, alternatives=items)
        for line in text.split("\n"):
            if any(kw in line.lower() for kw in ("core tension", "fundamental trade-off")):
                r.core_tension = line.strip().lstrip("-*• :").strip()
                break
        return r

    if phase == "DECIDE":
        r = DecisionResult(raw_text=text)
        paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
        if paragraphs:
            # The first non-empty paragraph is taken as the recommendation.
            r.recommendation = paragraphs[0]
        for line in text.split("\n"):
            ll = line.lower()
            if "confidence" in ll:
                r.confidence = line.strip().lstrip("-*• :").strip()
            if "change my mind" in ll or "falsif" in ll:
                r.falsifiability = line.strip().lstrip("-*• :").strip()
        return r

    if phase == "ACTION":
        r = ActionResult(raw_text=text, steps=items)
        for step in items:
            sl = step.lower()
            # Bug fix: test "irreversible" first. The plain substring test
            # for "reversible" also matches inside "irreversible", which
            # previously mis-filed irreversible steps into both lists.
            if any(kw in sl for kw in ("irreversible", "cannot undo", "permanent")):
                r.irreversible_steps.append(step)
            elif "reversible" in sl or "can undo" in sl:
                r.reversible_steps.append(step)
        return r

    # Anything else (normally "REVIEW") falls through to the review parser.
    r = ReviewResult(raw_text=text, failure_modes=items)
    for line in text.split("\n"):
        ll = line.lower()
        if "assumption" in ll or "validate" in ll:
            r.assumptions_to_validate.append(line.strip().lstrip("-*• "))
        if any(kw in ll for kw in ("abandon", "abort", "pivot")):
            r.abort_conditions.append(line.strip().lstrip("-*• "))
    return r
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _parse_response(text: str) -> DODARResult:
    """Parse a full DODAR transcript into a DODARResult with per-phase fields."""
    result = DODARResult(text=text)
    sections = _split_phases(text)
    # Map each recognized section onto its attribute; absent sections keep
    # their default empty result objects.
    for name, attr in (
        ("DIAGNOSE", "diagnosis"),
        ("OPTIONS", "options"),
        ("DECIDE", "decision"),
        ("ACTION", "action"),
        ("REVIEW", "review"),
    ):
        if name in sections:
            setattr(result, attr, _parse_phase(sections[name], name))
    return result
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# --------------------------------------------------------------------------- #
|
|
176
|
+
# Main class
|
|
177
|
+
# --------------------------------------------------------------------------- #
|
|
178
|
+
|
|
179
|
+
# Analysis modes: "dodar" = one structured prompt, "pipeline" = one model
# call per phase, "zero_shot"/"cot" = plain unstructured prompts.
Mode = Literal["dodar", "pipeline", "zero_shot", "cot"]
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class DODAR:
|
|
183
|
+
"""DODAR structured reasoning framework for AI agents.
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
model: Model ID (e.g., "gpt-4.1-mini", "claude-sonnet-4-5").
|
|
187
|
+
mode: Default mode — "dodar" (single prompt), "pipeline" (5 calls),
|
|
188
|
+
"zero_shot", or "cot".
|
|
189
|
+
max_tokens: Maximum tokens per model call.
|
|
190
|
+
|
|
191
|
+
Example::
|
|
192
|
+
|
|
193
|
+
dodar = DODAR(model="gpt-4.1-mini")
|
|
194
|
+
result = dodar.analyze("Your scenario...")
|
|
195
|
+
print(result.diagnosis.hypotheses)
|
|
196
|
+
"""
|
|
197
|
+
|
|
198
|
+
def __init__(
|
|
199
|
+
self,
|
|
200
|
+
model: str = "gpt-4.1-mini",
|
|
201
|
+
mode: Mode = "dodar",
|
|
202
|
+
max_tokens: int = 4096,
|
|
203
|
+
) -> None:
|
|
204
|
+
self._model = model
|
|
205
|
+
self._default_mode = mode
|
|
206
|
+
self._max_tokens = max_tokens
|
|
207
|
+
|
|
208
|
+
@property
|
|
209
|
+
def model(self) -> str:
|
|
210
|
+
return self._model
|
|
211
|
+
|
|
212
|
+
def analyze(self, scenario: str, mode: Mode | None = None) -> DODARResult:
|
|
213
|
+
"""Analyze a scenario synchronously.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
scenario: The scenario text to analyze.
|
|
217
|
+
mode: Override the default mode for this call.
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
DODARResult with structured phase access.
|
|
221
|
+
"""
|
|
222
|
+
try:
|
|
223
|
+
loop = asyncio.get_running_loop()
|
|
224
|
+
except RuntimeError:
|
|
225
|
+
loop = None
|
|
226
|
+
|
|
227
|
+
if loop and loop.is_running():
|
|
228
|
+
import concurrent.futures
|
|
229
|
+
with concurrent.futures.ThreadPoolExecutor() as pool:
|
|
230
|
+
return pool.submit(
|
|
231
|
+
asyncio.run, self.analyze_async(scenario, mode)
|
|
232
|
+
).result()
|
|
233
|
+
return asyncio.run(self.analyze_async(scenario, mode))
|
|
234
|
+
|
|
235
|
+
async def analyze_async(self, scenario: str, mode: Mode | None = None) -> DODARResult:
|
|
236
|
+
"""Analyze a scenario asynchronously."""
|
|
237
|
+
m = mode or self._default_mode
|
|
238
|
+
|
|
239
|
+
if m == "pipeline":
|
|
240
|
+
return await self._run_pipeline(scenario)
|
|
241
|
+
|
|
242
|
+
prompt = self._build_prompt(scenario, m)
|
|
243
|
+
response = await run_model(self._model, prompt, max_tokens=self._max_tokens)
|
|
244
|
+
|
|
245
|
+
if m == "dodar":
|
|
246
|
+
result = _parse_response(response.text)
|
|
247
|
+
else:
|
|
248
|
+
result = DODARResult(text=response.text)
|
|
249
|
+
|
|
250
|
+
result.input_tokens = response.input_tokens
|
|
251
|
+
result.output_tokens = response.output_tokens
|
|
252
|
+
result.latency_seconds = response.latency_seconds
|
|
253
|
+
result.model = self._model
|
|
254
|
+
result.mode = m
|
|
255
|
+
return result
|
|
256
|
+
|
|
257
|
+
async def _run_pipeline(self, scenario: str) -> DODARResult:
|
|
258
|
+
"""Run the 5-phase DODAR pipeline."""
|
|
259
|
+
total_input = 0
|
|
260
|
+
total_output = 0
|
|
261
|
+
t0 = __import__("time").monotonic()
|
|
262
|
+
context_parts: list[str] = []
|
|
263
|
+
|
|
264
|
+
# Phase 1: Diagnose
|
|
265
|
+
r1 = await run_model(
|
|
266
|
+
self._model,
|
|
267
|
+
PIPELINE_DIAGNOSE.format(scenario=scenario),
|
|
268
|
+
max_tokens=self._max_tokens,
|
|
269
|
+
)
|
|
270
|
+
total_input += r1.input_tokens
|
|
271
|
+
total_output += r1.output_tokens
|
|
272
|
+
context_parts.append(f"## DIAGNOSE\n{r1.text}")
|
|
273
|
+
|
|
274
|
+
# Phase 2: Options
|
|
275
|
+
r2 = await run_model(
|
|
276
|
+
self._model,
|
|
277
|
+
PIPELINE_OPTIONS.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
|
|
278
|
+
max_tokens=self._max_tokens,
|
|
279
|
+
)
|
|
280
|
+
total_input += r2.input_tokens
|
|
281
|
+
total_output += r2.output_tokens
|
|
282
|
+
context_parts.append(f"## OPTIONS\n{r2.text}")
|
|
283
|
+
|
|
284
|
+
# Phase 3: Decide
|
|
285
|
+
r3 = await run_model(
|
|
286
|
+
self._model,
|
|
287
|
+
PIPELINE_DECIDE.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
|
|
288
|
+
max_tokens=self._max_tokens,
|
|
289
|
+
)
|
|
290
|
+
total_input += r3.input_tokens
|
|
291
|
+
total_output += r3.output_tokens
|
|
292
|
+
context_parts.append(f"## DECIDE\n{r3.text}")
|
|
293
|
+
|
|
294
|
+
# Phase 4: Action
|
|
295
|
+
r4 = await run_model(
|
|
296
|
+
self._model,
|
|
297
|
+
PIPELINE_ACTION.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
|
|
298
|
+
max_tokens=self._max_tokens,
|
|
299
|
+
)
|
|
300
|
+
total_input += r4.input_tokens
|
|
301
|
+
total_output += r4.output_tokens
|
|
302
|
+
context_parts.append(f"## ACTION\n{r4.text}")
|
|
303
|
+
|
|
304
|
+
# Phase 5: Review
|
|
305
|
+
r5 = await run_model(
|
|
306
|
+
self._model,
|
|
307
|
+
PIPELINE_REVIEW.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
|
|
308
|
+
max_tokens=self._max_tokens,
|
|
309
|
+
)
|
|
310
|
+
total_input += r5.input_tokens
|
|
311
|
+
total_output += r5.output_tokens
|
|
312
|
+
context_parts.append(f"## REVIEW\n{r5.text}")
|
|
313
|
+
|
|
314
|
+
full_text = "\n\n".join(context_parts)
|
|
315
|
+
result = _parse_response(full_text)
|
|
316
|
+
result.input_tokens = total_input
|
|
317
|
+
result.output_tokens = total_output
|
|
318
|
+
result.latency_seconds = __import__("time").monotonic() - t0
|
|
319
|
+
result.model = self._model
|
|
320
|
+
result.mode = "pipeline"
|
|
321
|
+
return result
|
|
322
|
+
|
|
323
|
+
def _build_prompt(self, scenario: str, mode: Mode) -> str:
|
|
324
|
+
match mode:
|
|
325
|
+
case "dodar":
|
|
326
|
+
return DODAR_SINGLE.format(scenario=scenario)
|
|
327
|
+
case "zero_shot":
|
|
328
|
+
return ZERO_SHOT.format(scenario=scenario)
|
|
329
|
+
case "cot":
|
|
330
|
+
return COT.format(scenario=scenario)
|
|
331
|
+
case _:
|
|
332
|
+
raise ValueError(f"Unknown mode: {mode}")
|
|
333
|
+
|
|
334
|
+
def __repr__(self) -> str:
|
|
335
|
+
return f"DODAR(model={self._model!r}, mode={self._default_mode!r})"
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Prompt templates for DODAR framework."""
|
|
2
|
+
|
|
3
|
+
ZERO_SHOT = """\
|
|
4
|
+
You are an expert analyst. Please analyze the following scenario and provide \
|
|
5
|
+
your best response.
|
|
6
|
+
|
|
7
|
+
{scenario}
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
COT = """\
|
|
11
|
+
You are an expert analyst. Please analyze the following scenario step by step. \
|
|
12
|
+
Think through the problem carefully, showing your reasoning at each stage \
|
|
13
|
+
before reaching your conclusions.
|
|
14
|
+
|
|
15
|
+
{scenario}
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
DODAR_SINGLE = """\
|
|
19
|
+
You are an expert analyst using the DODAR structured reasoning framework. \
|
|
20
|
+
Analyze the following scenario by working through ALL FIVE phases explicitly. \
|
|
21
|
+
Each phase is a cognitive gate — do not skip or combine phases.
|
|
22
|
+
|
|
23
|
+
## Phase 1: DIAGNOSE
|
|
24
|
+
- List at least 3 competing hypotheses for what is happening
|
|
25
|
+
- Challenge your first instinct — what would a contrarian view say?
|
|
26
|
+
- Surface any latent assumptions you are making
|
|
27
|
+
- Identify paradoxes or contradictions in the information
|
|
28
|
+
- Map the unknowns — what information would change your diagnosis?
|
|
29
|
+
- Consider polycontribution — could multiple causes be interacting?
|
|
30
|
+
|
|
31
|
+
## Phase 2: OPTIONS
|
|
32
|
+
- Generate at least 4 genuinely distinct options (not minor variations)
|
|
33
|
+
- Name the core tension — the fundamental trade-off this decision hinges on
|
|
34
|
+
- Identify different types of risk for each option (financial, reputational, technical, etc.)
|
|
35
|
+
- Test your assumptions — what must be true for each option to work?
|
|
36
|
+
- Quantify opportunity costs — what do you give up with each choice?
|
|
37
|
+
- Consider hidden stakeholders and constraints
|
|
38
|
+
|
|
39
|
+
## Phase 3: DECIDE
|
|
40
|
+
- Make a clear recommendation
|
|
41
|
+
- Justify your choice explicitly against each alternative you rejected
|
|
42
|
+
- Identify the binding constraints that shaped this decision
|
|
43
|
+
- Reframe the time horizon — is this optimised for the right timeframe?
|
|
44
|
+
- Quantify the opportunity cost of your chosen path
|
|
45
|
+
- State your confidence level and what would change your mind (falsifiability)
|
|
46
|
+
|
|
47
|
+
## Phase 4: ACTION
|
|
48
|
+
- Define specific, concrete implementation steps
|
|
49
|
+
- Identify dependencies between steps
|
|
50
|
+
- Specify timeline and resource requirements
|
|
51
|
+
- Identify blockers and prerequisites
|
|
52
|
+
- Mark which actions are reversible vs. irreversible
|
|
53
|
+
|
|
54
|
+
## Phase 5: REVIEW
|
|
55
|
+
- Identify at least 3 specific failure modes/risks, each with:
|
|
56
|
+
(a) trigger condition, (b) detection method, (c) contingency response
|
|
57
|
+
- List assumptions from earlier phases that need validation
|
|
58
|
+
- Define conditions under which you would abandon this plan entirely
|
|
59
|
+
|
|
60
|
+
SCENARIO:
|
|
61
|
+
{scenario}
|
|
62
|
+
|
|
63
|
+
Work through each phase systematically. Label each phase clearly.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
PIPELINE_DIAGNOSE = """\
|
|
67
|
+
You are a diagnostic reasoning specialist. Your role is to hold diagnosis OPEN \
|
|
68
|
+
and resist premature closure.
|
|
69
|
+
|
|
70
|
+
Analyze the following scenario. Do NOT recommend solutions yet — focus only on \
|
|
71
|
+
understanding what is happening.
|
|
72
|
+
|
|
73
|
+
1. List at least 3 competing hypotheses for what is happening
|
|
74
|
+
2. Challenge your first instinct — what would a contrarian view say?
|
|
75
|
+
3. Surface any latent assumptions
|
|
76
|
+
4. Identify paradoxes or contradictions
|
|
77
|
+
5. Map the unknowns — what information would change the diagnosis?
|
|
78
|
+
6. Consider polycontribution — could multiple causes interact?
|
|
79
|
+
|
|
80
|
+
SCENARIO:
|
|
81
|
+
{scenario}
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
PIPELINE_OPTIONS = """\
|
|
85
|
+
You are a strategic options analyst. Your role is to enumerate genuinely \
|
|
86
|
+
distinct alternatives with explicit trade-offs.
|
|
87
|
+
|
|
88
|
+
Given the scenario and the diagnosis below, generate options. Do NOT decide yet.
|
|
89
|
+
|
|
90
|
+
1. Generate at least 4 genuinely distinct options (not minor variations)
|
|
91
|
+
2. Name the core tension — the fundamental trade-off
|
|
92
|
+
3. Separate different types of risk for each option
|
|
93
|
+
4. Quantify opportunity costs for each path
|
|
94
|
+
5. Identify hidden stakeholders and constraints
|
|
95
|
+
|
|
96
|
+
SCENARIO:
|
|
97
|
+
{scenario}
|
|
98
|
+
|
|
99
|
+
DIAGNOSIS (from prior phase):
|
|
100
|
+
{prior_context}
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
PIPELINE_DECIDE = """\
|
|
104
|
+
You are a decision architect. Your role is to commit to a recommendation \
|
|
105
|
+
with transparent, falsifiable reasoning.
|
|
106
|
+
|
|
107
|
+
Given the scenario, diagnosis, and options below, make the call.
|
|
108
|
+
|
|
109
|
+
1. Make a clear recommendation
|
|
110
|
+
2. Justify against each rejected alternative specifically
|
|
111
|
+
3. Identify binding constraints that shaped this decision
|
|
112
|
+
4. State your confidence level
|
|
113
|
+
5. State what would change your mind (falsifiability)
|
|
114
|
+
6. Quantify the opportunity cost of your chosen path
|
|
115
|
+
|
|
116
|
+
SCENARIO:
|
|
117
|
+
{scenario}
|
|
118
|
+
|
|
119
|
+
PRIOR ANALYSIS:
|
|
120
|
+
{prior_context}
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
PIPELINE_ACTION = """\
|
|
124
|
+
You are an implementation planning specialist. Your role is to translate \
|
|
125
|
+
decisions into concrete, sequenced action plans.
|
|
126
|
+
|
|
127
|
+
Given the scenario and the decision below, create the action plan.
|
|
128
|
+
|
|
129
|
+
1. Define specific, concrete implementation steps
|
|
130
|
+
2. Identify dependencies between steps
|
|
131
|
+
3. Specify timeline and resource requirements
|
|
132
|
+
4. Identify blockers and prerequisites
|
|
133
|
+
5. Mark which actions are reversible vs. irreversible
|
|
134
|
+
|
|
135
|
+
SCENARIO:
|
|
136
|
+
{scenario}
|
|
137
|
+
|
|
138
|
+
PRIOR ANALYSIS:
|
|
139
|
+
{prior_context}
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
PIPELINE_REVIEW = """\
|
|
143
|
+
You are a critical review analyst. Your role is to identify failure modes \
|
|
144
|
+
and validate assumptions. Be adversarial — find the weaknesses.
|
|
145
|
+
|
|
146
|
+
Given the full analysis below, conduct the review.
|
|
147
|
+
|
|
148
|
+
1. Identify at least 3 specific failure modes, each with:
|
|
149
|
+
(a) trigger condition, (b) detection method, (c) contingency response
|
|
150
|
+
2. Audit assumptions from earlier phases — are they still valid?
|
|
151
|
+
3. Define conditions for abandoning this plan entirely
|
|
152
|
+
4. What would you monitor to detect early warning signs?
|
|
153
|
+
|
|
154
|
+
SCENARIO:
|
|
155
|
+
{scenario}
|
|
156
|
+
|
|
157
|
+
FULL ANALYSIS:
|
|
158
|
+
{prior_context}
|
|
159
|
+
"""
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Model runners for DODAR — thin wrappers around provider SDKs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Literal
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class RunnerResponse:
    """Provider-agnostic result of a single model call."""
    # Generated completion text.
    text: str
    # Prompt tokens consumed; left at 0 when the provider reports no usage.
    input_tokens: int = 0
    # Completion tokens generated; left at 0 when the provider reports no usage.
    output_tokens: int = 0
    # Wall-clock duration of the API call in seconds (time.monotonic delta).
    latency_seconds: float = 0.0
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _detect_provider(model: str) -> str:
|
|
19
|
+
if model.startswith("claude"):
|
|
20
|
+
return "anthropic"
|
|
21
|
+
elif model.startswith(("gpt-", "o1", "o3", "o4")):
|
|
22
|
+
return "openai"
|
|
23
|
+
elif model.startswith("gemini"):
|
|
24
|
+
return "google"
|
|
25
|
+
else:
|
|
26
|
+
return "ollama"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ---- Anthropic ---- #
|
|
30
|
+
|
|
31
|
+
async def _run_anthropic(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Run *prompt* against an Anthropic model.

    Args:
        model: Anthropic model ID (e.g. "claude-sonnet-4-5").
        prompt: User message text.
        system: Optional system prompt.
        max_tokens: Cap on generated tokens.

    Returns:
        RunnerResponse with text, token usage, and call latency.
    """
    import anthropic

    client = anthropic.AsyncAnthropic()
    kwargs: dict = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
    }
    # Anthropic takes the system prompt as a top-level parameter, not a message.
    if system:
        kwargs["system"] = system

    t0 = time.monotonic()
    response = await client.messages.create(**kwargs)
    latency = time.monotonic() - t0

    # Join all text blocks instead of indexing content[0]: the content list can
    # be empty, or lead with a non-text block, making content[0].text raise.
    text = "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )
    return RunnerResponse(
        text=text,
        input_tokens=response.usage.input_tokens,
        output_tokens=response.usage.output_tokens,
        latency_seconds=latency,
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ---- OpenAI ---- #
|
|
52
|
+
|
|
53
|
+
async def _run_openai(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Run *prompt* against an OpenAI chat model.

    Args:
        model: OpenAI model ID (e.g. "gpt-4o", "o3-mini").
        prompt: User message text.
        system: Optional system prompt (sent as a system-role message).
        max_tokens: Cap on generated tokens.

    Returns:
        RunnerResponse with text, token usage (0s if the API omits usage),
        and call latency.
    """
    import openai

    client = openai.AsyncOpenAI()
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    # Reasoning models (o1/o3/o4) and gpt-5+ reject `max_tokens` and require
    # `max_completion_tokens`. The previous check omitted "o1", so every o1-*
    # request (which _detect_provider routes here) failed with an invalid
    # parameter error.
    use_new_param = model.startswith(("gpt-5", "o1", "o3", "o4"))
    kwargs: dict = {"model": model, "messages": messages}
    if use_new_param:
        kwargs["max_completion_tokens"] = max_tokens
    else:
        kwargs["max_tokens"] = max_tokens

    t0 = time.monotonic()
    response = await client.chat.completions.create(**kwargs)
    latency = time.monotonic() - t0

    usage = response.usage
    return RunnerResponse(
        text=response.choices[0].message.content or "",
        input_tokens=usage.prompt_tokens if usage else 0,
        output_tokens=usage.completion_tokens if usage else 0,
        latency_seconds=latency,
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ---- Google ---- #
|
|
83
|
+
|
|
84
|
+
async def _run_google(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Run *prompt* against a Google Gemini model.

    Args:
        model: Gemini model ID (e.g. "gemini-2.0-flash").
        prompt: User message text.
        system: Optional system prompt, folded into the text prompt.
        max_tokens: Cap on generated tokens.

    Returns:
        RunnerResponse with text, token usage when reported, and call latency.
    """
    from google import genai

    client = genai.Client()
    # Fold the system prompt into the text, matching the other runners'
    # single-turn usage of this module.
    full_prompt = f"{system}\n\n{prompt}" if system else prompt

    t0 = time.monotonic()
    response = await client.aio.models.generate_content(
        model=model,
        contents=full_prompt,
        # Previously max_tokens was accepted but silently ignored; pass it
        # through so the cap behaves consistently with the other providers.
        config={"max_output_tokens": max_tokens},
    )
    latency = time.monotonic() - t0

    # usage_metadata may be absent, and its counts may be None — coerce to 0.
    usage = getattr(response, "usage_metadata", None)
    return RunnerResponse(
        text=response.text or "",
        input_tokens=(getattr(usage, "prompt_token_count", 0) or 0),
        output_tokens=(getattr(usage, "candidates_token_count", 0) or 0),
        latency_seconds=latency,
    )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# ---- Ollama ---- #
|
|
103
|
+
|
|
104
|
+
async def _run_ollama(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Run *prompt* against a local Ollama server at http://localhost:11434.

    Args:
        model: Ollama model name (e.g. "llama3.1").
        prompt: User message text.
        system: Optional system prompt (sent as a system-role message).
        max_tokens: Cap on generated tokens (Ollama's ``num_predict``).

    Returns:
        RunnerResponse with text, Ollama's token counters, and call latency.

    Raises:
        httpx.HTTPStatusError: if the server responds with an error status.
    """
    import httpx

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    t0 = time.monotonic()
    # Long timeout: local models can be slow to load and generate.
    async with httpx.AsyncClient(timeout=600) as client:
        resp = await client.post(
            "http://localhost:11434/api/chat",
            json={
                "model": model,
                "messages": messages,
                "stream": False,
                # num_predict is Ollama's output-token cap; previously the
                # max_tokens argument was accepted but silently ignored here.
                "options": {"num_predict": max_tokens},
            },
        )
        resp.raise_for_status()
        data = resp.json()
    latency = time.monotonic() - t0

    return RunnerResponse(
        text=data.get("message", {}).get("content", ""),
        input_tokens=data.get("prompt_eval_count", 0),
        output_tokens=data.get("eval_count", 0),
        latency_seconds=latency,
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---- Registry ---- #
|
|
130
|
+
|
|
131
|
+
_MODELS = {
|
|
132
|
+
"claude-opus-4-6", "claude-sonnet-4-5", "claude-haiku-4-5",
|
|
133
|
+
"gpt-5.4", "gpt-4o", "gpt-4o-mini", "gpt-4.1-mini", "gpt-4.1-nano",
|
|
134
|
+
"gemini-2.0-flash",
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def available_models() -> list[str]:
|
|
139
|
+
"""List all known model IDs."""
|
|
140
|
+
return sorted(_MODELS)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
async def run_model(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Run a prompt against any supported model.

    Args:
        model: Model ID; the provider is inferred from its prefix.
        prompt: User message text.
        system: Optional system prompt.
        max_tokens: Cap on generated tokens.

    Returns:
        RunnerResponse from the provider-specific runner.

    Raises:
        ValueError: if the detected provider has no registered runner.
    """
    dispatch = {
        "anthropic": _run_anthropic,
        "openai": _run_openai,
        "google": _run_google,
        "ollama": _run_ollama,
    }
    runner = dispatch.get(_detect_provider(model))
    if runner is None:
        raise ValueError(f"Unknown provider for model: {model}")
    return await runner(model, prompt, system, max_tokens)
|