dodar 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dodar-0.1.0/.gitignore ADDED
@@ -0,0 +1,41 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+ env/
14
+
15
+ # IDE
16
+ .idea/
17
+ .vscode/
18
+ *.swp
19
+ *.swo
20
+
21
+ # Environment
22
+ .env
23
+ .env.local
24
+
25
+ # Data (runs and scores are generated, not committed)
26
+ backend/data/runs/
27
+ backend/data/scores/
28
+
29
+ # Node
30
+ node_modules/
31
+ frontend/dist/
32
+ frontend/.vite/
33
+
34
+ # OS
35
+ .DS_Store
36
+ Thumbs.db
37
+
38
+ # Testing
39
+ .pytest_cache/
40
+ .coverage
41
+ htmlcov/
dodar-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: dodar
3
+ Version: 0.1.0
4
+ Summary: DODAR — Structured reasoning framework for AI agents, adapted from aviation CRM
5
+ Project-URL: Homepage, https://dodar.crox.io
6
+ Project-URL: Documentation, https://dodar.crox.io/framework
7
+ Project-URL: Repository, https://github.com/afieldofdreams/dodar
8
+ Project-URL: Research, https://dodar.crox.io/research
9
+ Author-email: Adam Field <adam@crox.io>
10
+ License-Expression: MIT
11
+ Keywords: agents,ai,crm,decision-making,dodar,llm,reasoning
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: anthropic>=0.30
24
+ Requires-Dist: openai>=1.30
25
+ Provides-Extra: all
26
+ Requires-Dist: google-genai>=1.0; extra == 'all'
27
+ Requires-Dist: httpx>=0.27; extra == 'all'
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
30
+ Requires-Dist: pytest>=8.0; extra == 'dev'
31
+ Provides-Extra: google
32
+ Requires-Dist: google-genai>=1.0; extra == 'google'
33
+ Provides-Extra: ollama
34
+ Requires-Dist: httpx>=0.27; extra == 'ollama'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # DODAR
38
+
39
+ **Structured reasoning framework for AI agents** — adapted from aviation Crew Resource Management.
40
+
41
+ DODAR (Diagnose, Options, Decide, Action, Review) imposes explicit gates at each stage of analysis, preventing the reasoning failures that LLMs share with humans under pressure: premature anchoring, option narrowing, and treating decisions as final.
42
+
43
+ ## Install
44
+
45
+ ```bash
46
+ pip install dodar
47
+ ```
48
+
49
+ ## Quick start
50
+
51
+ ```python
52
+ from dodar import DODAR
53
+
54
+ dodar = DODAR(model="gpt-4.1-mini")
55
+ result = dodar.analyze("Your scenario here...")
56
+
57
+ # Structured access to each reasoning phase
58
+ result.diagnosis.hypotheses # Ranked competing causes
59
+ result.options.alternatives # Distinct paths with trade-offs
60
+ result.decision.recommendation # The call + justification
61
+ result.action.steps # Sequenced implementation plan
62
+ result.review.failure_modes # Self-critique
63
+ ```
64
+
65
+ ## Pipeline mode
66
+
67
+ For maximum quality, use the pipeline where each DODAR phase runs as a separate model call:
68
+
69
+ ```python
70
+ from dodar import DODAR
71
+
72
+ dodar = DODAR(model="gpt-4.1-mini", mode="pipeline")
73
+ result = dodar.analyze("Your scenario here...")
74
+ ```
75
+
76
+ Research shows GPT-4.1 Mini + pipeline scores 104% of Claude Opus 4.6 zero-shot quality at 89% lower cost. [Read the whitepaper](https://dodar.crox.io/research).
77
+
78
+ ## Supported models
79
+
80
+ | Provider | Models |
81
+ |----------|--------|
82
+ | Anthropic | claude-opus-4-6, claude-sonnet-4-5, claude-haiku-4-5 |
83
+ | OpenAI | gpt-5.4, gpt-4o, gpt-4o-mini, gpt-4.1-mini, gpt-4.1-nano |
84
+ | Google | gemini-2.0-flash (install with `pip install dodar[google]`) |
85
+ | Ollama | Any local model (install with `pip install dodar[ollama]`) |
86
+
87
+ ## Links
88
+
89
+ - [Documentation](https://dodar.crox.io/framework)
90
+ - [Research & whitepaper](https://dodar.crox.io/research)
91
+ - [GitHub](https://github.com/afieldofdreams/dodar)
92
+
93
+ ## License
94
+
95
+ MIT
dodar-0.1.0/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # DODAR
2
+
3
+ **Structured reasoning framework for AI agents** — adapted from aviation Crew Resource Management.
4
+
5
+ DODAR (Diagnose, Options, Decide, Action, Review) imposes explicit gates at each stage of analysis, preventing the reasoning failures that LLMs share with humans under pressure: premature anchoring, option narrowing, and treating decisions as final.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install dodar
11
+ ```
12
+
13
+ ## Quick start
14
+
15
+ ```python
16
+ from dodar import DODAR
17
+
18
+ dodar = DODAR(model="gpt-4.1-mini")
19
+ result = dodar.analyze("Your scenario here...")
20
+
21
+ # Structured access to each reasoning phase
22
+ result.diagnosis.hypotheses # Ranked competing causes
23
+ result.options.alternatives # Distinct paths with trade-offs
24
+ result.decision.recommendation # The call + justification
25
+ result.action.steps # Sequenced implementation plan
26
+ result.review.failure_modes # Self-critique
27
+ ```
28
+
29
+ ## Pipeline mode
30
+
31
+ For maximum quality, use the pipeline where each DODAR phase runs as a separate model call:
32
+
33
+ ```python
34
+ from dodar import DODAR
35
+
36
+ dodar = DODAR(model="gpt-4.1-mini", mode="pipeline")
37
+ result = dodar.analyze("Your scenario here...")
38
+ ```
39
+
40
+ Research shows GPT-4.1 Mini + pipeline scores 104% of Claude Opus 4.6 zero-shot quality at 89% lower cost. [Read the whitepaper](https://dodar.crox.io/research).
41
+
42
+ ## Supported models
43
+
44
+ | Provider | Models |
45
+ |----------|--------|
46
+ | Anthropic | claude-opus-4-6, claude-sonnet-4-5, claude-haiku-4-5 |
47
+ | OpenAI | gpt-5.4, gpt-4o, gpt-4o-mini, gpt-4.1-mini, gpt-4.1-nano |
48
+ | Google | gemini-2.0-flash (install with `pip install dodar[google]`) |
49
+ | Ollama | Any local model (install with `pip install dodar[ollama]`) |
50
+
51
+ ## Links
52
+
53
+ - [Documentation](https://dodar.crox.io/framework)
54
+ - [Research & whitepaper](https://dodar.crox.io/research)
55
+ - [GitHub](https://github.com/afieldofdreams/dodar)
56
+
57
+ ## License
58
+
59
+ MIT
@@ -0,0 +1,46 @@
1
+ [project]
2
+ name = "dodar"
3
+ version = "0.1.0"
4
+ description = "DODAR — Structured reasoning framework for AI agents, adapted from aviation CRM"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ authors = [
9
+ { name = "Adam Field", email = "adam@crox.io" },
10
+ ]
11
+ keywords = ["ai", "agents", "reasoning", "llm", "decision-making", "crm", "dodar"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Developers",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
+ "Topic :: Software Development :: Libraries :: Python Modules",
23
+ ]
24
+ dependencies = [
25
+ "anthropic>=0.30",
26
+ "openai>=1.30",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ google = ["google-genai>=1.0"]
31
+ ollama = ["httpx>=0.27"]
32
+ all = ["google-genai>=1.0", "httpx>=0.27"]
33
+ dev = ["pytest>=8.0", "pytest-asyncio>=0.24"]
34
+
35
+ [project.urls]
36
+ Homepage = "https://dodar.crox.io"
37
+ Documentation = "https://dodar.crox.io/framework"
38
+ Repository = "https://github.com/afieldofdreams/dodar"
39
+ Research = "https://dodar.crox.io/research"
40
+
41
+ [build-system]
42
+ requires = ["hatchling"]
43
+ build-backend = "hatchling.build"
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ packages = ["src/dodar"]
@@ -0,0 +1,37 @@
1
+ """DODAR — Structured reasoning framework for AI agents.
2
+
3
+ Usage:
4
+ from dodar import DODAR
5
+
6
+ dodar = DODAR(model="gpt-4.1-mini")
7
+ result = dodar.analyze("Your scenario here...")
8
+
9
+ print(result.diagnosis.hypotheses)
10
+ print(result.decision.recommendation)
11
+ print(result.review.failure_modes)
12
+ """
13
+
14
+ from dodar.core import (
15
+ DODAR,
16
+ DODARResult,
17
+ DiagnosisResult,
18
+ OptionsResult,
19
+ DecisionResult,
20
+ ActionResult,
21
+ ReviewResult,
22
+ )
23
+ from dodar.runners import available_models
24
+
25
+ __version__ = "0.1.0"
26
+
27
+ __all__ = [
28
+ "DODAR",
29
+ "DODARResult",
30
+ "DiagnosisResult",
31
+ "OptionsResult",
32
+ "DecisionResult",
33
+ "ActionResult",
34
+ "ReviewResult",
35
+ "available_models",
36
+ "__version__",
37
+ ]
@@ -0,0 +1,335 @@
1
+ """DODAR core — the main DODAR class and result types."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import re
7
+ from dataclasses import dataclass, field
8
+ from typing import Literal
9
+
10
+ from dodar.prompts import (
11
+ DODAR_SINGLE, ZERO_SHOT, COT,
12
+ PIPELINE_DIAGNOSE, PIPELINE_OPTIONS, PIPELINE_DECIDE,
13
+ PIPELINE_ACTION, PIPELINE_REVIEW,
14
+ )
15
+ from dodar.runners import run_model, available_models
16
+
17
+
18
+ # --------------------------------------------------------------------------- #
19
+ # Result dataclasses
20
+ # --------------------------------------------------------------------------- #
21
+
22
+ @dataclass
23
+ class DiagnosisResult:
24
+ raw_text: str = ""
25
+ hypotheses: list[str] = field(default_factory=list)
26
+ assumptions: list[str] = field(default_factory=list)
27
+ unknowns: list[str] = field(default_factory=list)
28
+
29
+
30
+ @dataclass
31
+ class OptionsResult:
32
+ raw_text: str = ""
33
+ alternatives: list[str] = field(default_factory=list)
34
+ core_tension: str = ""
35
+ trade_offs: list[str] = field(default_factory=list)
36
+
37
+
38
+ @dataclass
39
+ class DecisionResult:
40
+ raw_text: str = ""
41
+ recommendation: str = ""
42
+ confidence: str = ""
43
+ falsifiability: str = ""
44
+
45
+
46
+ @dataclass
47
+ class ActionResult:
48
+ raw_text: str = ""
49
+ steps: list[str] = field(default_factory=list)
50
+ reversible_steps: list[str] = field(default_factory=list)
51
+ irreversible_steps: list[str] = field(default_factory=list)
52
+
53
+
54
+ @dataclass
55
+ class ReviewResult:
56
+ raw_text: str = ""
57
+ failure_modes: list[str] = field(default_factory=list)
58
+ assumptions_to_validate: list[str] = field(default_factory=list)
59
+ abort_conditions: list[str] = field(default_factory=list)
60
+
61
+
62
+ @dataclass
63
+ class DODARResult:
64
+ """Complete DODAR analysis result with structured phase access."""
65
+ text: str = ""
66
+ diagnosis: DiagnosisResult = field(default_factory=DiagnosisResult)
67
+ options: OptionsResult = field(default_factory=OptionsResult)
68
+ decision: DecisionResult = field(default_factory=DecisionResult)
69
+ action: ActionResult = field(default_factory=ActionResult)
70
+ review: ReviewResult = field(default_factory=ReviewResult)
71
+ input_tokens: int = 0
72
+ output_tokens: int = 0
73
+ latency_seconds: float = 0.0
74
+ model: str = ""
75
+ mode: str = "dodar"
76
+
77
+
78
+ # --------------------------------------------------------------------------- #
79
+ # Parsing
80
+ # --------------------------------------------------------------------------- #
81
+
82
+ def _extract_list_items(text: str) -> list[str]:
83
+ items = []
84
+ for line in text.split("\n"):
85
+ line = line.strip()
86
+ m = re.match(r"^(?:\d+[\.\)]\s*|\-\s+|\*\s+|•\s+)(.+)", line)
87
+ if m:
88
+ items.append(m.group(1).strip())
89
+ return items
90
+
91
+
92
+ def _split_phases(text: str) -> dict[str, str]:
93
+ phases: dict[str, str] = {}
94
+ phase_names = ["DIAGNOSE", "OPTIONS", "DECIDE", "ACTION", "REVIEW"]
95
+ pattern = r"##?\s*(?:Phase\s*\d+\s*[:\-]\s*)?(" + "|".join(phase_names) + r")\b"
96
+ matches = list(re.finditer(pattern, text, re.IGNORECASE))
97
+ for i, match in enumerate(matches):
98
+ name = match.group(1).upper()
99
+ start = match.end()
100
+ end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
101
+ phases[name] = text[start:end].strip()
102
+ return phases
103
+
104
+
105
+ def _parse_phase(text: str, phase: str) -> DiagnosisResult | OptionsResult | DecisionResult | ActionResult | ReviewResult:
106
+ items = _extract_list_items(text)
107
+ lower = text.lower()
108
+
109
+ if phase == "DIAGNOSE":
110
+ r = DiagnosisResult(raw_text=text, hypotheses=items[:10])
111
+ for line in text.split("\n"):
112
+ if any(kw in line.lower() for kw in ["assumption", "assuming"]):
113
+ r.assumptions.append(line.strip().lstrip("-*• "))
114
+ if any(kw in line.lower() for kw in ["unknown", "missing"]):
115
+ r.unknowns.append(line.strip().lstrip("-*• "))
116
+ return r
117
+
118
+ if phase == "OPTIONS":
119
+ r = OptionsResult(raw_text=text, alternatives=items)
120
+ for line in text.split("\n"):
121
+ if any(kw in line.lower() for kw in ["core tension", "fundamental trade-off"]):
122
+ r.core_tension = line.strip().lstrip("-*• :").strip()
123
+ break
124
+ return r
125
+
126
+ if phase == "DECIDE":
127
+ r = DecisionResult(raw_text=text)
128
+ paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
129
+ if paragraphs:
130
+ r.recommendation = paragraphs[0]
131
+ for line in text.split("\n"):
132
+ if "confidence" in line.lower():
133
+ r.confidence = line.strip().lstrip("-*• :").strip()
134
+ if any(kw in line.lower() for kw in ["change my mind", "falsif"]):
135
+ r.falsifiability = line.strip().lstrip("-*• :").strip()
136
+ return r
137
+
138
+ if phase == "ACTION":
139
+ r = ActionResult(raw_text=text, steps=items)
140
+ for step in items:
141
+ sl = step.lower()
142
+ if any(kw in sl for kw in ["reversible", "can undo"]):
143
+ r.reversible_steps.append(step)
144
+ if any(kw in sl for kw in ["irreversible", "cannot undo", "permanent"]):
145
+ r.irreversible_steps.append(step)
146
+ return r
147
+
148
+ # REVIEW
149
+ r = ReviewResult(raw_text=text, failure_modes=items)
150
+ for line in text.split("\n"):
151
+ ll = line.lower()
152
+ if any(kw in ll for kw in ["assumption", "validate"]):
153
+ r.assumptions_to_validate.append(line.strip().lstrip("-*• "))
154
+ if any(kw in ll for kw in ["abandon", "abort", "pivot"]):
155
+ r.abort_conditions.append(line.strip().lstrip("-*• "))
156
+ return r
157
+
158
+
159
+ def _parse_response(text: str) -> DODARResult:
160
+ result = DODARResult(text=text)
161
+ phases = _split_phases(text)
162
+ if "DIAGNOSE" in phases:
163
+ result.diagnosis = _parse_phase(phases["DIAGNOSE"], "DIAGNOSE") # type: ignore
164
+ if "OPTIONS" in phases:
165
+ result.options = _parse_phase(phases["OPTIONS"], "OPTIONS") # type: ignore
166
+ if "DECIDE" in phases:
167
+ result.decision = _parse_phase(phases["DECIDE"], "DECIDE") # type: ignore
168
+ if "ACTION" in phases:
169
+ result.action = _parse_phase(phases["ACTION"], "ACTION") # type: ignore
170
+ if "REVIEW" in phases:
171
+ result.review = _parse_phase(phases["REVIEW"], "REVIEW") # type: ignore
172
+ return result
173
+
174
+
175
+ # --------------------------------------------------------------------------- #
176
+ # Main class
177
+ # --------------------------------------------------------------------------- #
178
+
179
+ Mode = Literal["dodar", "pipeline", "zero_shot", "cot"]
180
+
181
+
182
+ class DODAR:
183
+ """DODAR structured reasoning framework for AI agents.
184
+
185
+ Args:
186
+ model: Model ID (e.g., "gpt-4.1-mini", "claude-sonnet-4-5").
187
+ mode: Default mode — "dodar" (single prompt), "pipeline" (5 calls),
188
+ "zero_shot", or "cot".
189
+ max_tokens: Maximum tokens per model call.
190
+
191
+ Example::
192
+
193
+ dodar = DODAR(model="gpt-4.1-mini")
194
+ result = dodar.analyze("Your scenario...")
195
+ print(result.diagnosis.hypotheses)
196
+ """
197
+
198
+ def __init__(
199
+ self,
200
+ model: str = "gpt-4.1-mini",
201
+ mode: Mode = "dodar",
202
+ max_tokens: int = 4096,
203
+ ) -> None:
204
+ self._model = model
205
+ self._default_mode = mode
206
+ self._max_tokens = max_tokens
207
+
208
+ @property
209
+ def model(self) -> str:
210
+ return self._model
211
+
212
+ def analyze(self, scenario: str, mode: Mode | None = None) -> DODARResult:
213
+ """Analyze a scenario synchronously.
214
+
215
+ Args:
216
+ scenario: The scenario text to analyze.
217
+ mode: Override the default mode for this call.
218
+
219
+ Returns:
220
+ DODARResult with structured phase access.
221
+ """
222
+ try:
223
+ loop = asyncio.get_running_loop()
224
+ except RuntimeError:
225
+ loop = None
226
+
227
+ if loop and loop.is_running():
228
+ import concurrent.futures
229
+ with concurrent.futures.ThreadPoolExecutor() as pool:
230
+ return pool.submit(
231
+ asyncio.run, self.analyze_async(scenario, mode)
232
+ ).result()
233
+ return asyncio.run(self.analyze_async(scenario, mode))
234
+
235
+ async def analyze_async(self, scenario: str, mode: Mode | None = None) -> DODARResult:
236
+ """Analyze a scenario asynchronously."""
237
+ m = mode or self._default_mode
238
+
239
+ if m == "pipeline":
240
+ return await self._run_pipeline(scenario)
241
+
242
+ prompt = self._build_prompt(scenario, m)
243
+ response = await run_model(self._model, prompt, max_tokens=self._max_tokens)
244
+
245
+ if m == "dodar":
246
+ result = _parse_response(response.text)
247
+ else:
248
+ result = DODARResult(text=response.text)
249
+
250
+ result.input_tokens = response.input_tokens
251
+ result.output_tokens = response.output_tokens
252
+ result.latency_seconds = response.latency_seconds
253
+ result.model = self._model
254
+ result.mode = m
255
+ return result
256
+
257
+ async def _run_pipeline(self, scenario: str) -> DODARResult:
258
+ """Run the 5-phase DODAR pipeline."""
259
+ total_input = 0
260
+ total_output = 0
261
+ t0 = __import__("time").monotonic()
262
+ context_parts: list[str] = []
263
+
264
+ # Phase 1: Diagnose
265
+ r1 = await run_model(
266
+ self._model,
267
+ PIPELINE_DIAGNOSE.format(scenario=scenario),
268
+ max_tokens=self._max_tokens,
269
+ )
270
+ total_input += r1.input_tokens
271
+ total_output += r1.output_tokens
272
+ context_parts.append(f"## DIAGNOSE\n{r1.text}")
273
+
274
+ # Phase 2: Options
275
+ r2 = await run_model(
276
+ self._model,
277
+ PIPELINE_OPTIONS.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
278
+ max_tokens=self._max_tokens,
279
+ )
280
+ total_input += r2.input_tokens
281
+ total_output += r2.output_tokens
282
+ context_parts.append(f"## OPTIONS\n{r2.text}")
283
+
284
+ # Phase 3: Decide
285
+ r3 = await run_model(
286
+ self._model,
287
+ PIPELINE_DECIDE.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
288
+ max_tokens=self._max_tokens,
289
+ )
290
+ total_input += r3.input_tokens
291
+ total_output += r3.output_tokens
292
+ context_parts.append(f"## DECIDE\n{r3.text}")
293
+
294
+ # Phase 4: Action
295
+ r4 = await run_model(
296
+ self._model,
297
+ PIPELINE_ACTION.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
298
+ max_tokens=self._max_tokens,
299
+ )
300
+ total_input += r4.input_tokens
301
+ total_output += r4.output_tokens
302
+ context_parts.append(f"## ACTION\n{r4.text}")
303
+
304
+ # Phase 5: Review
305
+ r5 = await run_model(
306
+ self._model,
307
+ PIPELINE_REVIEW.format(scenario=scenario, prior_context="\n\n".join(context_parts)),
308
+ max_tokens=self._max_tokens,
309
+ )
310
+ total_input += r5.input_tokens
311
+ total_output += r5.output_tokens
312
+ context_parts.append(f"## REVIEW\n{r5.text}")
313
+
314
+ full_text = "\n\n".join(context_parts)
315
+ result = _parse_response(full_text)
316
+ result.input_tokens = total_input
317
+ result.output_tokens = total_output
318
+ result.latency_seconds = __import__("time").monotonic() - t0
319
+ result.model = self._model
320
+ result.mode = "pipeline"
321
+ return result
322
+
323
+ def _build_prompt(self, scenario: str, mode: Mode) -> str:
324
+ match mode:
325
+ case "dodar":
326
+ return DODAR_SINGLE.format(scenario=scenario)
327
+ case "zero_shot":
328
+ return ZERO_SHOT.format(scenario=scenario)
329
+ case "cot":
330
+ return COT.format(scenario=scenario)
331
+ case _:
332
+ raise ValueError(f"Unknown mode: {mode}")
333
+
334
+ def __repr__(self) -> str:
335
+ return f"DODAR(model={self._model!r}, mode={self._default_mode!r})"
@@ -0,0 +1,159 @@
1
+ """Prompt templates for DODAR framework."""
2
+
3
+ ZERO_SHOT = """\
4
+ You are an expert analyst. Please analyze the following scenario and provide \
5
+ your best response.
6
+
7
+ {scenario}
8
+ """
9
+
10
+ COT = """\
11
+ You are an expert analyst. Please analyze the following scenario step by step. \
12
+ Think through the problem carefully, showing your reasoning at each stage \
13
+ before reaching your conclusions.
14
+
15
+ {scenario}
16
+ """
17
+
18
+ DODAR_SINGLE = """\
19
+ You are an expert analyst using the DODAR structured reasoning framework. \
20
+ Analyze the following scenario by working through ALL FIVE phases explicitly. \
21
+ Each phase is a cognitive gate — do not skip or combine phases.
22
+
23
+ ## Phase 1: DIAGNOSE
24
+ - List at least 3 competing hypotheses for what is happening
25
+ - Challenge your first instinct — what would a contrarian view say?
26
+ - Surface any latent assumptions you are making
27
+ - Identify paradoxes or contradictions in the information
28
+ - Map the unknowns — what information would change your diagnosis?
29
+ - Consider polycontribution — could multiple causes be interacting?
30
+
31
+ ## Phase 2: OPTIONS
32
+ - Generate at least 4 genuinely distinct options (not minor variations)
33
+ - Name the core tension — the fundamental trade-off this decision hinges on
34
+ - Identify different types of risk for each option (financial, reputational, technical, etc.)
35
+ - Test your assumptions — what must be true for each option to work?
36
+ - Quantify opportunity costs — what do you give up with each choice?
37
+ - Consider hidden stakeholders and constraints
38
+
39
+ ## Phase 3: DECIDE
40
+ - Make a clear recommendation
41
+ - Justify your choice explicitly against each alternative you rejected
42
+ - Identify the binding constraints that shaped this decision
43
+ - Reframe the time horizon — is this optimised for the right timeframe?
44
+ - Quantify the opportunity cost of your chosen path
45
+ - State your confidence level and what would change your mind (falsifiability)
46
+
47
+ ## Phase 4: ACTION
48
+ - Define specific, concrete implementation steps
49
+ - Identify dependencies between steps
50
+ - Specify timeline and resource requirements
51
+ - Identify blockers and prerequisites
52
+ - Mark which actions are reversible vs. irreversible
53
+
54
+ ## Phase 5: REVIEW
55
+ - Identify at least 3 specific failure modes/risks, each with:
56
+ (a) trigger condition, (b) detection method, (c) contingency response
57
+ - List assumptions from earlier phases that need validation
58
+ - Define conditions under which you would abandon this plan entirely
59
+
60
+ SCENARIO:
61
+ {scenario}
62
+
63
+ Work through each phase systematically. Label each phase clearly.
64
+ """
65
+
66
+ PIPELINE_DIAGNOSE = """\
67
+ You are a diagnostic reasoning specialist. Your role is to hold diagnosis OPEN \
68
+ and resist premature closure.
69
+
70
+ Analyze the following scenario. Do NOT recommend solutions yet — focus only on \
71
+ understanding what is happening.
72
+
73
+ 1. List at least 3 competing hypotheses for what is happening
74
+ 2. Challenge your first instinct — what would a contrarian view say?
75
+ 3. Surface any latent assumptions
76
+ 4. Identify paradoxes or contradictions
77
+ 5. Map the unknowns — what information would change the diagnosis?
78
+ 6. Consider polycontribution — could multiple causes interact?
79
+
80
+ SCENARIO:
81
+ {scenario}
82
+ """
83
+
84
+ PIPELINE_OPTIONS = """\
85
+ You are a strategic options analyst. Your role is to enumerate genuinely \
86
+ distinct alternatives with explicit trade-offs.
87
+
88
+ Given the scenario and the diagnosis below, generate options. Do NOT decide yet.
89
+
90
+ 1. Generate at least 4 genuinely distinct options (not minor variations)
91
+ 2. Name the core tension — the fundamental trade-off
92
+ 3. Separate different types of risk for each option
93
+ 4. Quantify opportunity costs for each path
94
+ 5. Identify hidden stakeholders and constraints
95
+
96
+ SCENARIO:
97
+ {scenario}
98
+
99
+ DIAGNOSIS (from prior phase):
100
+ {prior_context}
101
+ """
102
+
103
+ PIPELINE_DECIDE = """\
104
+ You are a decision architect. Your role is to commit to a recommendation \
105
+ with transparent, falsifiable reasoning.
106
+
107
+ Given the scenario, diagnosis, and options below, make the call.
108
+
109
+ 1. Make a clear recommendation
110
+ 2. Justify against each rejected alternative specifically
111
+ 3. Identify binding constraints that shaped this decision
112
+ 4. State your confidence level
113
+ 5. State what would change your mind (falsifiability)
114
+ 6. Quantify the opportunity cost of your chosen path
115
+
116
+ SCENARIO:
117
+ {scenario}
118
+
119
+ PRIOR ANALYSIS:
120
+ {prior_context}
121
+ """
122
+
123
+ PIPELINE_ACTION = """\
124
+ You are an implementation planning specialist. Your role is to translate \
125
+ decisions into concrete, sequenced action plans.
126
+
127
+ Given the scenario and the decision below, create the action plan.
128
+
129
+ 1. Define specific, concrete implementation steps
130
+ 2. Identify dependencies between steps
131
+ 3. Specify timeline and resource requirements
132
+ 4. Identify blockers and prerequisites
133
+ 5. Mark which actions are reversible vs. irreversible
134
+
135
+ SCENARIO:
136
+ {scenario}
137
+
138
+ PRIOR ANALYSIS:
139
+ {prior_context}
140
+ """
141
+
142
+ PIPELINE_REVIEW = """\
143
+ You are a critical review analyst. Your role is to identify failure modes \
144
+ and validate assumptions. Be adversarial — find the weaknesses.
145
+
146
+ Given the full analysis below, conduct the review.
147
+
148
+ 1. Identify at least 3 specific failure modes, each with:
149
+ (a) trigger condition, (b) detection method, (c) contingency response
150
+ 2. Audit assumptions from earlier phases — are they still valid?
151
+ 3. Define conditions for abandoning this plan entirely
152
+ 4. What would you monitor to detect early warning signs?
153
+
154
+ SCENARIO:
155
+ {scenario}
156
+
157
+ FULL ANALYSIS:
158
+ {prior_context}
159
+ """
@@ -0,0 +1,156 @@
1
+ """Model runners for DODAR — thin wrappers around provider SDKs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Literal
7
+ import time
8
+
9
+
10
+ @dataclass
11
+ class RunnerResponse:
12
+ text: str
13
+ input_tokens: int = 0
14
+ output_tokens: int = 0
15
+ latency_seconds: float = 0.0
16
+
17
+
18
+ def _detect_provider(model: str) -> str:
19
+ if model.startswith("claude"):
20
+ return "anthropic"
21
+ elif model.startswith(("gpt-", "o1", "o3", "o4")):
22
+ return "openai"
23
+ elif model.startswith("gemini"):
24
+ return "google"
25
+ else:
26
+ return "ollama"
27
+
28
+
29
+ # ---- Anthropic ---- #
30
+
31
async def _run_anthropic(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Call the Anthropic Messages API and normalize the reply.

    Credentials come from the SDK's default environment handling;
    `system` is passed through as the API's dedicated system field.
    """
    import anthropic

    client = anthropic.AsyncAnthropic()
    request: dict = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
    }
    if system:
        request["system"] = system

    started = time.monotonic()
    reply = await client.messages.create(**request)
    elapsed = time.monotonic() - started

    usage = reply.usage
    return RunnerResponse(
        text=reply.content[0].text,
        input_tokens=usage.input_tokens,
        output_tokens=usage.output_tokens,
        latency_seconds=elapsed,
    )
49
+
50
+
51
+ # ---- OpenAI ---- #
52
+
53
async def _run_openai(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Call the OpenAI Chat Completions API and normalize the reply.

    Args:
        model: OpenAI model ID (e.g. "gpt-4o-mini").
        prompt: User message content.
        system: Optional system message, sent first when given.
        max_tokens: Completion-token cap for the call.
    """
    import openai

    client = openai.AsyncOpenAI()
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    kwargs: dict = {"model": model, "messages": messages}
    # Reasoning-series (o1/o3/o4) and gpt-5 models reject `max_tokens` and
    # require `max_completion_tokens`. Bug fix: the original prefix list
    # omitted "o1" even though _detect_provider routes o1 models here, so
    # o1 calls failed with an invalid-parameter error.
    if model.startswith(("gpt-5", "o1", "o3", "o4")):
        kwargs["max_completion_tokens"] = max_tokens
    else:
        kwargs["max_tokens"] = max_tokens

    t0 = time.monotonic()
    response = await client.chat.completions.create(**kwargs)
    latency = time.monotonic() - t0

    usage = response.usage
    return RunnerResponse(
        text=response.choices[0].message.content or "",
        input_tokens=usage.prompt_tokens if usage else 0,
        output_tokens=usage.completion_tokens if usage else 0,
        latency_seconds=latency,
    )
80
+
81
+
82
+ # ---- Google ---- #
83
+
84
async def _run_google(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Call the Google Gemini API (google-genai SDK) and normalize the reply.

    This simple call shape has no separate system-message slot, so `system`
    is prepended to the prompt text.
    """
    from google import genai

    client = genai.Client()
    full_prompt = f"{system}\n\n{prompt}" if system else prompt

    t0 = time.monotonic()
    response = await client.aio.models.generate_content(
        model=model,
        contents=full_prompt,
        # Bug fix: max_tokens was previously accepted but silently ignored;
        # google-genai takes the output cap via config.max_output_tokens.
        config={"max_output_tokens": max_tokens},
    )
    latency = time.monotonic() - t0

    # Report token usage like the other runners. usage_metadata may be
    # absent on some responses, so fall back to 0 rather than raising.
    usage = getattr(response, "usage_metadata", None)
    return RunnerResponse(
        text=response.text or "",
        input_tokens=getattr(usage, "prompt_token_count", 0) or 0,
        output_tokens=getattr(usage, "candidates_token_count", 0) or 0,
        latency_seconds=latency,
    )
100
+
101
+
102
+ # ---- Ollama ---- #
103
+
104
async def _run_ollama(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Call a local Ollama server's /api/chat endpoint and normalize the reply.

    The server address defaults to the standard local port but honors the
    conventional OLLAMA_HOST environment variable when set.
    """
    import os
    import httpx

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    host = os.environ.get("OLLAMA_HOST", "http://localhost:11434")

    t0 = time.monotonic()
    # Long timeout: local models can be slow to load and generate.
    async with httpx.AsyncClient(timeout=600) as client:
        resp = await client.post(
            f"{host.rstrip('/')}/api/chat",
            json={
                "model": model,
                "messages": messages,
                "stream": False,
                # Bug fix: max_tokens was previously ignored entirely;
                # num_predict is Ollama's output-token cap.
                "options": {"num_predict": max_tokens},
            },
        )
        resp.raise_for_status()
        data = resp.json()
    latency = time.monotonic() - t0

    return RunnerResponse(
        text=data.get("message", {}).get("content", ""),
        input_tokens=data.get("prompt_eval_count", 0),
        output_tokens=data.get("eval_count", 0),
        latency_seconds=latency,
    )
127
+
128
+
129
+ # ---- Registry ---- #
130
+
131
+ _MODELS = {
132
+ "claude-opus-4-6", "claude-sonnet-4-5", "claude-haiku-4-5",
133
+ "gpt-5.4", "gpt-4o", "gpt-4o-mini", "gpt-4.1-mini", "gpt-4.1-nano",
134
+ "gemini-2.0-flash",
135
+ }
136
+
137
+
138
+ def available_models() -> list[str]:
139
+ """List all known model IDs."""
140
+ return sorted(_MODELS)
141
+
142
+
143
async def run_model(model: str, prompt: str, system: str | None = None, max_tokens: int = 4096) -> RunnerResponse:
    """Run a prompt against any supported model.

    The provider is inferred from the model ID prefix and the call is
    delegated to the matching backend runner.
    """
    runners = {
        "anthropic": _run_anthropic,
        "openai": _run_openai,
        "google": _run_google,
        "ollama": _run_ollama,
    }
    provider = _detect_provider(model)
    runner = runners.get(provider)
    if runner is None:
        # Defensive: _detect_provider only returns the four keys above.
        raise ValueError(f"Unknown provider for model: {model}")
    return await runner(model, prompt, system, max_tokens)