ouroboros-ai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ouroboros-ai might be problematic. Click here for more details.
- ouroboros/__init__.py +15 -0
- ouroboros/__main__.py +9 -0
- ouroboros/bigbang/__init__.py +39 -0
- ouroboros/bigbang/ambiguity.py +464 -0
- ouroboros/bigbang/interview.py +530 -0
- ouroboros/bigbang/seed_generator.py +610 -0
- ouroboros/cli/__init__.py +9 -0
- ouroboros/cli/commands/__init__.py +7 -0
- ouroboros/cli/commands/config.py +79 -0
- ouroboros/cli/commands/init.py +425 -0
- ouroboros/cli/commands/run.py +201 -0
- ouroboros/cli/commands/status.py +85 -0
- ouroboros/cli/formatters/__init__.py +31 -0
- ouroboros/cli/formatters/panels.py +157 -0
- ouroboros/cli/formatters/progress.py +112 -0
- ouroboros/cli/formatters/tables.py +166 -0
- ouroboros/cli/main.py +60 -0
- ouroboros/config/__init__.py +81 -0
- ouroboros/config/loader.py +292 -0
- ouroboros/config/models.py +332 -0
- ouroboros/core/__init__.py +62 -0
- ouroboros/core/ac_tree.py +401 -0
- ouroboros/core/context.py +472 -0
- ouroboros/core/errors.py +246 -0
- ouroboros/core/seed.py +212 -0
- ouroboros/core/types.py +205 -0
- ouroboros/evaluation/__init__.py +110 -0
- ouroboros/evaluation/consensus.py +350 -0
- ouroboros/evaluation/mechanical.py +351 -0
- ouroboros/evaluation/models.py +235 -0
- ouroboros/evaluation/pipeline.py +286 -0
- ouroboros/evaluation/semantic.py +302 -0
- ouroboros/evaluation/trigger.py +278 -0
- ouroboros/events/__init__.py +5 -0
- ouroboros/events/base.py +80 -0
- ouroboros/events/decomposition.py +153 -0
- ouroboros/events/evaluation.py +248 -0
- ouroboros/execution/__init__.py +44 -0
- ouroboros/execution/atomicity.py +451 -0
- ouroboros/execution/decomposition.py +481 -0
- ouroboros/execution/double_diamond.py +1386 -0
- ouroboros/execution/subagent.py +275 -0
- ouroboros/observability/__init__.py +63 -0
- ouroboros/observability/drift.py +383 -0
- ouroboros/observability/logging.py +504 -0
- ouroboros/observability/retrospective.py +338 -0
- ouroboros/orchestrator/__init__.py +78 -0
- ouroboros/orchestrator/adapter.py +391 -0
- ouroboros/orchestrator/events.py +278 -0
- ouroboros/orchestrator/runner.py +597 -0
- ouroboros/orchestrator/session.py +486 -0
- ouroboros/persistence/__init__.py +23 -0
- ouroboros/persistence/checkpoint.py +511 -0
- ouroboros/persistence/event_store.py +183 -0
- ouroboros/persistence/migrations/__init__.py +1 -0
- ouroboros/persistence/migrations/runner.py +100 -0
- ouroboros/persistence/migrations/scripts/001_initial.sql +20 -0
- ouroboros/persistence/schema.py +56 -0
- ouroboros/persistence/uow.py +230 -0
- ouroboros/providers/__init__.py +28 -0
- ouroboros/providers/base.py +133 -0
- ouroboros/providers/claude_code_adapter.py +212 -0
- ouroboros/providers/litellm_adapter.py +316 -0
- ouroboros/py.typed +0 -0
- ouroboros/resilience/__init__.py +67 -0
- ouroboros/resilience/lateral.py +595 -0
- ouroboros/resilience/stagnation.py +727 -0
- ouroboros/routing/__init__.py +60 -0
- ouroboros/routing/complexity.py +272 -0
- ouroboros/routing/downgrade.py +664 -0
- ouroboros/routing/escalation.py +340 -0
- ouroboros/routing/router.py +204 -0
- ouroboros/routing/tiers.py +247 -0
- ouroboros/secondary/__init__.py +40 -0
- ouroboros/secondary/scheduler.py +467 -0
- ouroboros/secondary/todo_registry.py +483 -0
- ouroboros_ai-0.1.0.dist-info/METADATA +607 -0
- ouroboros_ai-0.1.0.dist-info/RECORD +81 -0
- ouroboros_ai-0.1.0.dist-info/WHEEL +4 -0
- ouroboros_ai-0.1.0.dist-info/entry_points.txt +2 -0
- ouroboros_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Routing module for Ouroboros.
|
|
2
|
+
|
|
3
|
+
This module handles model tier routing and selection, including:
|
|
4
|
+
- Tier enumeration and configuration (Frugal, Standard, Frontier)
|
|
5
|
+
- Complexity estimation for routing decisions
|
|
6
|
+
- PAL (Progressive Adaptive LLM) router for automatic tier selection
|
|
7
|
+
- Escalation on failure with automatic tier upgrades
|
|
8
|
+
- Downgrade on success for cost optimization
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from ouroboros.routing.complexity import (
|
|
12
|
+
ComplexityScore,
|
|
13
|
+
TaskContext,
|
|
14
|
+
estimate_complexity,
|
|
15
|
+
)
|
|
16
|
+
from ouroboros.routing.downgrade import (
|
|
17
|
+
DOWNGRADE_THRESHOLD,
|
|
18
|
+
SIMILARITY_THRESHOLD,
|
|
19
|
+
DowngradeManager,
|
|
20
|
+
DowngradeResult,
|
|
21
|
+
PatternMatcher,
|
|
22
|
+
SuccessTracker,
|
|
23
|
+
)
|
|
24
|
+
from ouroboros.routing.escalation import (
|
|
25
|
+
FAILURE_THRESHOLD,
|
|
26
|
+
EscalationAction,
|
|
27
|
+
EscalationManager,
|
|
28
|
+
FailureTracker,
|
|
29
|
+
StagnationEvent,
|
|
30
|
+
)
|
|
31
|
+
from ouroboros.routing.router import PALRouter, RoutingDecision, route_task
|
|
32
|
+
from ouroboros.routing.tiers import Tier, get_model_for_tier, get_tier_config
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
# Tiers
|
|
36
|
+
"Tier",
|
|
37
|
+
"get_model_for_tier",
|
|
38
|
+
"get_tier_config",
|
|
39
|
+
# Complexity
|
|
40
|
+
"TaskContext",
|
|
41
|
+
"ComplexityScore",
|
|
42
|
+
"estimate_complexity",
|
|
43
|
+
# Router
|
|
44
|
+
"PALRouter",
|
|
45
|
+
"RoutingDecision",
|
|
46
|
+
"route_task",
|
|
47
|
+
# Escalation
|
|
48
|
+
"EscalationManager",
|
|
49
|
+
"EscalationAction",
|
|
50
|
+
"FailureTracker",
|
|
51
|
+
"StagnationEvent",
|
|
52
|
+
"FAILURE_THRESHOLD",
|
|
53
|
+
# Downgrade
|
|
54
|
+
"DowngradeManager",
|
|
55
|
+
"DowngradeResult",
|
|
56
|
+
"SuccessTracker",
|
|
57
|
+
"PatternMatcher",
|
|
58
|
+
"DOWNGRADE_THRESHOLD",
|
|
59
|
+
"SIMILARITY_THRESHOLD",
|
|
60
|
+
]
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"""Complexity estimation for task routing in Ouroboros.
|
|
2
|
+
|
|
3
|
+
This module provides complexity estimation for the PAL (Progressive Adaptive LLM)
|
|
4
|
+
routing system. Complexity scores determine which tier (Frugal, Standard, or
|
|
5
|
+
Frontier) should handle a given task.
|
|
6
|
+
|
|
7
|
+
Complexity Factors:
|
|
8
|
+
- Token count: 30% weight - Approximated task size
|
|
9
|
+
- Tool dependency count: 30% weight - Number of external tools needed
|
|
10
|
+
- AC nesting depth: 40% weight - Acceptance criteria complexity
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
from ouroboros.routing.complexity import TaskContext, estimate_complexity
|
|
14
|
+
|
|
15
|
+
# Create a task context
|
|
16
|
+
context = TaskContext(
|
|
17
|
+
token_count=500,
|
|
18
|
+
tool_dependencies=["git", "npm"],
|
|
19
|
+
ac_depth=2,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Estimate complexity
|
|
23
|
+
result = estimate_complexity(context)
|
|
24
|
+
if result.is_ok:
|
|
25
|
+
score = result.value
|
|
26
|
+
print(f"Complexity: {score.score:.2f}")
|
|
27
|
+
print(f"Breakdown: {score.breakdown}")
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from dataclasses import dataclass, field
|
|
31
|
+
|
|
32
|
+
from ouroboros.core.errors import ValidationError
|
|
33
|
+
from ouroboros.core.types import Result
|
|
34
|
+
from ouroboros.observability.logging import get_logger
|
|
35
|
+
|
|
36
|
+
# Module-level structured logger (from ouroboros.observability.logging).
log = get_logger(__name__)


# Weight constants for complexity calculation.
# The three weights sum to 1.0, so the weighted total stays in 0.0-1.0.
WEIGHT_TOKEN_COUNT = 0.30
WEIGHT_TOOL_DEPENDENCIES = 0.30
WEIGHT_AC_DEPTH = 0.40

# Normalization thresholds
# Token count: normalized between 0 (0 tokens) and 1 (>= MAX_TOKEN_THRESHOLD)
MAX_TOKEN_THRESHOLD = 4000  # Tasks above this are considered maximally complex for tokens

# Tool dependencies: normalized between 0 (0 tools) and 1 (>= MAX_TOOL_THRESHOLD)
MAX_TOOL_THRESHOLD = 5  # Tasks with 5+ tools are considered maximally complex for tools

# AC depth: normalized between 0 (depth 0) and 1 (depth >= MAX_DEPTH_THRESHOLD)
MAX_DEPTH_THRESHOLD = 5  # AC depth of 5+ is considered maximally complex
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True, slots=True)
class TaskContext:
    """Context information about a task for complexity estimation.

    This dataclass holds the information needed to estimate a task's complexity.
    All fields contribute to the final complexity score with different weights
    (see WEIGHT_TOKEN_COUNT, WEIGHT_TOOL_DEPENDENCIES, WEIGHT_AC_DEPTH).

    Attributes:
        token_count: Estimated number of tokens in the task (prompt + expected output).
            Must be non-negative. Default is 0.
        tool_dependencies: List of tool names the task depends on.
            Each unique tool adds to complexity. Default is empty list.
        ac_depth: Acceptance criteria nesting depth (0-indexed).
            Deeper nesting indicates more complex requirements. Default is 0.

    Example:
        context = TaskContext(
            token_count=1500,
            tool_dependencies=["git", "npm", "docker"],
            ac_depth=3,
        )
    """

    token_count: int = 0
    # default_factory avoids the shared-mutable-default pitfall.
    tool_dependencies: list[str] = field(default_factory=list)
    ac_depth: int = 0
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass(frozen=True, slots=True)
class ComplexityScore:
    """Result of complexity estimation.

    Contains both the normalized overall score and a breakdown of how
    each factor contributed to the final score.

    Attributes:
        score: Normalized complexity score between 0.0 and 1.0.
            - < 0.4: Low complexity (Frugal tier)
            - 0.4-0.7: Medium complexity (Standard tier)
            - > 0.7: High complexity (Frontier tier)
        breakdown: Dictionary showing individual factor contributions:
            - "token_score": Normalized token count contribution (0.0-1.0)
            - "tool_score": Normalized tool dependency contribution (0.0-1.0)
            - "depth_score": Normalized AC depth contribution (0.0-1.0)
            - "weighted_token": Token contribution after weight applied
            - "weighted_tool": Tool contribution after weight applied
            - "weighted_depth": Depth contribution after weight applied

    Example:
        score = ComplexityScore(
            score=0.65,
            breakdown={
                "token_score": 0.5,
                "tool_score": 0.6,
                "depth_score": 0.8,
                "weighted_token": 0.15,
                "weighted_tool": 0.18,
                "weighted_depth": 0.32,
            },
        )
    """

    score: float
    breakdown: dict[str, float]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _normalize_token_count(token_count: int) -> float:
    """Map a token count onto the 0.0-1.0 range.

    Counts at or below zero map to 0.0, counts at or above
    MAX_TOKEN_THRESHOLD saturate at 1.0, and everything in between
    scales linearly.

    Args:
        token_count: Number of tokens in the task.

    Returns:
        Normalized score between 0.0 and 1.0.
    """
    fraction = token_count / MAX_TOKEN_THRESHOLD
    return min(max(fraction, 0.0), 1.0)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _normalize_tool_dependencies(tool_count: int) -> float:
    """Map a tool-dependency count onto the 0.0-1.0 range.

    Counts at or below zero map to 0.0, counts at or above
    MAX_TOOL_THRESHOLD saturate at 1.0, and everything in between
    scales linearly.

    Args:
        tool_count: Number of tool dependencies.

    Returns:
        Normalized score between 0.0 and 1.0.
    """
    fraction = tool_count / MAX_TOOL_THRESHOLD
    return min(max(fraction, 0.0), 1.0)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _normalize_ac_depth(ac_depth: int) -> float:
    """Map an acceptance-criteria nesting depth onto the 0.0-1.0 range.

    Depths at or below zero map to 0.0, depths at or above
    MAX_DEPTH_THRESHOLD saturate at 1.0, and everything in between
    scales linearly.

    Args:
        ac_depth: Acceptance criteria nesting depth.

    Returns:
        Normalized score between 0.0 and 1.0.
    """
    fraction = ac_depth / MAX_DEPTH_THRESHOLD
    return min(max(fraction, 0.0), 1.0)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _validate_task_context(context: TaskContext) -> Result[None, ValidationError]:
    """Check that the numeric fields of the task context are non-negative.

    Each offending field is logged as a "complexity.validation.failed"
    event before the error is returned; the first failure wins.

    Args:
        context: The task context to validate.

    Returns:
        Result containing None on success or ValidationError on failure.
    """
    # (field name, observed value, error message) for every numeric field
    # that must be non-negative.
    checks = (
        ("token_count", context.token_count, "Token count must be non-negative"),
        ("ac_depth", context.ac_depth, "AC depth must be non-negative"),
    )
    for field_name, value, message in checks:
        if value < 0:
            log.warning(
                "complexity.validation.failed",
                field=field_name,
                value=value,
            )
            return Result.err(ValidationError(message, field=field_name, value=value))

    return Result.ok(None)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def estimate_complexity(
    context: TaskContext,
) -> Result[ComplexityScore, ValidationError]:
    """Estimate the complexity of a task based on its context.

    Produces a weighted score in the 0.0-1.0 range from three factors:
    token count (30%), tool dependency count (30%), and acceptance
    criteria nesting depth (40%).

    Args:
        context: Task context containing complexity factors.

    Returns:
        Result containing ComplexityScore on success or ValidationError on failure.

    Example:
        context = TaskContext(
            token_count=2000,
            tool_dependencies=["git", "npm"],
            ac_depth=3,
        )
        result = estimate_complexity(context)
        if result.is_ok:
            print(f"Score: {result.value.score}")  # e.g., 0.59
    """
    # Reject negative inputs up front.
    validation = _validate_task_context(context)
    if validation.is_err:
        return Result.err(validation.error)

    tool_count = len(context.tool_dependencies)

    # Per-factor scores, each normalized to 0.0-1.0.
    token_score = _normalize_token_count(context.token_count)
    tool_score = _normalize_tool_dependencies(tool_count)
    depth_score = _normalize_ac_depth(context.ac_depth)

    # Weighted contributions; the weights sum to 1.0 so the total is
    # itself normalized.
    weighted_token = WEIGHT_TOKEN_COUNT * token_score
    weighted_tool = WEIGHT_TOOL_DEPENDENCIES * tool_score
    weighted_depth = WEIGHT_AC_DEPTH * depth_score
    final_score = weighted_token + weighted_tool + weighted_depth

    # Expose both raw and weighted values so callers can see why a task
    # landed at a given score.
    breakdown = {
        "token_score": token_score,
        "tool_score": tool_score,
        "depth_score": depth_score,
        "weighted_token": weighted_token,
        "weighted_tool": weighted_tool,
        "weighted_depth": weighted_depth,
    }

    log.debug(
        "complexity.estimated",
        score=final_score,
        token_count=context.token_count,
        tool_count=tool_count,
        ac_depth=context.ac_depth,
        breakdown=breakdown,
    )

    return Result.ok(ComplexityScore(score=final_score, breakdown=breakdown))
|