ouroboros-ai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ouroboros-ai might be problematic. Click here for more details.
- ouroboros/__init__.py +15 -0
- ouroboros/__main__.py +9 -0
- ouroboros/bigbang/__init__.py +39 -0
- ouroboros/bigbang/ambiguity.py +464 -0
- ouroboros/bigbang/interview.py +530 -0
- ouroboros/bigbang/seed_generator.py +610 -0
- ouroboros/cli/__init__.py +9 -0
- ouroboros/cli/commands/__init__.py +7 -0
- ouroboros/cli/commands/config.py +79 -0
- ouroboros/cli/commands/init.py +425 -0
- ouroboros/cli/commands/run.py +201 -0
- ouroboros/cli/commands/status.py +85 -0
- ouroboros/cli/formatters/__init__.py +31 -0
- ouroboros/cli/formatters/panels.py +157 -0
- ouroboros/cli/formatters/progress.py +112 -0
- ouroboros/cli/formatters/tables.py +166 -0
- ouroboros/cli/main.py +60 -0
- ouroboros/config/__init__.py +81 -0
- ouroboros/config/loader.py +292 -0
- ouroboros/config/models.py +332 -0
- ouroboros/core/__init__.py +62 -0
- ouroboros/core/ac_tree.py +401 -0
- ouroboros/core/context.py +472 -0
- ouroboros/core/errors.py +246 -0
- ouroboros/core/seed.py +212 -0
- ouroboros/core/types.py +205 -0
- ouroboros/evaluation/__init__.py +110 -0
- ouroboros/evaluation/consensus.py +350 -0
- ouroboros/evaluation/mechanical.py +351 -0
- ouroboros/evaluation/models.py +235 -0
- ouroboros/evaluation/pipeline.py +286 -0
- ouroboros/evaluation/semantic.py +302 -0
- ouroboros/evaluation/trigger.py +278 -0
- ouroboros/events/__init__.py +5 -0
- ouroboros/events/base.py +80 -0
- ouroboros/events/decomposition.py +153 -0
- ouroboros/events/evaluation.py +248 -0
- ouroboros/execution/__init__.py +44 -0
- ouroboros/execution/atomicity.py +451 -0
- ouroboros/execution/decomposition.py +481 -0
- ouroboros/execution/double_diamond.py +1386 -0
- ouroboros/execution/subagent.py +275 -0
- ouroboros/observability/__init__.py +63 -0
- ouroboros/observability/drift.py +383 -0
- ouroboros/observability/logging.py +504 -0
- ouroboros/observability/retrospective.py +338 -0
- ouroboros/orchestrator/__init__.py +78 -0
- ouroboros/orchestrator/adapter.py +391 -0
- ouroboros/orchestrator/events.py +278 -0
- ouroboros/orchestrator/runner.py +597 -0
- ouroboros/orchestrator/session.py +486 -0
- ouroboros/persistence/__init__.py +23 -0
- ouroboros/persistence/checkpoint.py +511 -0
- ouroboros/persistence/event_store.py +183 -0
- ouroboros/persistence/migrations/__init__.py +1 -0
- ouroboros/persistence/migrations/runner.py +100 -0
- ouroboros/persistence/migrations/scripts/001_initial.sql +20 -0
- ouroboros/persistence/schema.py +56 -0
- ouroboros/persistence/uow.py +230 -0
- ouroboros/providers/__init__.py +28 -0
- ouroboros/providers/base.py +133 -0
- ouroboros/providers/claude_code_adapter.py +212 -0
- ouroboros/providers/litellm_adapter.py +316 -0
- ouroboros/py.typed +0 -0
- ouroboros/resilience/__init__.py +67 -0
- ouroboros/resilience/lateral.py +595 -0
- ouroboros/resilience/stagnation.py +727 -0
- ouroboros/routing/__init__.py +60 -0
- ouroboros/routing/complexity.py +272 -0
- ouroboros/routing/downgrade.py +664 -0
- ouroboros/routing/escalation.py +340 -0
- ouroboros/routing/router.py +204 -0
- ouroboros/routing/tiers.py +247 -0
- ouroboros/secondary/__init__.py +40 -0
- ouroboros/secondary/scheduler.py +467 -0
- ouroboros/secondary/todo_registry.py +483 -0
- ouroboros_ai-0.1.0.dist-info/METADATA +607 -0
- ouroboros_ai-0.1.0.dist-info/RECORD +81 -0
- ouroboros_ai-0.1.0.dist-info/WHEEL +4 -0
- ouroboros_ai-0.1.0.dist-info/entry_points.txt +2 -0
- ouroboros_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""Event factories for the evaluation pipeline.
|
|
2
|
+
|
|
3
|
+
This module provides factory functions for creating evaluation-related events.
|
|
4
|
+
All events follow the dot.notation.past_tense naming convention.
|
|
5
|
+
|
|
6
|
+
Event Types:
|
|
7
|
+
evaluation.stage1.started - Mechanical verification started
|
|
8
|
+
evaluation.stage1.completed - Mechanical verification completed
|
|
9
|
+
evaluation.stage2.started - Semantic evaluation started
|
|
10
|
+
evaluation.stage2.completed - Semantic evaluation completed
|
|
11
|
+
evaluation.stage3.started - Consensus evaluation started
|
|
12
|
+
evaluation.stage3.completed - Consensus evaluation completed
|
|
13
|
+
evaluation.consensus.triggered - Consensus trigger activated
|
|
14
|
+
evaluation.pipeline.completed - Full pipeline completed
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from ouroboros.events.base import BaseEvent
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def create_stage1_started_event(
    execution_id: str,
    checks_to_run: list[str],
) -> BaseEvent:
    """Build the event emitted when Stage 1 mechanical verification begins.

    Args:
        execution_id: Unique execution identifier
        checks_to_run: List of check types to execute

    Returns:
        BaseEvent for stage1 start
    """
    payload = {"checks_to_run": checks_to_run}
    return BaseEvent(
        type="evaluation.stage1.started",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def create_stage1_completed_event(
    execution_id: str,
    passed: bool,
    checks: list[dict[str, Any]],
    coverage_score: float | None,
) -> BaseEvent:
    """Build the event recording the outcome of Stage 1 mechanical checks.

    Args:
        execution_id: Unique execution identifier
        passed: Overall pass/fail status
        checks: List of check results as dicts
        coverage_score: Test coverage score if measured

    Returns:
        BaseEvent for stage1 completion
    """
    # A check dict with no explicit "passed" key is treated as passing, so
    # only entries that explicitly report failure contribute to the count.
    failed = [check for check in checks if not check.get("passed", True)]
    payload = {
        "passed": passed,
        "checks": checks,
        "coverage_score": coverage_score,
        "failed_count": len(failed),
    }
    return BaseEvent(
        type="evaluation.stage1.completed",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def create_stage2_started_event(
    execution_id: str,
    model: str,
    current_ac: str,
) -> BaseEvent:
    """Build the event emitted when Stage 2 semantic evaluation begins.

    Args:
        execution_id: Unique execution identifier
        model: LLM model being used
        current_ac: Acceptance criterion being evaluated

    Returns:
        BaseEvent for stage2 start
    """
    payload = {
        "model": model,
        "current_ac": current_ac,
    }
    return BaseEvent(
        type="evaluation.stage2.started",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def create_stage2_completed_event(
    execution_id: str,
    score: float,
    ac_compliance: bool,
    goal_alignment: float,
    drift_score: float,
    uncertainty: float,
) -> BaseEvent:
    """Build the event recording the outcome of Stage 2 semantic evaluation.

    Args:
        execution_id: Unique execution identifier
        score: Overall evaluation score
        ac_compliance: Whether AC is met
        goal_alignment: Goal alignment score
        drift_score: Drift from seed
        uncertainty: Model uncertainty

    Returns:
        BaseEvent for stage2 completion
    """
    payload = {
        "score": score,
        "ac_compliance": ac_compliance,
        "goal_alignment": goal_alignment,
        "drift_score": drift_score,
        "uncertainty": uncertainty,
    }
    return BaseEvent(
        type="evaluation.stage2.completed",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def create_stage3_started_event(
    execution_id: str,
    models: list[str],
    trigger_reason: str,
) -> BaseEvent:
    """Build the event emitted when Stage 3 consensus evaluation begins.

    Args:
        execution_id: Unique execution identifier
        models: List of models participating in consensus
        trigger_reason: Reason consensus was triggered

    Returns:
        BaseEvent for stage3 start
    """
    payload = {
        "models": models,
        "trigger_reason": trigger_reason,
    }
    return BaseEvent(
        type="evaluation.stage3.started",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def create_stage3_completed_event(
    execution_id: str,
    approved: bool,
    votes: list[dict[str, Any]],
    majority_ratio: float,
    disagreements: list[str],
) -> BaseEvent:
    """Build the event recording the outcome of Stage 3 consensus evaluation.

    Args:
        execution_id: Unique execution identifier
        approved: Whether consensus approved
        votes: List of vote dicts
        majority_ratio: Approval ratio
        disagreements: List of dissenting reasons

    Returns:
        BaseEvent for stage3 completion
    """
    # A vote with no explicit "approved" key counts as not approving.
    approving = [vote for vote in votes if vote.get("approved", False)]
    payload = {
        "approved": approved,
        "votes": votes,
        "majority_ratio": majority_ratio,
        "disagreements": disagreements,
        "total_votes": len(votes),
        "approving_votes": len(approving),
    }
    return BaseEvent(
        type="evaluation.stage3.completed",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def create_consensus_triggered_event(
    execution_id: str,
    trigger_type: str,
    trigger_details: dict[str, Any],
) -> BaseEvent:
    """Build the event emitted when the trigger matrix activates consensus.

    Args:
        execution_id: Unique execution identifier
        trigger_type: Type of trigger activated
        trigger_details: Additional context about trigger

    Returns:
        BaseEvent for consensus trigger
    """
    payload = {
        "trigger_type": trigger_type,
        "trigger_details": trigger_details,
    }
    return BaseEvent(
        type="evaluation.consensus.triggered",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def create_pipeline_completed_event(
    execution_id: str,
    final_approved: bool,
    highest_stage: int,
    failure_reason: str | None,
) -> BaseEvent:
    """Build the event recording completion of the full evaluation pipeline.

    Args:
        execution_id: Unique execution identifier
        final_approved: Overall approval status
        highest_stage: Highest stage number completed
        failure_reason: Reason for failure if not approved

    Returns:
        BaseEvent for pipeline completion
    """
    payload = {
        "final_approved": final_approved,
        "highest_stage": highest_stage,
        "failure_reason": failure_reason,
    }
    return BaseEvent(
        type="evaluation.pipeline.completed",
        aggregate_type="evaluation",
        aggregate_id=execution_id,
        data=payload,
    )
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Execution module for Double Diamond cycle.
|
|
2
|
+
|
|
3
|
+
This module implements Phase 2 of the Ouroboros workflow:
|
|
4
|
+
- Double Diamond pattern: Discover → Define → Design → Deliver
|
|
5
|
+
- Recursive AC decomposition
|
|
6
|
+
- Phase transition management
|
|
7
|
+
- SubAgent isolation for child AC execution (Story 3.4)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from ouroboros.execution.double_diamond import (
|
|
11
|
+
CycleResult,
|
|
12
|
+
DoubleDiamond,
|
|
13
|
+
ExecutionError,
|
|
14
|
+
Phase,
|
|
15
|
+
PhaseContext,
|
|
16
|
+
PhaseResult,
|
|
17
|
+
)
|
|
18
|
+
from ouroboros.execution.subagent import (
|
|
19
|
+
SubAgentError,
|
|
20
|
+
ValidationError,
|
|
21
|
+
create_subagent_completed_event,
|
|
22
|
+
create_subagent_failed_event,
|
|
23
|
+
create_subagent_started_event,
|
|
24
|
+
create_subagent_validated_event,
|
|
25
|
+
validate_child_result,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Public API of the execution package, grouped by feature area.
__all__ = [
    # Double Diamond cycle machinery
    "CycleResult",
    "DoubleDiamond",
    "ExecutionError",
    "Phase",
    "PhaseContext",
    "PhaseResult",
    # SubAgent isolation helpers (Story 3.4)
    "SubAgentError",
    "ValidationError",
    "create_subagent_completed_event",
    "create_subagent_failed_event",
    "create_subagent_started_event",
    "create_subagent_validated_event",
    "validate_child_result",
]
|