ouroboros-ai 0.1.0 (ouroboros_ai-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ouroboros-ai might be problematic.

Files changed (81)
  1. ouroboros/__init__.py +15 -0
  2. ouroboros/__main__.py +9 -0
  3. ouroboros/bigbang/__init__.py +39 -0
  4. ouroboros/bigbang/ambiguity.py +464 -0
  5. ouroboros/bigbang/interview.py +530 -0
  6. ouroboros/bigbang/seed_generator.py +610 -0
  7. ouroboros/cli/__init__.py +9 -0
  8. ouroboros/cli/commands/__init__.py +7 -0
  9. ouroboros/cli/commands/config.py +79 -0
  10. ouroboros/cli/commands/init.py +425 -0
  11. ouroboros/cli/commands/run.py +201 -0
  12. ouroboros/cli/commands/status.py +85 -0
  13. ouroboros/cli/formatters/__init__.py +31 -0
  14. ouroboros/cli/formatters/panels.py +157 -0
  15. ouroboros/cli/formatters/progress.py +112 -0
  16. ouroboros/cli/formatters/tables.py +166 -0
  17. ouroboros/cli/main.py +60 -0
  18. ouroboros/config/__init__.py +81 -0
  19. ouroboros/config/loader.py +292 -0
  20. ouroboros/config/models.py +332 -0
  21. ouroboros/core/__init__.py +62 -0
  22. ouroboros/core/ac_tree.py +401 -0
  23. ouroboros/core/context.py +472 -0
  24. ouroboros/core/errors.py +246 -0
  25. ouroboros/core/seed.py +212 -0
  26. ouroboros/core/types.py +205 -0
  27. ouroboros/evaluation/__init__.py +110 -0
  28. ouroboros/evaluation/consensus.py +350 -0
  29. ouroboros/evaluation/mechanical.py +351 -0
  30. ouroboros/evaluation/models.py +235 -0
  31. ouroboros/evaluation/pipeline.py +286 -0
  32. ouroboros/evaluation/semantic.py +302 -0
  33. ouroboros/evaluation/trigger.py +278 -0
  34. ouroboros/events/__init__.py +5 -0
  35. ouroboros/events/base.py +80 -0
  36. ouroboros/events/decomposition.py +153 -0
  37. ouroboros/events/evaluation.py +248 -0
  38. ouroboros/execution/__init__.py +44 -0
  39. ouroboros/execution/atomicity.py +451 -0
  40. ouroboros/execution/decomposition.py +481 -0
  41. ouroboros/execution/double_diamond.py +1386 -0
  42. ouroboros/execution/subagent.py +275 -0
  43. ouroboros/observability/__init__.py +63 -0
  44. ouroboros/observability/drift.py +383 -0
  45. ouroboros/observability/logging.py +504 -0
  46. ouroboros/observability/retrospective.py +338 -0
  47. ouroboros/orchestrator/__init__.py +78 -0
  48. ouroboros/orchestrator/adapter.py +391 -0
  49. ouroboros/orchestrator/events.py +278 -0
  50. ouroboros/orchestrator/runner.py +597 -0
  51. ouroboros/orchestrator/session.py +486 -0
  52. ouroboros/persistence/__init__.py +23 -0
  53. ouroboros/persistence/checkpoint.py +511 -0
  54. ouroboros/persistence/event_store.py +183 -0
  55. ouroboros/persistence/migrations/__init__.py +1 -0
  56. ouroboros/persistence/migrations/runner.py +100 -0
  57. ouroboros/persistence/migrations/scripts/001_initial.sql +20 -0
  58. ouroboros/persistence/schema.py +56 -0
  59. ouroboros/persistence/uow.py +230 -0
  60. ouroboros/providers/__init__.py +28 -0
  61. ouroboros/providers/base.py +133 -0
  62. ouroboros/providers/claude_code_adapter.py +212 -0
  63. ouroboros/providers/litellm_adapter.py +316 -0
  64. ouroboros/py.typed +0 -0
  65. ouroboros/resilience/__init__.py +67 -0
  66. ouroboros/resilience/lateral.py +595 -0
  67. ouroboros/resilience/stagnation.py +727 -0
  68. ouroboros/routing/__init__.py +60 -0
  69. ouroboros/routing/complexity.py +272 -0
  70. ouroboros/routing/downgrade.py +664 -0
  71. ouroboros/routing/escalation.py +340 -0
  72. ouroboros/routing/router.py +204 -0
  73. ouroboros/routing/tiers.py +247 -0
  74. ouroboros/secondary/__init__.py +40 -0
  75. ouroboros/secondary/scheduler.py +467 -0
  76. ouroboros/secondary/todo_registry.py +483 -0
  77. ouroboros_ai-0.1.0.dist-info/METADATA +607 -0
  78. ouroboros_ai-0.1.0.dist-info/RECORD +81 -0
  79. ouroboros_ai-0.1.0.dist-info/WHEEL +4 -0
  80. ouroboros_ai-0.1.0.dist-info/entry_points.txt +2 -0
  81. ouroboros_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
ouroboros/__init__.py ADDED
@@ -0,0 +1,15 @@
+ """Ouroboros - Self-Improving AI Workflow System."""
+
+ __version__ = "0.1.0"
+
+ __all__ = ["__version__", "main"]
+
+
+ def main() -> None:
+     """Main entry point for the Ouroboros CLI.
+
+     This function invokes the Typer app from ouroboros.cli.main.
+     """
+     from ouroboros.cli.main import app
+
+     app()
ouroboros/__main__.py ADDED
@@ -0,0 +1,9 @@
+ """Ouroboros CLI entry point.
+
+ This module serves as the main entry point for the Ouroboros CLI.
+ """
+
+ from ouroboros import main
+
+ if __name__ == "__main__":
+     main()
ouroboros/bigbang/__init__.py ADDED
@@ -0,0 +1,39 @@
+ """Big Bang phase - Interactive interview for requirement clarification.
+
+ This package implements Phase 0: Big Bang, which transforms vague user ideas
+ into clear, executable requirements through an interactive interview process.
+ """
+
+ from ouroboros.bigbang.ambiguity import (
+     AMBIGUITY_THRESHOLD,
+     AmbiguityScore,
+     AmbiguityScorer,
+     ComponentScore,
+     ScoreBreakdown,
+     format_score_display,
+     is_ready_for_seed,
+ )
+ from ouroboros.bigbang.interview import InterviewEngine, InterviewState
+ from ouroboros.bigbang.seed_generator import (
+     SeedGenerator,
+     load_seed,
+     save_seed_sync,
+ )
+
+ __all__ = [
+     # Ambiguity
+     "AMBIGUITY_THRESHOLD",
+     "AmbiguityScore",
+     "AmbiguityScorer",
+     "ComponentScore",
+     "ScoreBreakdown",
+     "format_score_display",
+     "is_ready_for_seed",
+     # Interview
+     "InterviewEngine",
+     "InterviewState",
+     # Seed Generation
+     "SeedGenerator",
+     "load_seed",
+     "save_seed_sync",
+ ]
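
The exports above cover the full Phase 0 flow: interview, ambiguity scoring, and Seed generation. As a rough sketch of the scoring step only, assuming an InterviewState already populated by InterviewEngine and that LiteLLMAdapter accepts default construction (as in the AmbiguityScorer docstring further down), usage could look like this:

    from ouroboros.bigbang import AmbiguityScorer, format_score_display
    from ouroboros.providers.litellm_adapter import LiteLLMAdapter

    async def score_once(interview_state):
        """Score one interview round and print the breakdown (illustrative helper)."""
        scorer = AmbiguityScorer(llm_adapter=LiteLLMAdapter())
        result = await scorer.score(interview_state)
        if result.is_err:
            raise RuntimeError(f"Scoring failed: {result.error}")
        ambiguity = result.value
        print(format_score_display(ambiguity))
        if not ambiguity.is_ready_for_seed:
            for question in scorer.generate_clarification_questions(ambiguity.breakdown):
                print(f"- {question}")

    # Run with asyncio.run(score_once(state)), given a real InterviewState.

The error handling is a guess at the Result API (is_err, error, value) as it appears in ambiguity.py below; interview.py and seed_generator.py define the rest of the flow.
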
ouroboros/bigbang/ambiguity.py ADDED
@@ -0,0 +1,464 @@
+ """Ambiguity scoring module for requirement clarity assessment.
+
+ This module implements ambiguity measurement for interview states, determining
+ when requirements are clear enough (score <= 0.2) to proceed with Seed generation.
+
+ The scoring algorithm evaluates three key components:
+ - Goal Clarity (40%): How well the goal statement is defined
+ - Constraint Clarity (30%): How clearly constraints are specified
+ - Success Criteria Clarity (30%): How measurable the success criteria are
+ """
+
+ from dataclasses import dataclass
+ from typing import Any
+
+ from pydantic import BaseModel, Field
+ import structlog
+
+ from ouroboros.bigbang.interview import InterviewState
+ from ouroboros.core.errors import ProviderError
+ from ouroboros.core.types import Result
+ from ouroboros.providers.base import CompletionConfig, Message, MessageRole
+ from ouroboros.providers.litellm_adapter import LiteLLMAdapter
+
+ log = structlog.get_logger()
+
+ # Threshold for allowing Seed generation (NFR6)
+ AMBIGUITY_THRESHOLD = 0.2
+
+ # Weights for score components
+ GOAL_CLARITY_WEIGHT = 0.40
+ CONSTRAINT_CLARITY_WEIGHT = 0.30
+ SUCCESS_CRITERIA_CLARITY_WEIGHT = 0.30
+
+ DEFAULT_MODEL = "openrouter/google/gemini-2.0-flash-001"
+
+ # Temperature for reproducible scoring
+ SCORING_TEMPERATURE = 0.1
+
+
+ class ComponentScore(BaseModel):
+     """Individual component score with justification.
+
+     Attributes:
+         name: Name of the component being scored.
+         clarity_score: Clarity score between 0.0 (unclear) and 1.0 (perfectly clear).
+         weight: Weight of this component in the overall score.
+         justification: Explanation of why this score was given.
+     """
+
+     name: str
+     clarity_score: float = Field(ge=0.0, le=1.0)
+     weight: float = Field(ge=0.0, le=1.0)
+     justification: str
+
+
+ class ScoreBreakdown(BaseModel):
+     """Detailed breakdown of ambiguity score with justifications.
+
+     Attributes:
+         goal_clarity: Score for goal statement clarity.
+         constraint_clarity: Score for constraint specification clarity.
+         success_criteria_clarity: Score for success criteria measurability.
+     """
+
+     goal_clarity: ComponentScore
+     constraint_clarity: ComponentScore
+     success_criteria_clarity: ComponentScore
+
+     @property
+     def components(self) -> list[ComponentScore]:
+         """Return all component scores as a list."""
+         return [
+             self.goal_clarity,
+             self.constraint_clarity,
+             self.success_criteria_clarity,
+         ]
+
+
+ @dataclass(frozen=True, slots=True)
+ class AmbiguityScore:
+     """Result of ambiguity scoring for an interview state.
+
+     Attributes:
+         overall_score: Normalized ambiguity score (0.0 = clear, 1.0 = ambiguous).
+         breakdown: Detailed breakdown of component scores.
+         is_ready_for_seed: Whether score allows Seed generation (score <= 0.2).
+     """
+
+     overall_score: float
+     breakdown: ScoreBreakdown
+
+     @property
+     def is_ready_for_seed(self) -> bool:
+         """Check if ambiguity score allows Seed generation.
+
+         Returns:
+             True if overall_score <= AMBIGUITY_THRESHOLD (0.2).
+         """
+         return self.overall_score <= AMBIGUITY_THRESHOLD
+
+
+ @dataclass
+ class AmbiguityScorer:
+     """Scorer for calculating ambiguity of interview requirements.
+
+     Uses LLM to evaluate clarity of goals, constraints, and success criteria
+     from interview conversation, producing reproducible scores.
+
+     Example:
+         scorer = AmbiguityScorer(llm_adapter=LiteLLMAdapter())
+
+         result = await scorer.score(interview_state)
+         if result.is_ok:
+             ambiguity = result.value
+             if ambiguity.is_ready_for_seed:
+                 # Proceed with Seed generation
+                 ...
+             else:
+                 # Generate additional questions
+                 questions = scorer.generate_clarification_questions(ambiguity.breakdown)
+     """
+
+     llm_adapter: LiteLLMAdapter
+     model: str = DEFAULT_MODEL
+     temperature: float = SCORING_TEMPERATURE
+     max_tokens: int = 2048
+
+     async def score(
+         self, state: InterviewState
+     ) -> Result[AmbiguityScore, ProviderError]:
+         """Calculate ambiguity score for interview state.
+
+         Evaluates the interview conversation to determine clarity of:
+         - Goal statement (40% weight)
+         - Constraints (30% weight)
+         - Success criteria (30% weight)
+
+         Args:
+             state: The interview state to score.
+
+         Returns:
+             Result containing AmbiguityScore or ProviderError.
+         """
+         log.debug(
+             "ambiguity.scoring.started",
+             interview_id=state.interview_id,
+             rounds=len(state.rounds),
+         )
+
+         # Build the context from interview
+         context = self._build_interview_context(state)
+
+         # Create scoring prompt
+         system_prompt = self._build_scoring_system_prompt()
+         user_prompt = self._build_scoring_user_prompt(context)
+
+         messages = [
+             Message(role=MessageRole.SYSTEM, content=system_prompt),
+             Message(role=MessageRole.USER, content=user_prompt),
+         ]
+
+         config = CompletionConfig(
+             model=self.model,
+             temperature=self.temperature,
+             max_tokens=self.max_tokens,
+         )
+
+         result = await self.llm_adapter.complete(messages, config)
+
+         if result.is_err:
+             log.warning(
+                 "ambiguity.scoring.failed",
+                 interview_id=state.interview_id,
+                 error=str(result.error),
+             )
+             return Result.err(result.error)
+
+         # Parse the LLM response into scores
+         try:
+             breakdown = self._parse_scoring_response(result.value.content)
+             overall_score = self._calculate_overall_score(breakdown)
+
+             ambiguity_score = AmbiguityScore(
+                 overall_score=overall_score,
+                 breakdown=breakdown,
+             )
+
+             log.info(
+                 "ambiguity.scoring.completed",
+                 interview_id=state.interview_id,
+                 overall_score=overall_score,
+                 is_ready_for_seed=ambiguity_score.is_ready_for_seed,
+                 goal_clarity=breakdown.goal_clarity.clarity_score,
+                 constraint_clarity=breakdown.constraint_clarity.clarity_score,
+                 success_criteria_clarity=breakdown.success_criteria_clarity.clarity_score,
+             )
+
+             return Result.ok(ambiguity_score)
+
+         except (ValueError, KeyError) as e:
+             log.warning(
+                 "ambiguity.scoring.parse_failed",
+                 interview_id=state.interview_id,
+                 error=str(e),
+                 response=result.value.content[:500],
+             )
+             return Result.err(
+                 ProviderError(
+                     f"Failed to parse scoring response: {e}",
+                     details={"response_preview": result.value.content[:200]},
+                 )
+             )
+
+     def _build_interview_context(self, state: InterviewState) -> str:
+         """Build context string from interview state.
+
+         Args:
+             state: The interview state.
+
+         Returns:
+             Formatted context string.
+         """
+         parts = [f"Initial Context: {state.initial_context}"]
+
+         for round_data in state.rounds:
+             parts.append(f"\nQ: {round_data.question}")
+             if round_data.user_response:
+                 parts.append(f"A: {round_data.user_response}")
+
+         return "\n".join(parts)
+
+     def _build_scoring_system_prompt(self) -> str:
+         """Build system prompt for scoring.
+
+         Returns:
+             System prompt string.
+         """
+         return """You are an expert requirements analyst evaluating the clarity of software requirements.
+
+ Your task is to assess how clear and unambiguous the requirements are based on an interview conversation.
+
+ Evaluate three components:
+ 1. Goal Clarity (40% weight): Is the goal statement specific and well-defined?
+ - Clear: "Build a CLI tool for task management with project grouping"
+ - Unclear: "Build something useful for productivity"
+
+ 2. Constraint Clarity (30% weight): Are constraints and limitations specified?
+ - Clear: "Must use Python 3.14+, no external database dependencies"
+ - Unclear: No mention of technical constraints or limitations
+
+ 3. Success Criteria Clarity (30% weight): Are success criteria measurable?
+ - Clear: "Tasks can be created, edited, deleted; supports filtering by status"
+ - Unclear: "The tool should be easy to use"
+
+ For each component, provide:
+ - A clarity score between 0.0 (completely unclear) and 1.0 (perfectly clear)
+ - A brief justification explaining the score
+
+ Respond in this exact format:
+ GOAL_CLARITY_SCORE: <score>
+ GOAL_CLARITY_JUSTIFICATION: <justification>
+ CONSTRAINT_CLARITY_SCORE: <score>
+ CONSTRAINT_CLARITY_JUSTIFICATION: <justification>
+ SUCCESS_CRITERIA_CLARITY_SCORE: <score>
+ SUCCESS_CRITERIA_CLARITY_JUSTIFICATION: <justification>
+
+ Be strict in your evaluation. Scores above 0.8 require very specific, measurable requirements."""
+
+     def _build_scoring_user_prompt(self, context: str) -> str:
+         """Build user prompt with interview context.
+
+         Args:
+             context: Formatted interview context.
+
+         Returns:
+             User prompt string.
+         """
+         return f"""Please evaluate the clarity of the following requirements conversation:
+
+ ---
+ {context}
+ ---
+
+ Analyze each component and provide scores with justifications."""
+
+     def _parse_scoring_response(self, response: str) -> ScoreBreakdown:
+         """Parse LLM response into ScoreBreakdown.
+
+         Args:
+             response: Raw LLM response text.
+
+         Returns:
+             Parsed ScoreBreakdown.
+
+         Raises:
+             ValueError: If response cannot be parsed.
+         """
+         lines = response.strip().split("\n")
+         scores: dict[str, Any] = {}
+
+         for line in lines:
+             line = line.strip()
+             if not line:
+                 continue
+
+             for prefix in [
+                 "GOAL_CLARITY_SCORE:",
+                 "GOAL_CLARITY_JUSTIFICATION:",
+                 "CONSTRAINT_CLARITY_SCORE:",
+                 "CONSTRAINT_CLARITY_JUSTIFICATION:",
+                 "SUCCESS_CRITERIA_CLARITY_SCORE:",
+                 "SUCCESS_CRITERIA_CLARITY_JUSTIFICATION:",
+             ]:
+                 if line.startswith(prefix):
+                     key = prefix[:-1].lower()  # Remove colon and lowercase
+                     value = line[len(prefix) :].strip()
+                     scores[key] = value
+                     break
+
+         # Validate all required fields are present
+         required_fields = [
+             "goal_clarity_score",
+             "goal_clarity_justification",
+             "constraint_clarity_score",
+             "constraint_clarity_justification",
+             "success_criteria_clarity_score",
+             "success_criteria_clarity_justification",
+         ]
+
+         for field_name in required_fields:
+             if field_name not in scores:
+                 raise ValueError(f"Missing required field: {field_name}")
+
+         # Parse scores to float
+         def parse_score(value: str) -> float:
+             try:
+                 score = float(value)
+                 return max(0.0, min(1.0, score))  # Clamp to [0, 1]
+             except ValueError as e:
+                 raise ValueError(f"Invalid score value: {value}") from e
+
+         return ScoreBreakdown(
+             goal_clarity=ComponentScore(
+                 name="Goal Clarity",
+                 clarity_score=parse_score(scores["goal_clarity_score"]),
+                 weight=GOAL_CLARITY_WEIGHT,
+                 justification=scores["goal_clarity_justification"],
+             ),
+             constraint_clarity=ComponentScore(
+                 name="Constraint Clarity",
+                 clarity_score=parse_score(scores["constraint_clarity_score"]),
+                 weight=CONSTRAINT_CLARITY_WEIGHT,
+                 justification=scores["constraint_clarity_justification"],
+             ),
+             success_criteria_clarity=ComponentScore(
+                 name="Success Criteria Clarity",
+                 clarity_score=parse_score(scores["success_criteria_clarity_score"]),
+                 weight=SUCCESS_CRITERIA_CLARITY_WEIGHT,
+                 justification=scores["success_criteria_clarity_justification"],
+             ),
+         )
+
+     def _calculate_overall_score(self, breakdown: ScoreBreakdown) -> float:
+         """Calculate overall ambiguity score from component clarity scores.
+
+         Ambiguity = 1 - (weighted average of clarity scores)
+
+         Args:
+             breakdown: Score breakdown with component clarity scores.
+
+         Returns:
+             Overall ambiguity score between 0.0 and 1.0.
+         """
+         weighted_clarity = sum(
+             component.clarity_score * component.weight
+             for component in breakdown.components
+         )
+
+         # Ambiguity = 1 - clarity
+         return round(1.0 - weighted_clarity, 4)
+
+     def generate_clarification_questions(
+         self, breakdown: ScoreBreakdown
+     ) -> list[str]:
+         """Generate clarification questions based on score breakdown.
+
+         Identifies which components need clarification and suggests questions.
+
+         Args:
+             breakdown: Score breakdown with component scores.
+
+         Returns:
+             List of clarification questions for low-scoring components.
+         """
+         questions: list[str] = []
+
+         # Threshold for "needs clarification"
+         clarification_threshold = 0.8
+
+         if breakdown.goal_clarity.clarity_score < clarification_threshold:
+             questions.append(
+                 "Can you describe the specific problem this solution should solve?"
+             )
+             questions.append(
+                 "What is the primary deliverable or output you expect?"
+             )
+
+         if breakdown.constraint_clarity.clarity_score < clarification_threshold:
+             questions.append(
+                 "Are there any technical constraints or limitations to consider?"
+             )
+             questions.append(
+                 "What should definitely be excluded from the scope?"
+             )
+
+         if breakdown.success_criteria_clarity.clarity_score < clarification_threshold:
+             questions.append(
+                 "How will you know when this is successfully completed?"
+             )
+             questions.append(
+                 "What specific features or behaviors are essential?"
+             )
+
+         return questions
+
+
+ def is_ready_for_seed(score: AmbiguityScore) -> bool:
+     """Helper function to check if score allows Seed generation.
+
+     Args:
+         score: The ambiguity score to check.
+
+     Returns:
+         True if score <= AMBIGUITY_THRESHOLD (0.2), allowing Seed generation.
+     """
+     return score.is_ready_for_seed
+
+
+ def format_score_display(score: AmbiguityScore) -> str:
+     """Format ambiguity score for display after interview round.
+
+     Args:
+         score: The ambiguity score to format.
+
+     Returns:
+         Formatted string for display.
+     """
+     lines = [
+         f"Ambiguity Score: {score.overall_score:.2f}",
+         f"Ready for Seed: {'Yes' if score.is_ready_for_seed else 'No'}",
+         "",
+         "Component Breakdown:",
+     ]
+
+     for component in score.breakdown.components:
+         clarity_percent = component.clarity_score * 100
+         weight_percent = component.weight * 100
+         lines.append(
+             f" {component.name} (weight: {weight_percent:.0f}%): "
+             f"{clarity_percent:.0f}% clear"
+         )
+         lines.append(f" Justification: {component.justification}")
+
+     return "\n".join(lines)
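
As a closing note on the scoring math in _calculate_overall_score: ambiguity is one minus the weighted clarity average, so Seed generation requires a weighted clarity of at least 0.8 to land at or under the 0.2 threshold. A worked sketch with made-up clarity scores:

    # Hypothetical clarity scores (illustrative only, not taken from the package)
    goal_clarity = 0.9              # weight 0.40
    constraint_clarity = 0.7        # weight 0.30
    success_criteria_clarity = 0.8  # weight 0.30

    weighted_clarity = (
        0.40 * goal_clarity                # 0.36
        + 0.30 * constraint_clarity        # 0.21
        + 0.30 * success_criteria_clarity  # 0.24
    )
    ambiguity = round(1.0 - weighted_clarity, 4)  # 1.0 - 0.81 = 0.19
    ready_for_seed = ambiguity <= 0.2             # True: 0.19 <= AMBIGUITY_THRESHOLD (0.2)

The same 0.8 figure appears as clarification_threshold in generate_clarification_questions, so a component scored below 0.8 keeps producing follow-up questions even when the overall score already permits Seed generation.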