ouroboros_ai-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ouroboros-ai might be problematic.

Files changed (81)
  1. ouroboros/__init__.py +15 -0
  2. ouroboros/__main__.py +9 -0
  3. ouroboros/bigbang/__init__.py +39 -0
  4. ouroboros/bigbang/ambiguity.py +464 -0
  5. ouroboros/bigbang/interview.py +530 -0
  6. ouroboros/bigbang/seed_generator.py +610 -0
  7. ouroboros/cli/__init__.py +9 -0
  8. ouroboros/cli/commands/__init__.py +7 -0
  9. ouroboros/cli/commands/config.py +79 -0
  10. ouroboros/cli/commands/init.py +425 -0
  11. ouroboros/cli/commands/run.py +201 -0
  12. ouroboros/cli/commands/status.py +85 -0
  13. ouroboros/cli/formatters/__init__.py +31 -0
  14. ouroboros/cli/formatters/panels.py +157 -0
  15. ouroboros/cli/formatters/progress.py +112 -0
  16. ouroboros/cli/formatters/tables.py +166 -0
  17. ouroboros/cli/main.py +60 -0
  18. ouroboros/config/__init__.py +81 -0
  19. ouroboros/config/loader.py +292 -0
  20. ouroboros/config/models.py +332 -0
  21. ouroboros/core/__init__.py +62 -0
  22. ouroboros/core/ac_tree.py +401 -0
  23. ouroboros/core/context.py +472 -0
  24. ouroboros/core/errors.py +246 -0
  25. ouroboros/core/seed.py +212 -0
  26. ouroboros/core/types.py +205 -0
  27. ouroboros/evaluation/__init__.py +110 -0
  28. ouroboros/evaluation/consensus.py +350 -0
  29. ouroboros/evaluation/mechanical.py +351 -0
  30. ouroboros/evaluation/models.py +235 -0
  31. ouroboros/evaluation/pipeline.py +286 -0
  32. ouroboros/evaluation/semantic.py +302 -0
  33. ouroboros/evaluation/trigger.py +278 -0
  34. ouroboros/events/__init__.py +5 -0
  35. ouroboros/events/base.py +80 -0
  36. ouroboros/events/decomposition.py +153 -0
  37. ouroboros/events/evaluation.py +248 -0
  38. ouroboros/execution/__init__.py +44 -0
  39. ouroboros/execution/atomicity.py +451 -0
  40. ouroboros/execution/decomposition.py +481 -0
  41. ouroboros/execution/double_diamond.py +1386 -0
  42. ouroboros/execution/subagent.py +275 -0
  43. ouroboros/observability/__init__.py +63 -0
  44. ouroboros/observability/drift.py +383 -0
  45. ouroboros/observability/logging.py +504 -0
  46. ouroboros/observability/retrospective.py +338 -0
  47. ouroboros/orchestrator/__init__.py +78 -0
  48. ouroboros/orchestrator/adapter.py +391 -0
  49. ouroboros/orchestrator/events.py +278 -0
  50. ouroboros/orchestrator/runner.py +597 -0
  51. ouroboros/orchestrator/session.py +486 -0
  52. ouroboros/persistence/__init__.py +23 -0
  53. ouroboros/persistence/checkpoint.py +511 -0
  54. ouroboros/persistence/event_store.py +183 -0
  55. ouroboros/persistence/migrations/__init__.py +1 -0
  56. ouroboros/persistence/migrations/runner.py +100 -0
  57. ouroboros/persistence/migrations/scripts/001_initial.sql +20 -0
  58. ouroboros/persistence/schema.py +56 -0
  59. ouroboros/persistence/uow.py +230 -0
  60. ouroboros/providers/__init__.py +28 -0
  61. ouroboros/providers/base.py +133 -0
  62. ouroboros/providers/claude_code_adapter.py +212 -0
  63. ouroboros/providers/litellm_adapter.py +316 -0
  64. ouroboros/py.typed +0 -0
  65. ouroboros/resilience/__init__.py +67 -0
  66. ouroboros/resilience/lateral.py +595 -0
  67. ouroboros/resilience/stagnation.py +727 -0
  68. ouroboros/routing/__init__.py +60 -0
  69. ouroboros/routing/complexity.py +272 -0
  70. ouroboros/routing/downgrade.py +664 -0
  71. ouroboros/routing/escalation.py +340 -0
  72. ouroboros/routing/router.py +204 -0
  73. ouroboros/routing/tiers.py +247 -0
  74. ouroboros/secondary/__init__.py +40 -0
  75. ouroboros/secondary/scheduler.py +467 -0
  76. ouroboros/secondary/todo_registry.py +483 -0
  77. ouroboros_ai-0.1.0.dist-info/METADATA +607 -0
  78. ouroboros_ai-0.1.0.dist-info/RECORD +81 -0
  79. ouroboros_ai-0.1.0.dist-info/WHEEL +4 -0
  80. ouroboros_ai-0.1.0.dist-info/entry_points.txt +2 -0
  81. ouroboros_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
ouroboros/execution/atomicity.py
@@ -0,0 +1,451 @@
+"""Atomicity detection for AC decomposition.
+
+Determines whether an Acceptance Criterion (AC) is atomic (can be executed
+directly) or non-atomic (needs decomposition into smaller units).
+
+An AC is considered atomic if:
+- Complexity score < 0.7
+- Required tools < 3
+- Estimated duration < 300 seconds
+
+This module provides both LLM-based analysis (preferred) and heuristic
+fallback (when LLM fails).
+
+Usage:
+    from ouroboros.execution.atomicity import check_atomicity, AtomicityCriteria
+
+    result = await check_atomicity(
+        ac_content="Implement user login",
+        llm_adapter=adapter,
+        criteria=AtomicityCriteria(),
+    )
+
+    if result.is_ok:
+        if result.value.is_atomic:
+            print("AC is atomic - execute directly")
+        else:
+            print("AC needs decomposition")
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+import json
+import re
+from typing import TYPE_CHECKING, Any
+
+from ouroboros.core.errors import ProviderError, ValidationError
+from ouroboros.core.types import Result
+from ouroboros.observability.logging import get_logger
+from ouroboros.routing.complexity import TaskContext, estimate_complexity
+
+if TYPE_CHECKING:
+    from ouroboros.providers.litellm_adapter import LiteLLMAdapter
+
+log = get_logger(__name__)
+
+
+# Default thresholds from requirements
+DEFAULT_MAX_COMPLEXITY = 0.7
+DEFAULT_MAX_TOOL_COUNT = 3
+DEFAULT_MAX_DURATION_SECONDS = 300
+
+
+@dataclass(frozen=True, slots=True)
+class AtomicityCriteria:
+    """Configurable thresholds for atomicity detection.
+
+    Attributes:
+        max_complexity: Maximum complexity score for atomic ACs (0.0-1.0).
+        max_tool_count: Maximum number of tools for atomic ACs.
+        max_duration_seconds: Maximum estimated duration for atomic ACs.
+    """
+
+    max_complexity: float = DEFAULT_MAX_COMPLEXITY
+    max_tool_count: int = DEFAULT_MAX_TOOL_COUNT
+    max_duration_seconds: int = DEFAULT_MAX_DURATION_SECONDS
+
+    def validate(self) -> Result[None, ValidationError]:
+        """Validate criteria constraints.
+
+        Returns:
+            Result with None on success or ValidationError on failure.
+        """
+        if not 0.0 <= self.max_complexity <= 1.0:
+            return Result.err(
+                ValidationError(
+                    "max_complexity must be between 0.0 and 1.0",
+                    field="max_complexity",
+                    value=self.max_complexity,
+                )
+            )
+        if self.max_tool_count < 0:
+            return Result.err(
+                ValidationError(
+                    "max_tool_count must be non-negative",
+                    field="max_tool_count",
+                    value=self.max_tool_count,
+                )
+            )
+        if self.max_duration_seconds < 0:
+            return Result.err(
+                ValidationError(
+                    "max_duration_seconds must be non-negative",
+                    field="max_duration_seconds",
+                    value=self.max_duration_seconds,
+                )
+            )
+        return Result.ok(None)
+
+
+@dataclass(frozen=True, slots=True)
+class AtomicityResult:
+    """Result of atomicity check.
+
+    Attributes:
+        is_atomic: Whether the AC is atomic.
+        complexity_score: Normalized complexity (0.0-1.0).
+        tool_count: Estimated number of tools required.
+        estimated_duration: Estimated duration in seconds.
+        reasoning: Human-readable explanation of the decision.
+        method: Detection method used ("llm" or "heuristic").
+    """
+
+    is_atomic: bool
+    complexity_score: float
+    tool_count: int
+    estimated_duration: int
+    reasoning: str
+    method: str = "llm"
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for logging/serialization."""
+        return {
+            "is_atomic": self.is_atomic,
+            "complexity_score": self.complexity_score,
+            "tool_count": self.tool_count,
+            "estimated_duration": self.estimated_duration,
+            "reasoning": self.reasoning,
+            "method": self.method,
+        }
+
+
+# LLM prompts for atomicity detection
+ATOMICITY_SYSTEM_PROMPT = """You are an expert at analyzing task complexity and atomicity.
+
+An acceptance criterion (AC) is considered ATOMIC if it can be:
+1. Completed in a single focused session
+2. Executed with minimal tools (< 3)
+3. Clearly verified when done
+4. Estimated at under 300 seconds of execution time
+
+Non-atomic ACs typically:
+- Have multiple distinct steps that could be separate tasks
+- Require coordinating several different tools/systems
+- Have complex verification requirements
+- Would benefit from being broken down further
+
+Analyze the given AC and determine if it's atomic or needs decomposition."""
+
+ATOMICITY_USER_TEMPLATE = """Acceptance Criterion:
+{ac_content}
+
+Analyze this AC and respond with a JSON object:
+{{
+    "is_atomic": true/false,
+    "complexity_score": 0.0 to 1.0 (0 = trivial, 1 = very complex),
+    "tool_count": estimated number of tools needed (integer),
+    "estimated_duration": estimated seconds to complete (integer),
+    "reasoning": "brief explanation of your assessment"
+}}
+
+Only respond with the JSON, no other text."""
+
+
+def _extract_json_from_response(response: str) -> dict[str, Any] | None:
+    """Extract JSON from LLM response, handling various formats.
+
+    Args:
+        response: Raw LLM response text.
+
+    Returns:
+        Parsed JSON dict or None if parsing fails.
+    """
+    # Try direct parsing first
+    try:
+        result = json.loads(response.strip())
+        if isinstance(result, dict):
+            return result
+    except json.JSONDecodeError:
+        pass
+
+    # Try to find JSON in markdown code blocks
+    json_pattern = r"```(?:json)?\s*(.*?)```"
+    matches = re.findall(json_pattern, response, re.DOTALL)
+    for match in matches:
+        try:
+            result = json.loads(match.strip())
+            if isinstance(result, dict):
+                return result
+        except json.JSONDecodeError:
+            continue
+
+    # Try to find JSON-like content
+    brace_pattern = r"\{[^{}]*\}"
+    matches = re.findall(brace_pattern, response, re.DOTALL)
+    for match in matches:
+        try:
+            result = json.loads(match.strip())
+            if isinstance(result, dict):
+                return result
+        except json.JSONDecodeError:
+            continue
+
+    return None
+
+
+def _heuristic_atomicity_check(
+    ac_content: str,
+    criteria: AtomicityCriteria,
+) -> AtomicityResult:
+    """Fallback heuristic-based atomicity check.
+
+    Uses simple text analysis when LLM is unavailable.
+
+    Args:
+        ac_content: The AC text to analyze.
+        criteria: Atomicity thresholds.
+
+    Returns:
+        AtomicityResult based on heuristic analysis.
+    """
+    # Estimate token count (rough: 4 chars per token)
+    token_count = len(ac_content) // 4
+
+    # Estimate tool dependencies from keywords
+    tool_keywords = [
+        "database",
+        "api",
+        "file",
+        "git",
+        "docker",
+        "npm",
+        "pip",
+        "test",
+        "deploy",
+        "build",
+        "migrate",
+        "configure",
+        "install",
+        "http",
+        "rest",
+        "graphql",
+    ]
+    tool_count = sum(1 for keyword in tool_keywords if keyword in ac_content.lower())
+
+    # Estimate complexity based on structure
+    complexity_indicators = [
+        "and",
+        "then",
+        "after",
+        "before",
+        "while",
+        "during",
+        "multiple",
+        "several",
+        "various",
+        "different",
+    ]
+    complexity_boost = sum(0.1 for ind in complexity_indicators if ind in ac_content.lower())
+
+    # Use existing complexity estimation
+    task_ctx = TaskContext(
+        token_count=token_count,
+        tool_dependencies=["tool"] * tool_count,
+        ac_depth=0,
+    )
+    complexity_result = estimate_complexity(task_ctx)
+
+    base_complexity = complexity_result.value.score if complexity_result.is_ok else 0.5
+
+    complexity_score = min(1.0, base_complexity + complexity_boost)
+
+    # Estimate duration (rough: 30 seconds per 100 tokens, adjusted by complexity)
+    estimated_duration = int((token_count / 100) * 30 * (1 + complexity_score))
+
+    # Determine atomicity
+    is_atomic = (
+        complexity_score < criteria.max_complexity
+        and tool_count < criteria.max_tool_count
+        and estimated_duration < criteria.max_duration_seconds
+    )
+
+    reasons = []
+    if complexity_score >= criteria.max_complexity:
+        reasons.append(f"complexity {complexity_score:.2f} >= {criteria.max_complexity}")
+    if tool_count >= criteria.max_tool_count:
+        reasons.append(f"tools {tool_count} >= {criteria.max_tool_count}")
+    if estimated_duration >= criteria.max_duration_seconds:
+        reasons.append(f"duration {estimated_duration}s >= {criteria.max_duration_seconds}s")
+
+    if not reasons:
+        reasons.append("within all thresholds")
+
+    return AtomicityResult(
+        is_atomic=is_atomic,
+        complexity_score=complexity_score,
+        tool_count=tool_count,
+        estimated_duration=estimated_duration,
+        reasoning=f"[Heuristic] {'; '.join(reasons)}",
+        method="heuristic",
+    )
+
+
+async def check_atomicity(
+    ac_content: str,
+    llm_adapter: LiteLLMAdapter,
+    criteria: AtomicityCriteria | None = None,
+    *,
+    use_llm: bool = True,
+    model: str = "openrouter/google/gemini-2.0-flash-001",
+) -> Result[AtomicityResult, ProviderError | ValidationError]:
+    """Check if an AC is atomic using LLM + heuristic fallback.
+
+    Attempts LLM-based analysis first, falling back to heuristics
+    if LLM fails or is disabled.
+
+    Args:
+        ac_content: The acceptance criterion text to analyze.
+        llm_adapter: LLM adapter for making completion requests.
+        criteria: Atomicity thresholds (uses defaults if None).
+        use_llm: Whether to attempt LLM analysis first.
+        model: Model to use for LLM analysis.
+
+    Returns:
+        Result containing AtomicityResult or error.
+
+    Example:
+        result = await check_atomicity(
+            "Implement user authentication with JWT",
+            llm_adapter,
+            AtomicityCriteria(max_complexity=0.6),
+        )
+        if result.is_ok and result.value.is_atomic:
+            print("Execute directly")
+    """
+    if criteria is None:
+        criteria = AtomicityCriteria()
+
+    # Validate criteria
+    validation_result = criteria.validate()
+    if validation_result.is_err:
+        return Result.err(validation_result.error)
+
+    log.debug(
+        "atomicity.check.started",
+        ac_length=len(ac_content),
+        use_llm=use_llm,
+    )
+
+    # Skip LLM if disabled
+    if not use_llm:
+        result = _heuristic_atomicity_check(ac_content, criteria)
+        log.info(
+            "atomicity.check.completed",
+            is_atomic=result.is_atomic,
+            method="heuristic",
+            complexity=result.complexity_score,
+        )
+        return Result.ok(result)
+
+    # Try LLM-based analysis
+    from ouroboros.providers.base import CompletionConfig, Message, MessageRole
+
+    messages = [
+        Message(role=MessageRole.SYSTEM, content=ATOMICITY_SYSTEM_PROMPT),
+        Message(role=MessageRole.USER, content=ATOMICITY_USER_TEMPLATE.format(ac_content=ac_content)),
+    ]
+
+    config = CompletionConfig(
+        model=model,
+        temperature=0.3,  # Lower for consistent analysis
+        max_tokens=500,
+    )
+
+    llm_result = await llm_adapter.complete(messages, config)

+    if llm_result.is_err:
+        log.warning(
+            "atomicity.check.llm_failed",
+            error=str(llm_result.error),
+            falling_back_to_heuristic=True,
+        )
+        # Fallback to heuristic
+        result = _heuristic_atomicity_check(ac_content, criteria)
+        log.info(
+            "atomicity.check.completed",
+            is_atomic=result.is_atomic,
+            method="heuristic_fallback",
+            complexity=result.complexity_score,
+        )
+        return Result.ok(result)
+
+    # Parse LLM response
+    response_text = llm_result.value.content
+    parsed = _extract_json_from_response(response_text)
+
+    if parsed is None:
+        log.warning(
+            "atomicity.check.parse_failed",
+            response_preview=response_text[:200],
+            falling_back_to_heuristic=True,
+        )
+        # Fallback to heuristic
+        result = _heuristic_atomicity_check(ac_content, criteria)
+        return Result.ok(result)
+
+    try:
+        # Extract values with defaults
+        is_atomic_raw = parsed.get("is_atomic", True)
+        complexity_score = float(parsed.get("complexity_score", 0.5))
+        tool_count = int(parsed.get("tool_count", 1))
+        estimated_duration = int(parsed.get("estimated_duration", 60))
+        reasoning = str(parsed.get("reasoning", "LLM analysis"))
+
+        # Apply criteria to determine atomicity
+        is_atomic = (
+            is_atomic_raw
+            and complexity_score < criteria.max_complexity
+            and tool_count < criteria.max_tool_count
+            and estimated_duration < criteria.max_duration_seconds
+        )
+
+        result = AtomicityResult(
+            is_atomic=is_atomic,
+            complexity_score=complexity_score,
+            tool_count=tool_count,
+            estimated_duration=estimated_duration,
+            reasoning=reasoning,
+            method="llm",
+        )
+
+        log.info(
+            "atomicity.check.completed",
+            is_atomic=result.is_atomic,
+            method="llm",
+            complexity=result.complexity_score,
+            tool_count=result.tool_count,
+        )
+
+        return Result.ok(result)
+
+    except (ValueError, TypeError, KeyError) as e:
+        log.warning(
+            "atomicity.check.parse_error",
+            error=str(e),
+            parsed=parsed,
+            falling_back_to_heuristic=True,
+        )
+        # Fallback to heuristic
+        result = _heuristic_atomicity_check(ac_content, criteria)
+        return Result.ok(result)
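
For readers skimming the diff, a minimal sketch of how the two fallback mechanisms above behave. It is illustrative only: it assumes ouroboros-ai 0.1.0 is importable, it calls the module-private helpers _extract_json_from_response and _heuristic_atomicity_check directly (the public entry point is check_atomicity), and the exact complexity scores depend on estimate_complexity, so the printed numbers are not guaranteed.

    # Sketch under the assumptions above; not part of the package's documented API.
    from ouroboros.execution.atomicity import (
        AtomicityCriteria,
        _extract_json_from_response,
        _heuristic_atomicity_check,
    )

    # 1. JSON extraction tolerates replies that wrap the object in prose:
    #    direct json.loads() fails, the markdown-fence pattern finds nothing,
    #    and the final brace pattern recovers the object.
    reply = (
        'Assessment: {"is_atomic": false, "complexity_score": 0.8, '
        '"tool_count": 4, "estimated_duration": 900, "reasoning": "deploy + migrate"}'
    )
    parsed = _extract_json_from_response(reply)
    assert parsed is not None and parsed["is_atomic"] is False

    # 2. The heuristic path scores an AC against the default thresholds
    #    (complexity < 0.7, tools < 3, duration < 300 s). Keyword hits such as
    #    "build", "deploy", "docker" and "database" raise tool_count, and the
    #    duration estimate is roughly (tokens / 100) * 30 * (1 + complexity).
    criteria = AtomicityCriteria()
    result = _heuristic_atomicity_check(
        "Build and deploy the Docker image, then run database migrations",
        criteria,
    )
    print(result.to_dict())  # tool_count is 4 here, so is_atomic comes out False

Note that check_atomicity() only reaches the heuristic path when use_llm=False or when the LLM call fails or returns unparseable output; otherwise the LLM's own estimates are re-checked against the same AtomicityCriteria thresholds.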