ouroboros-ai 0.2.1-py3-none-any.whl → 0.2.3-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release: this version of ouroboros-ai might be problematic.

ouroboros/bigbang/ambiguity.py

@@ -9,6 +9,8 @@ The scoring algorithm evaluates three key components:
 - Success Criteria Clarity (30%): How measurable the success criteria are
 """
 
+import json
+import re
 from dataclasses import dataclass
 from typing import Any
 
@@ -110,15 +112,15 @@ class AmbiguityScorer:
     from interview conversation, producing reproducible scores.
 
     Uses adaptive token allocation: starts with `initial_max_tokens` and
-    doubles on truncation up to `MAX_TOKEN_LIMIT`. Retries up to `max_retries`
-    times on both provider errors and parse failures.
+    doubles on truncation up to `MAX_TOKEN_LIMIT`. Retries until success
+    by default (unlimited), or up to `max_retries` if specified.
 
     Attributes:
         llm_adapter: The LLM adapter for completions.
         model: Model identifier to use.
         temperature: Temperature for reproducibility (default 0.1).
         initial_max_tokens: Starting token limit (default 2048).
-        max_retries: Maximum retry attempts (default 3).
+        max_retries: Maximum retry attempts, or None for unlimited (default).
 
     Example:
         scorer = AmbiguityScorer(llm_adapter=LiteLLMAdapter())
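
To make the new retry semantics concrete, here is a minimal usage sketch. It assumes the `AmbiguityScorer`, `LiteLLMAdapter`, and `InterviewState` names shown in this diff; the bounded configuration is illustrative, not a package default.

    # Default in 0.2.3: retry until success (max_retries=None).
    scorer = AmbiguityScorer(llm_adapter=LiteLLMAdapter())

    # Restoring 0.2.1-style bounded behavior: give up after three attempts.
    bounded = AmbiguityScorer(llm_adapter=LiteLLMAdapter(), max_retries=3)
    result = await bounded.score(state)  # inside an async context; state: InterviewState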
@@ -138,7 +140,8 @@ class AmbiguityScorer:
     model: str = DEFAULT_MODEL
     temperature: float = SCORING_TEMPERATURE
     initial_max_tokens: int = 2048
-    max_retries: int = 3
+    max_retries: int | None = None  # None = unlimited retries
+    max_format_error_retries: int = 5  # Stop after N format errors (non-truncation)
 
     async def score(
         self, state: InterviewState
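
The hunks in this diff do not show where `max_format_error_retries` is enforced, so the following is only a toy sketch of the presumed two-budget logic, with `should_stop` and `format_errors` as hypothetical names: truncated responses appear to retry freely, while malformed (non-truncated) ones consume a bounded budget.

    # Toy sketch only, not the package's code.
    def should_stop(format_errors: int, limit: int, is_truncated: bool) -> tuple[bool, int]:
        """Truncation retries freely; format errors consume a bounded budget."""
        if is_truncated:
            return False, format_errors      # grow tokens and retry
        format_errors += 1                   # hypothetical counter
        return format_errors >= limit, format_errors

    stop, seen = should_stop(4, 5, is_truncated=False)
    print(stop, seen)  # True 5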
@@ -180,8 +183,15 @@ class AmbiguityScorer:
         current_max_tokens = self.initial_max_tokens
         last_error: Exception | ProviderError | None = None
         last_response: str = ""
+        attempt = 0
+
+        while True:
+            # Check retry limit if set
+            if self.max_retries is not None and attempt >= self.max_retries:
+                break
+
+            attempt += 1
 
-        for attempt in range(self.max_retries):
             config = CompletionConfig(
                 model=self.model,
                 temperature=self.temperature,
@@ -190,15 +200,15 @@
 
             result = await self.llm_adapter.complete(messages, config)
 
-            # Fix #3: Retry on provider errors (rate limits, transient failures)
+            # Retry on provider errors (rate limits, transient failures)
             if result.is_err:
                 last_error = result.error
                 log.warning(
                     "ambiguity.scoring.provider_error_retrying",
                     interview_id=state.interview_id,
                     error=str(result.error),
-                    attempt=attempt + 1,
-                    max_retries=self.max_retries,
+                    attempt=attempt,
+                    max_retries=self.max_retries or "unlimited",
                 )
                 continue
 
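One side note on the logging change above: `self.max_retries or "unlimited"` tests truthiness, so a caller who sets `max_retries=0` would also see "unlimited" in the log. That is plain Python `or` behavior, not anything package-specific:

    >>> None or "unlimited"
    'unlimited'
    >>> 0 or "unlimited"    # 0 is falsy, so it reads as "unlimited" too
    'unlimited'
    >>> 3 or "unlimited"
    3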
@@ -221,7 +231,7 @@
                 constraint_clarity=breakdown.constraint_clarity.clarity_score,
                 success_criteria_clarity=breakdown.success_criteria_clarity.clarity_score,
                 tokens_used=current_max_tokens,
-                attempt=attempt + 1,
+                attempt=attempt,
             )
 
             return Result.ok(ambiguity_score)
@@ -230,11 +240,11 @@
                 last_error = e
                 last_response = result.value.content
 
-                # Fix #2: Only increase tokens if response was truncated
+                # Only increase tokens if response was truncated
                 is_truncated = result.value.finish_reason == "length"
 
                 if is_truncated:
-                    # Double tokens on truncation (no upper limit)
+                    # Double tokens on truncation, capped at MAX_TOKEN_LIMIT if set
                     next_tokens = current_max_tokens * 2
                     if MAX_TOKEN_LIMIT is not None:
                         next_tokens = min(next_tokens, MAX_TOKEN_LIMIT)
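
To see what the doubling-with-cap rule does over repeated truncations, here is a small standalone trace; the cap value is invented for illustration, since the real `MAX_TOKEN_LIMIT` lives in the module and may be None.

    current = 2048           # the initial_max_tokens default
    CAP = 16384              # hypothetical stand-in for MAX_TOKEN_LIMIT
    schedule = []
    for _ in range(5):       # five consecutive truncated responses
        current = min(current * 2, CAP)
        schedule.append(current)
    print(schedule)  # [4096, 8192, 16384, 16384, 16384]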
@@ -242,7 +252,7 @@
                     "ambiguity.scoring.truncated_retrying",
                     interview_id=state.interview_id,
                     error=str(e),
-                    attempt=attempt + 1,
+                    attempt=attempt,
                     current_tokens=current_max_tokens,
                     next_tokens=next_tokens,
                 )
@@ -253,11 +263,11 @@
                     "ambiguity.scoring.format_error_retrying",
                     interview_id=state.interview_id,
                     error=str(e),
-                    attempt=attempt + 1,
+                    attempt=attempt,
                     finish_reason=result.value.finish_reason,
                 )
 
-        # All retries exhausted
+        # All retries exhausted (only reached if max_retries is set)
         log.warning(
             "ambiguity.scoring.failed",
             interview_id=state.interview_id,
@@ -296,38 +306,19 @@
         Returns:
             System prompt string.
         """
-        return """You are an expert requirements analyst evaluating the clarity of software requirements.
-
-Your task is to assess how clear and unambiguous the requirements are based on an interview conversation.
+        return """You are an expert requirements analyst. Evaluate the clarity of software requirements.
 
 Evaluate three components:
-1. Goal Clarity (40% weight): Is the goal statement specific and well-defined?
-   - Clear: "Build a CLI tool for task management with project grouping"
-   - Unclear: "Build something useful for productivity"
-
-2. Constraint Clarity (30% weight): Are constraints and limitations specified?
-   - Clear: "Must use Python 3.14+, no external database dependencies"
-   - Unclear: No mention of technical constraints or limitations
-
-3. Success Criteria Clarity (30% weight): Are success criteria measurable?
-   - Clear: "Tasks can be created, edited, deleted; supports filtering by status"
-   - Unclear: "The tool should be easy to use"
+1. Goal Clarity (40%): Is the goal specific and well-defined?
+2. Constraint Clarity (30%): Are constraints and limitations specified?
+3. Success Criteria Clarity (30%): Are success criteria measurable?
 
-For each component, provide:
-- A clarity score between 0.0 (completely unclear) and 1.0 (perfectly clear)
-- A brief justification (1-2 sentences max) explaining the score
+Score each from 0.0 (unclear) to 1.0 (perfectly clear). Scores above 0.8 require very specific requirements.
 
-IMPORTANT: You MUST provide ALL six fields below. Keep justifications concise.
+RESPOND ONLY WITH VALID JSON. No other text before or after.
 
-Respond in this exact format:
-GOAL_CLARITY_SCORE: <score>
-GOAL_CLARITY_JUSTIFICATION: <justification in 1-2 sentences>
-CONSTRAINT_CLARITY_SCORE: <score>
-CONSTRAINT_CLARITY_JUSTIFICATION: <justification in 1-2 sentences>
-SUCCESS_CRITERIA_CLARITY_SCORE: <score>
-SUCCESS_CRITERIA_CLARITY_JUSTIFICATION: <justification in 1-2 sentences>
-
-Be strict in your evaluation. Scores above 0.8 require very specific, measurable requirements."""
+Required JSON format:
+{"goal_clarity_score": 0.0, "goal_clarity_justification": "string", "constraint_clarity_score": 0.0, "constraint_clarity_justification": "string", "success_criteria_clarity_score": 0.0, "success_criteria_clarity_justification": "string"}"""
 
     def _build_scoring_user_prompt(self, context: str) -> str:
         """Build user prompt with interview context.
@@ -358,27 +349,23 @@ Analyze each component and provide scores with justifications."""
         Raises:
             ValueError: If response cannot be parsed.
         """
-        lines = response.strip().split("\n")
-        scores: dict[str, Any] = {}
-
-        for line in lines:
-            line = line.strip()
-            if not line:
-                continue
-
-            for prefix in [
-                "GOAL_CLARITY_SCORE:",
-                "GOAL_CLARITY_JUSTIFICATION:",
-                "CONSTRAINT_CLARITY_SCORE:",
-                "CONSTRAINT_CLARITY_JUSTIFICATION:",
-                "SUCCESS_CRITERIA_CLARITY_SCORE:",
-                "SUCCESS_CRITERIA_CLARITY_JUSTIFICATION:",
-            ]:
-                if line.startswith(prefix):
-                    key = prefix[:-1].lower()  # Remove colon and lowercase
-                    value = line[len(prefix) :].strip()
-                    scores[key] = value
-                    break
+        # Extract JSON from response (handle markdown code blocks)
+        text = response.strip()
+
+        # Try to find JSON in markdown code block
+        json_match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
+        if json_match:
+            text = json_match.group(1)
+        else:
+            # Try to find raw JSON object
+            json_match = re.search(r"\{.*\}", text, re.DOTALL)
+            if json_match:
+                text = json_match.group(0)
+
+        try:
+            data = json.loads(text)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON response: {e}") from e
 
         # Validate all required fields are present
         required_fields = [
@@ -391,35 +378,32 @@ Analyze each component and provide scores with justifications."""
         ]
 
         for field_name in required_fields:
-            if field_name not in scores:
+            if field_name not in data:
                 raise ValueError(f"Missing required field: {field_name}")
 
-        # Parse scores to float
-        def parse_score(value: str) -> float:
-            try:
-                score = float(value)
-                return max(0.0, min(1.0, score))  # Clamp to [0, 1]
-            except ValueError as e:
-                raise ValueError(f"Invalid score value: {value}") from e
+        # Parse and clamp scores
+        def clamp_score(value: Any) -> float:
+            score = float(value)
+            return max(0.0, min(1.0, score))
 
         return ScoreBreakdown(
             goal_clarity=ComponentScore(
                 name="Goal Clarity",
-                clarity_score=parse_score(scores["goal_clarity_score"]),
+                clarity_score=clamp_score(data["goal_clarity_score"]),
                 weight=GOAL_CLARITY_WEIGHT,
-                justification=scores["goal_clarity_justification"],
+                justification=str(data["goal_clarity_justification"]),
             ),
             constraint_clarity=ComponentScore(
                 name="Constraint Clarity",
-                clarity_score=parse_score(scores["constraint_clarity_score"]),
+                clarity_score=clamp_score(data["constraint_clarity_score"]),
                 weight=CONSTRAINT_CLARITY_WEIGHT,
-                justification=scores["constraint_clarity_justification"],
+                justification=str(data["constraint_clarity_justification"]),
             ),
             success_criteria_clarity=ComponentScore(
                 name="Success Criteria Clarity",
-                clarity_score=parse_score(scores["success_criteria_clarity_score"]),
+                clarity_score=clamp_score(data["success_criteria_clarity_score"]),
                 weight=SUCCESS_CRITERIA_CLARITY_WEIGHT,
-                justification=scores["success_criteria_clarity_justification"],
+                justification=str(data["success_criteria_clarity_justification"]),
             ),
         )
 
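As a quick standalone check of the new extraction path, the snippet below reuses the fenced-block regex added above on an invented, markdown-wrapped model reply.

    import json
    import re

    reply = 'Sure:\n```json\n{"goal_clarity_score": 0.9}\n```'
    m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", reply, re.DOTALL)
    text = m.group(1) if m else reply
    print(json.loads(text))  # {'goal_clarity_score': 0.9}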
ouroboros_ai-0.2.3.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ouroboros-ai
-Version: 0.2.1
+Version: 0.2.3
 Summary: Self-Improving AI Workflow System
 Author-email: Q00 <jqyu.lee@gmail.com>
 License-File: LICENSE
ouroboros_ai-0.2.3.dist-info/RECORD

@@ -2,7 +2,7 @@ ouroboros/__init__.py,sha256=lmQgHmNOWxGlmwayNvp1ckCuJycL8WzX5Y-7IzrFaVM,701
 ouroboros/__main__.py,sha256=f_qnL0zPJwh9kfQqynX5adpqzj8ilj94zW5Q2loqGxE,168
 ouroboros/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ouroboros/bigbang/__init__.py,sha256=9xGqOYwMKBifb7QVwonc_wndNLMZb7ZH7xgMHaz_70A,951
-ouroboros/bigbang/ambiguity.py,sha256=4U_hhPst0wURyMZSAdF-2G7t6mtj8i_W7dl3w4WG3uQ,18653
+ouroboros/bigbang/ambiguity.py,sha256=5KM8xjATknjLZguVa90Yii6o3pzXE4PU4BJIP6Ii938,17955
 ouroboros/bigbang/interview.py,sha256=zm1VrDNqE8ouGG62h8qnNkIpnUf3HHv4NjzMKDIaWcY,17147
 ouroboros/bigbang/seed_generator.py,sha256=7MY9a7Eua_zVGDWIVDlzOZJjeAwz0DRatXJg0PvMgiY,20082
 ouroboros/cli/__init__.py,sha256=CRpxsqJadZL7bCS-yrULWC51tqPKfPsxQLgt0JiwP4g,225
@@ -75,8 +75,8 @@ ouroboros/routing/tiers.py,sha256=QhBQUOo2-h5Z3dEtC0lcOzkRnqTi2W7Jl46750AVNig,73
 ouroboros/secondary/__init__.py,sha256=kYQ7C4bnBzwDlPrU8qZrOPr2ZuTBaftGktOXl5WZl5Q,1123
 ouroboros/secondary/scheduler.py,sha256=sPVVWJ1q0yewRAM-Rm1j_HMerSe4cavIvP9z4xlUuL4,13737
 ouroboros/secondary/todo_registry.py,sha256=4W3C9Uro29VrVLCPKUlpH_BYpzQSbRNW1oMnDYyEhEw,13880
-ouroboros_ai-0.2.1.dist-info/METADATA,sha256=9Foj5oGU4_E58RM-EmFWyOTe4oDgqoE5W6-MkopDpHU,19661
-ouroboros_ai-0.2.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-ouroboros_ai-0.2.1.dist-info/entry_points.txt,sha256=MoETHup6rVkR6AsyjoRzAgIuvVtYYm3Jw40itV3_VyI,53
-ouroboros_ai-0.2.1.dist-info/licenses/LICENSE,sha256=n2X-q26TqpXnoBo0t_WouhFxWw663_q5FmbYDZayoHo,1060
-ouroboros_ai-0.2.1.dist-info/RECORD,,
+ouroboros_ai-0.2.3.dist-info/METADATA,sha256=pAjfUYPmqTUzuLJoNQcoJx88R8yZwj_ALVniBc6jLGg,19661
+ouroboros_ai-0.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ouroboros_ai-0.2.3.dist-info/entry_points.txt,sha256=MoETHup6rVkR6AsyjoRzAgIuvVtYYm3Jw40itV3_VyI,53
+ouroboros_ai-0.2.3.dist-info/licenses/LICENSE,sha256=n2X-q26TqpXnoBo0t_WouhFxWw663_q5FmbYDZayoHo,1060
+ouroboros_ai-0.2.3.dist-info/RECORD,,