ouroboros-ai 0.2.1-py3-none-any.whl → 0.2.3-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release: this version of ouroboros-ai might be problematic.

ouroboros/bigbang/ambiguity.py

@@ -9,6 +9,8 @@ The scoring algorithm evaluates three key components:
 - Success Criteria Clarity (30%): How measurable the success criteria are
 """
 
+import json
+import re
 from dataclasses import dataclass
 from typing import Any
 
@@ -110,15 +112,15 @@ class AmbiguityScorer:
     from interview conversation, producing reproducible scores.
 
     Uses adaptive token allocation: starts with `initial_max_tokens` and
-    doubles on truncation up to `MAX_TOKEN_LIMIT`. Retries up to `max_retries`
-    times on both provider errors and parse failures.
+    doubles on truncation up to `MAX_TOKEN_LIMIT`. Retries until success
+    by default (unlimited), or up to `max_retries` if specified.
 
     Attributes:
         llm_adapter: The LLM adapter for completions.
         model: Model identifier to use.
         temperature: Temperature for reproducibility (default 0.1).
         initial_max_tokens: Starting token limit (default 2048).
-        max_retries: Maximum retry attempts (default 3).
+        max_retries: Maximum retry attempts, or None for unlimited (default).
 
     Example:
         scorer = AmbiguityScorer(llm_adapter=LiteLLMAdapter())
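
To make the new retry semantics concrete, here is a minimal usage sketch. It assumes the `AmbiguityScorer`, `LiteLLMAdapter`, and `InterviewState` names shown in this diff; the bounded configuration is illustrative, not a package default.

    # Default in 0.2.3: retry until success (max_retries=None).
    scorer = AmbiguityScorer(llm_adapter=LiteLLMAdapter())

    # Restoring 0.2.1-style bounded behavior: give up after three attempts.
    bounded = AmbiguityScorer(llm_adapter=LiteLLMAdapter(), max_retries=3)
    result = await bounded.score(state)  # inside an async context; state: InterviewState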
@@ -138,7 +140,8 @@ class AmbiguityScorer:
     model: str = DEFAULT_MODEL
     temperature: float = SCORING_TEMPERATURE
     initial_max_tokens: int = 2048
-    max_retries: int = 3
+    max_retries: int | None = None  # None = unlimited retries
+    max_format_error_retries: int = 5  # Stop after N format errors (non-truncation)
 
     async def score(
         self, state: InterviewState
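
The hunks in this diff do not show where `max_format_error_retries` is enforced, so the following is only a toy sketch of the presumed two-budget logic, with `should_stop` and `format_errors` as hypothetical names: truncated responses appear to retry freely, while malformed (non-truncated) ones consume a bounded budget.

    # Toy sketch only, not the package's code.
    def should_stop(format_errors: int, limit: int, is_truncated: bool) -> tuple[bool, int]:
        """Truncation retries freely; format errors consume a bounded budget."""
        if is_truncated:
            return False, format_errors      # grow tokens and retry
        format_errors += 1                   # hypothetical counter
        return format_errors >= limit, format_errors

    stop, seen = should_stop(4, 5, is_truncated=False)
    print(stop, seen)  # True 5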
@@ -180,8 +183,15 @@ class AmbiguityScorer:
         current_max_tokens = self.initial_max_tokens
         last_error: Exception | ProviderError | None = None
         last_response: str = ""
+        attempt = 0
+
+        while True:
+            # Check retry limit if set
+            if self.max_retries is not None and attempt >= self.max_retries:
+                break
+
+            attempt += 1
 
-        for attempt in range(self.max_retries):
             config = CompletionConfig(
                 model=self.model,
                 temperature=self.temperature,
@@ -190,15 +200,15 @@
 
             result = await self.llm_adapter.complete(messages, config)
 
-            # Fix #3: Retry on provider errors (rate limits, transient failures)
+            # Retry on provider errors (rate limits, transient failures)
             if result.is_err:
                 last_error = result.error
                 log.warning(
                     "ambiguity.scoring.provider_error_retrying",
                     interview_id=state.interview_id,
                     error=str(result.error),
-                    attempt=attempt + 1,
-                    max_retries=self.max_retries,
+                    attempt=attempt,
+                    max_retries=self.max_retries or "unlimited",
                 )
                 continue
 
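One side note on the logging change above: `self.max_retries or "unlimited"` tests truthiness, so a caller who sets `max_retries=0` would also see "unlimited" in the log. That is plain Python `or` behavior, not anything package-specific:

    >>> None or "unlimited"
    'unlimited'
    >>> 0 or "unlimited"    # 0 is falsy, so it reads as "unlimited" too
    'unlimited'
    >>> 3 or "unlimited"
    3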
@@ -221,7 +231,7 @@
                 constraint_clarity=breakdown.constraint_clarity.clarity_score,
                 success_criteria_clarity=breakdown.success_criteria_clarity.clarity_score,
                 tokens_used=current_max_tokens,
-                attempt=attempt + 1,
+                attempt=attempt,
             )
 
             return Result.ok(ambiguity_score)
@@ -230,11 +240,11 @@
                 last_error = e
                 last_response = result.value.content
 
-                # Fix #2: Only increase tokens if response was truncated
+                # Only increase tokens if response was truncated
                 is_truncated = result.value.finish_reason == "length"
 
                 if is_truncated:
-                    # Double tokens on truncation (no upper limit)
+                    # Double tokens on truncation, capped at MAX_TOKEN_LIMIT if set
                     next_tokens = current_max_tokens * 2
                     if MAX_TOKEN_LIMIT is not None:
                         next_tokens = min(next_tokens, MAX_TOKEN_LIMIT)
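
To see what the doubling-with-cap rule does over repeated truncations, here is a small standalone trace; the cap value is invented for illustration, since the real `MAX_TOKEN_LIMIT` lives in the module and may be None.

    current = 2048           # the initial_max_tokens default
    CAP = 16384              # hypothetical stand-in for MAX_TOKEN_LIMIT
    schedule = []
    for _ in range(5):       # five consecutive truncated responses
        current = min(current * 2, CAP)
        schedule.append(current)
    print(schedule)  # [4096, 8192, 16384, 16384, 16384]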
@@ -242,7 +252,7 @@
                     "ambiguity.scoring.truncated_retrying",
                     interview_id=state.interview_id,
                     error=str(e),
-                    attempt=attempt + 1,
+                    attempt=attempt,
                     current_tokens=current_max_tokens,
                     next_tokens=next_tokens,
                 )
@@ -253,11 +263,11 @@
                     "ambiguity.scoring.format_error_retrying",
                     interview_id=state.interview_id,
                     error=str(e),
-                    attempt=attempt + 1,
+                    attempt=attempt,
                     finish_reason=result.value.finish_reason,
                 )
 
-        # All retries exhausted
+        # All retries exhausted (only reached if max_retries is set)
         log.warning(
             "ambiguity.scoring.failed",
             interview_id=state.interview_id,
@@ -296,38 +306,19 @@
         Returns:
             System prompt string.
         """
-        return """You are an expert requirements analyst evaluating the clarity of software requirements.
-
-Your task is to assess how clear and unambiguous the requirements are based on an interview conversation.
+        return """You are an expert requirements analyst. Evaluate the clarity of software requirements.
 
 Evaluate three components:
-1. Goal Clarity (40% weight): Is the goal statement specific and well-defined?
-   - Clear: "Build a CLI tool for task management with project grouping"
-   - Unclear: "Build something useful for productivity"
-
-2. Constraint Clarity (30% weight): Are constraints and limitations specified?
-   - Clear: "Must use Python 3.14+, no external database dependencies"
-   - Unclear: No mention of technical constraints or limitations
-
-3. Success Criteria Clarity (30% weight): Are success criteria measurable?
-   - Clear: "Tasks can be created, edited, deleted; supports filtering by status"
-   - Unclear: "The tool should be easy to use"
+1. Goal Clarity (40%): Is the goal specific and well-defined?
+2. Constraint Clarity (30%): Are constraints and limitations specified?
+3. Success Criteria Clarity (30%): Are success criteria measurable?
 
-For each component, provide:
-- A clarity score between 0.0 (completely unclear) and 1.0 (perfectly clear)
-- A brief justification (1-2 sentences max) explaining the score
+Score each from 0.0 (unclear) to 1.0 (perfectly clear). Scores above 0.8 require very specific requirements.
 
-IMPORTANT: You MUST provide ALL six fields below. Keep justifications concise.
+RESPOND ONLY WITH VALID JSON. No other text before or after.
 
-Respond in this exact format:
-GOAL_CLARITY_SCORE: <score>
-GOAL_CLARITY_JUSTIFICATION: <justification in 1-2 sentences>
-CONSTRAINT_CLARITY_SCORE: <score>
-CONSTRAINT_CLARITY_JUSTIFICATION: <justification in 1-2 sentences>
-SUCCESS_CRITERIA_CLARITY_SCORE: <score>
-SUCCESS_CRITERIA_CLARITY_JUSTIFICATION: <justification in 1-2 sentences>
-
-Be strict in your evaluation. Scores above 0.8 require very specific, measurable requirements."""
+Required JSON format:
+{"goal_clarity_score": 0.0, "goal_clarity_justification": "string", "constraint_clarity_score": 0.0, "constraint_clarity_justification": "string", "success_criteria_clarity_score": 0.0, "success_criteria_clarity_justification": "string"}"""
 
     def _build_scoring_user_prompt(self, context: str) -> str:
         """Build user prompt with interview context.
@@ -358,27 +349,23 @@ Analyze each component and provide scores with justifications."""
         Raises:
             ValueError: If response cannot be parsed.
         """
-        lines = response.strip().split("\n")
-        scores: dict[str, Any] = {}
-
-        for line in lines:
-            line = line.strip()
-            if not line:
-                continue
-
-            for prefix in [
-                "GOAL_CLARITY_SCORE:",
-                "GOAL_CLARITY_JUSTIFICATION:",
-                "CONSTRAINT_CLARITY_SCORE:",
-                "CONSTRAINT_CLARITY_JUSTIFICATION:",
-                "SUCCESS_CRITERIA_CLARITY_SCORE:",
-                "SUCCESS_CRITERIA_CLARITY_JUSTIFICATION:",
-            ]:
-                if line.startswith(prefix):
-                    key = prefix[:-1].lower()  # Remove colon and lowercase
-                    value = line[len(prefix) :].strip()
-                    scores[key] = value
-                    break
+        # Extract JSON from response (handle markdown code blocks)
+        text = response.strip()
+
+        # Try to find JSON in markdown code block
+        json_match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
+        if json_match:
+            text = json_match.group(1)
+        else:
+            # Try to find raw JSON object
+            json_match = re.search(r"\{.*\}", text, re.DOTALL)
+            if json_match:
+                text = json_match.group(0)
+
+        try:
+            data = json.loads(text)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON response: {e}") from e
 
         # Validate all required fields are present
         required_fields = [
@@ -391,35 +378,32 @@ Analyze each component and provide scores with justifications."""
         ]
 
         for field_name in required_fields:
-            if field_name not in scores:
+            if field_name not in data:
                 raise ValueError(f"Missing required field: {field_name}")
 
-        # Parse scores to float
-        def parse_score(value: str) -> float:
-            try:
-                score = float(value)
-                return max(0.0, min(1.0, score))  # Clamp to [0, 1]
-            except ValueError as e:
-                raise ValueError(f"Invalid score value: {value}") from e
+        # Parse and clamp scores
+        def clamp_score(value: Any) -> float:
+            score = float(value)
+            return max(0.0, min(1.0, score))
 
         return ScoreBreakdown(
             goal_clarity=ComponentScore(
                 name="Goal Clarity",
-                clarity_score=parse_score(scores["goal_clarity_score"]),
+                clarity_score=clamp_score(data["goal_clarity_score"]),
                 weight=GOAL_CLARITY_WEIGHT,
-                justification=scores["goal_clarity_justification"],
+                justification=str(data["goal_clarity_justification"]),
             ),
             constraint_clarity=ComponentScore(
                 name="Constraint Clarity",
-                clarity_score=parse_score(scores["constraint_clarity_score"]),
+                clarity_score=clamp_score(data["constraint_clarity_score"]),
                 weight=CONSTRAINT_CLARITY_WEIGHT,
-                justification=scores["constraint_clarity_justification"],
+                justification=str(data["constraint_clarity_justification"]),
             ),
             success_criteria_clarity=ComponentScore(
                 name="Success Criteria Clarity",
-                clarity_score=parse_score(scores["success_criteria_clarity_score"]),
+                clarity_score=clamp_score(data["success_criteria_clarity_score"]),
                 weight=SUCCESS_CRITERIA_CLARITY_WEIGHT,
-                justification=scores["success_criteria_clarity_justification"],
+                justification=str(data["success_criteria_clarity_justification"]),
             ),
         )
 
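As a quick standalone check of the new extraction path, the snippet below reuses the fenced-block regex added above on an invented, markdown-wrapped model reply.

    import json
    import re

    reply = 'Sure:\n```json\n{"goal_clarity_score": 0.9}\n```'
    m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", reply, re.DOTALL)
    text = m.group(1) if m else reply
    print(json.loads(text))  # {'goal_clarity_score': 0.9}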
ouroboros_ai-0.2.3.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ouroboros-ai
-Version: 0.2.1
+Version: 0.2.3
 Summary: Self-Improving AI Workflow System
 Author-email: Q00 <jqyu.lee@gmail.com>
 License-File: LICENSE
ouroboros_ai-0.2.3.dist-info/RECORD

@@ -2,7 +2,7 @@ ouroboros/__init__.py,sha256=lmQgHmNOWxGlmwayNvp1ckCuJycL8WzX5Y-7IzrFaVM,701
 ouroboros/__main__.py,sha256=f_qnL0zPJwh9kfQqynX5adpqzj8ilj94zW5Q2loqGxE,168
 ouroboros/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ouroboros/bigbang/__init__.py,sha256=9xGqOYwMKBifb7QVwonc_wndNLMZb7ZH7xgMHaz_70A,951
-ouroboros/bigbang/ambiguity.py,sha256=4U_hhPst0wURyMZSAdF-2G7t6mtj8i_W7dl3w4WG3uQ,18653
+ouroboros/bigbang/ambiguity.py,sha256=5KM8xjATknjLZguVa90Yii6o3pzXE4PU4BJIP6Ii938,17955
 ouroboros/bigbang/interview.py,sha256=zm1VrDNqE8ouGG62h8qnNkIpnUf3HHv4NjzMKDIaWcY,17147
 ouroboros/bigbang/seed_generator.py,sha256=7MY9a7Eua_zVGDWIVDlzOZJjeAwz0DRatXJg0PvMgiY,20082
 ouroboros/cli/__init__.py,sha256=CRpxsqJadZL7bCS-yrULWC51tqPKfPsxQLgt0JiwP4g,225
@@ -75,8 +75,8 @@ ouroboros/routing/tiers.py,sha256=QhBQUOo2-h5Z3dEtC0lcOzkRnqTi2W7Jl46750AVNig,73
 ouroboros/secondary/__init__.py,sha256=kYQ7C4bnBzwDlPrU8qZrOPr2ZuTBaftGktOXl5WZl5Q,1123
 ouroboros/secondary/scheduler.py,sha256=sPVVWJ1q0yewRAM-Rm1j_HMerSe4cavIvP9z4xlUuL4,13737
 ouroboros/secondary/todo_registry.py,sha256=4W3C9Uro29VrVLCPKUlpH_BYpzQSbRNW1oMnDYyEhEw,13880
-ouroboros_ai-0.2.1.dist-info/METADATA,sha256=9Foj5oGU4_E58RM-EmFWyOTe4oDgqoE5W6-MkopDpHU,19661
-ouroboros_ai-0.2.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-ouroboros_ai-0.2.1.dist-info/entry_points.txt,sha256=MoETHup6rVkR6AsyjoRzAgIuvVtYYm3Jw40itV3_VyI,53
-ouroboros_ai-0.2.1.dist-info/licenses/LICENSE,sha256=n2X-q26TqpXnoBo0t_WouhFxWw663_q5FmbYDZayoHo,1060
-ouroboros_ai-0.2.1.dist-info/RECORD,,
+ouroboros_ai-0.2.3.dist-info/METADATA,sha256=pAjfUYPmqTUzuLJoNQcoJx88R8yZwj_ALVniBc6jLGg,19661
+ouroboros_ai-0.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ouroboros_ai-0.2.3.dist-info/entry_points.txt,sha256=MoETHup6rVkR6AsyjoRzAgIuvVtYYm3Jw40itV3_VyI,53
+ouroboros_ai-0.2.3.dist-info/licenses/LICENSE,sha256=n2X-q26TqpXnoBo0t_WouhFxWw663_q5FmbYDZayoHo,1060
+ouroboros_ai-0.2.3.dist-info/RECORD,,