pdd-cli 0.0.48__py3-none-any.whl → 0.0.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic.

pdd/llm_invoke.py CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
  import litellm
  import logging # ADDED FOR DETAILED LOGGING
  import importlib.resources
- from litellm.caching.caching import Cache # Fix for LiteLLM v1.49.3+
+ from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+

  # --- Configure Standard Python Logging ---
  logger = logging.getLogger("pdd.llm_invoke")
@@ -26,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
  litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
  litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))

+ # Ensure LiteLLM drops provider-unsupported params instead of erroring
+ # This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+ # passing generic params (e.g., reasoning_effort) not accepted by that API path.
+ try:
+     _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+     litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+ except Exception:
+     # Be conservative: default to True even if env parsing fails
+     litellm.drop_params = True
+
  # Add a console handler if none exists
  if not logger.handlers:
      console_handler = logging.StreamHandler()
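
Note (outside the diff): a minimal sketch of the truthy-string parsing used for LITELLM_DROP_PARAMS above, standard library only; the helper name `_env_flag` is illustrative, not from the package.

    import os

    def _env_flag(name: str, default: str = "true") -> bool:
        # Same rule as the diff: "1", "true", "yes", or "on" (case-insensitive) enables the flag.
        return str(os.getenv(name, default)).lower() in ("1", "true", "yes", "on")

    # e.g. with LITELLM_DROP_PARAMS=off set, _env_flag("LITELLM_DROP_PARAMS") is False,
    # so litellm.drop_params would be turned off for that run.
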
@@ -71,7 +81,7 @@ import json
  # from rich import print as rprint # Replaced with logger
  from dotenv import load_dotenv
  from pathlib import Path
- from typing import Optional, Dict, List, Any, Type, Union
+ from typing import Optional, Dict, List, Any, Type, Union, Tuple
  from pydantic import BaseModel, ValidationError
  import openai # Import openai for exception handling as LiteLLM maps to its types
  from langchain_core.prompts import PromptTemplate
@@ -114,6 +124,22 @@ def _is_wsl_environment() -> bool:
      return False


+ def _openai_responses_supports_response_format() -> bool:
+     """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+     Returns True if the installed SDK exposes a `response_format` parameter on
+     `openai.resources.responses.Responses.create`, else False. This avoids
+     sending unsupported kwargs and triggering TypeError at runtime.
+     """
+     try:
+         import inspect
+         from openai.resources.responses import Responses
+         sig = inspect.signature(Responses.create)
+         return "response_format" in sig.parameters
+     except Exception:
+         return False
+
+
  def _get_environment_info() -> Dict[str, str]:
      """
      Get environment information for debugging and error reporting.
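
Note (outside the diff): the helper above is a feature-detection pattern based on signature introspection; a generic sketch of the same idea, with an illustrative name:

    import inspect

    def _callable_accepts(func, param_name: str) -> bool:
        # True when `param_name` is an explicit parameter of `func`.
        # Conservative by design: callables that only take **kwargs report False.
        try:
            return param_name in inspect.signature(func).parameters
        except (TypeError, ValueError):
            return False

    # Example with a local stand-in for Responses.create:
    # def create(model, input, text=None): ...
    # _callable_accepts(create, "text") -> True
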
@@ -188,24 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded

  ENV_PATH = PROJECT_ROOT / ".env"
  # --- Determine LLM_MODEL_CSV_PATH ---
- # Prioritize ~/.pdd/llm_model.csv
+ # Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+ # then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
  user_pdd_dir = Path.home() / ".pdd"
  user_model_csv_path = user_pdd_dir / "llm_model.csv"

- # Check in order: user-specific, project-specific, package default
+ def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+     """Search upwards from the current working directory for common project markers.
+
+     This intentionally ignores PDD_PATH to support CLI invocations that set
+     PDD_PATH to the installed package location. We want to honor a real project
+     checkout's .pdd/llm_model.csv when running inside it.
+     """
+     try:
+         current_dir = Path.cwd().resolve()
+         for _ in range(max_levels):
+             if (
+                 (current_dir / ".git").exists()
+                 or (current_dir / "pyproject.toml").exists()
+                 or (current_dir / "data").is_dir()
+                 or (current_dir / ".env").exists()
+             ):
+                 return current_dir
+             parent = current_dir.parent
+             if parent == current_dir:
+                 break
+             current_dir = parent
+     except Exception:
+         pass
+     return Path.cwd().resolve()
+
+ # Resolve candidates
+ project_root_from_cwd = _detect_project_root_from_cwd()
+ project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+ project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+ # Detect whether PDD_PATH points to the installed package directory. If so,
+ # don't prioritize it over the real project from CWD.
+ try:
+     _installed_pkg_root = importlib.resources.files('pdd')
+     # importlib.resources.files returns a Traversable; get a FS path string if possible
+     try:
+         _installed_pkg_root_path = Path(str(_installed_pkg_root))
+     except Exception:
+         _installed_pkg_root_path = None
+ except Exception:
+     _installed_pkg_root_path = None
+
+ def _is_env_path_package_dir(env_path: Path) -> bool:
+     try:
+         if _installed_pkg_root_path is None:
+             return False
+         env_path = env_path.resolve()
+         pkg_path = _installed_pkg_root_path.resolve()
+         # Treat equal or subpath as package dir
+         return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+     except Exception:
+         return False
+
+ # Selection order
  if user_model_csv_path.is_file():
      LLM_MODEL_CSV_PATH = user_model_csv_path
      logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+ elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
+     # Honor an explicitly-set PDD_PATH pointing to a real project directory
+     LLM_MODEL_CSV_PATH = project_csv_from_env
+     logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+ elif project_csv_from_cwd.is_file():
+     # Otherwise, prefer the project relative to the current working directory
+     LLM_MODEL_CSV_PATH = project_csv_from_cwd
+     logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
  else:
-     # Check project-specific location (.pdd directory)
-     project_model_csv_path = PROJECT_ROOT / ".pdd" / "llm_model.csv"
-     if project_model_csv_path.is_file():
-         LLM_MODEL_CSV_PATH = project_model_csv_path
-         logger.info(f"Using project-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
-     else:
-         # Neither exists, we'll use a marker path that _load_model_data will handle
-         LLM_MODEL_CSV_PATH = None
-         logger.info("No local LLM model CSV found, will use package default")
+     # Neither exists, we'll use a marker path that _load_model_data will handle
+     LLM_MODEL_CSV_PATH = None
+     logger.info("No local LLM model CSV found, will use package default")
  # ---------------------------------

  # Load environment variables from .env file
@@ -280,16 +362,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
      elif 'AWS_REGION_NAME' in os.environ:
          pass # Or just leave it if the temporary setting wasn't done/needed

+ # Check if caching is disabled via environment variable
+ if os.getenv("LITELLM_CACHE_DISABLE") == "1":
+     logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
+     litellm.cache = None
+     cache_configured = True
+
  if not cache_configured:
      try:
-         # Try SQLite-based cache as a fallback
+         # Try disk-based cache as a fallback
          sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
-         configured_cache = Cache(type="sqlite", cache_path=str(sqlite_cache_path))
+         configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
          litellm.cache = configured_cache
-         logger.info(f"LiteLLM SQLite cache configured at {sqlite_cache_path}")
+         logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
          cache_configured = True
      except Exception as e2:
-         warnings.warn(f"Failed to configure LiteLLM SQLite cache: {e2}. Caching is disabled.")
+         warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
          litellm.cache = None

  if not cache_configured:
@@ -327,29 +415,49 @@ def _litellm_success_callback(
          cost_val = litellm.completion_cost(completion_response=completion_response)
          calculated_cost = cost_val if cost_val is not None else 0.0
      except Exception as e1:
-         # Attempt 2: If response object failed (e.g., missing provider in model name),
-         # try again using explicit model from kwargs and tokens from usage.
-         # This is often needed for batch completion items.
+         # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+         # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
          logger.debug(f"Attempting cost calculation with fallback method: {e1}")
          try:
-             model_name = kwargs.get("model") # Get original model name from input kwargs
+             model_name = kwargs.get("model")
              if model_name and usage:
-                 prompt_tokens = getattr(usage, 'prompt_tokens', 0)
-                 completion_tokens = getattr(usage, 'completion_tokens', 0)
-                 cost_val = litellm.completion_cost(
-                     model=model_name,
-                     prompt_tokens=prompt_tokens,
-                     completion_tokens=completion_tokens
-                 )
-                 calculated_cost = cost_val if cost_val is not None else 0.0
+                 in_tok = getattr(usage, 'prompt_tokens', None)
+                 out_tok = getattr(usage, 'completion_tokens', None)
+                 # Some providers may use 'input_tokens'/'output_tokens'
+                 if in_tok is None:
+                     in_tok = getattr(usage, 'input_tokens', 0)
+                 if out_tok is None:
+                     out_tok = getattr(usage, 'output_tokens', 0)
+
+                 # Try LiteLLM helper (arg names vary across versions)
+                 try:
+                     cost_val = litellm.completion_cost(
+                         model=model_name,
+                         prompt_tokens=in_tok,
+                         completion_tokens=out_tok,
+                     )
+                     calculated_cost = cost_val if cost_val is not None else 0.0
+                 except TypeError:
+                     # Older/newer versions may require input/output token names
+                     try:
+                         cost_val = litellm.completion_cost(
+                             model=model_name,
+                             input_tokens=in_tok,
+                             output_tokens=out_tok,
+                         )
+                         calculated_cost = cost_val if cost_val is not None else 0.0
+                     except Exception as e3:
+                         # Final fallback: compute using CSV rates
+                         rates = _MODEL_RATE_MAP.get(str(model_name))
+                         if rates is not None:
+                             in_rate, out_rate = rates
+                             calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                         else:
+                             calculated_cost = 0.0
+                         logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
              else:
-                 # If we can't get model name or usage, fallback to 0
                  calculated_cost = 0.0
-             # Optional: Log the original error e1 if needed
-             # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
          except Exception as e2:
-             # Optional: Log secondary error e2 if needed
-             # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
              calculated_cost = 0.0 # Default to 0 on any error
              logger.debug(f"Cost calculation failed with fallback method: {e2}")

@@ -367,6 +475,23 @@ def _litellm_success_callback(
  # Register the callback with LiteLLM
  litellm.success_callback = [_litellm_success_callback]

+ # --- Cost Mapping Support (CSV Rates) ---
+ # Populate from CSV inside llm_invoke; used by callback fallback
+ _MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+ def _set_model_rate_map(df: pd.DataFrame) -> None:
+     global _MODEL_RATE_MAP
+     try:
+         _MODEL_RATE_MAP = {
+             str(row['model']): (
+                 float(row['input']) if pd.notna(row['input']) else 0.0,
+                 float(row['output']) if pd.notna(row['output']) else 0.0,
+             )
+             for _, row in df.iterrows()
+         }
+     except Exception:
+         _MODEL_RATE_MAP = {}
+
  # --- Helper Functions ---

  def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
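
Note (outside the diff): the rate map above keys each model to (input, output) prices taken from the CSV, expressed per million tokens; a minimal sketch of the arithmetic the callback fallback performs:

    def _cost_from_rates(input_tokens: int, output_tokens: int, in_rate: float, out_rate: float) -> float:
        # Rates are USD per 1M tokens, matching the CSV columns used above.
        return (input_tokens * in_rate + output_tokens * out_rate) / 1_000_000.0

    # Example: 1200 input tokens at $0.25/M plus 300 output tokens at $2.00/M
    # -> (1200 * 0.25 + 300 * 2.00) / 1_000_000 = 0.0009 USD
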
@@ -476,11 +601,26 @@ def _select_model_candidates(
          # Try finding base model in the *original* df in case it was filtered out
          original_base = model_df[model_df['model'] == base_model_name]
          if not original_base.empty:
-             raise ValueError(f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration.")
-         else:
-             raise ValueError(f"Specified base model '{base_model_name}' not found in the LLM model CSV.")
-
-     base_model = base_model_row.iloc[0]
+             # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+             raise ValueError(
+                 f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+             )
+         # Option A': Soft fallback – choose a reasonable surrogate base and continue
+         # Strategy (simplified and deterministic): pick the first available model
+         # from the CSV as the surrogate base. This mirrors typical CSV ordering
+         # expectations and keeps behavior predictable across environments.
+         try:
+             base_model = available_df.iloc[0]
+             logger.warning(
+                 f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+             )
+         except Exception:
+             # If any unexpected error occurs during fallback, raise a clear error
+             raise ValueError(
+                 f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+             )
+     else:
+         base_model = base_model_row.iloc[0]

      # 3. Determine Target and Sort
      candidates = []
@@ -491,9 +631,10 @@ def _select_model_candidates(
          # Sort remaining by ELO descending as fallback
          available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
          candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-         # Ensure base model is first if it exists
-         if any(c['model'] == base_model_name for c in candidates):
-             candidates.sort(key=lambda x: 0 if x['model'] == base_model_name else 1)
+         # Ensure effective base model is first if it exists (supports surrogate base)
+         effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+         if any(c['model'] == effective_base_name for c in candidates):
+             candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
          target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"

      elif strength < 0.5:
@@ -710,6 +851,49 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
      except Exception as e:
          raise ValueError(f"Error formatting prompt: {e}") from e

+ # --- JSON Extraction Helpers ---
+ import re
+
+ def _extract_fenced_json_block(text: str) -> Optional[str]:
+     try:
+         m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
+         if m:
+             return m.group(1)
+         return None
+     except Exception:
+         return None
+
+ def _extract_balanced_json_objects(text: str) -> List[str]:
+     results: List[str] = []
+     brace_stack = 0
+     start_idx = -1
+     in_string = False
+     escape = False
+     for i, ch in enumerate(text):
+         if in_string:
+             if escape:
+                 escape = False
+             elif ch == '\\':
+                 escape = True
+             elif ch == '"':
+                 in_string = False
+             continue
+         else:
+             if ch == '"':
+                 in_string = True
+                 continue
+             if ch == '{':
+                 if brace_stack == 0:
+                     start_idx = i
+                 brace_stack += 1
+             elif ch == '}':
+                 if brace_stack > 0:
+                     brace_stack -= 1
+                 if brace_stack == 0 and start_idx != -1:
+                     results.append(text[start_idx:i+1])
+                     start_idx = -1
+     return results
+
  # --- Main Function ---

  def llm_invoke(
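
Note (outside the diff): a quick usage sketch of the two helpers added above; the sample strings are made up.

    sample = 'Here you go:\n```json\n{"name": "pdd", "ok": true}\n```\nAnything else?'
    _extract_fenced_json_block(sample)
    # -> '{"name": "pdd", "ok": true}'
    _extract_balanced_json_objects('noise {"a": 1} more noise {"b": {"c": 2}}')
    # -> ['{"a": 1}', '{"b": {"c": 2}}']
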
@@ -852,6 +1036,16 @@ def llm_invoke(
      # --- 3. Iterate Through Candidates and Invoke LLM ---
      last_exception = None
      newly_acquired_keys: Dict[str, bool] = {} # Track keys obtained in this run
+
+     # Initialize variables for retry section
+     response_format = None
+     time_kwargs = {}
+
+     # Update global rate map for callback cost fallback
+     try:
+         _set_model_rate_map(model_df)
+     except Exception:
+         pass

      for model_info in candidate_models:
          model_name_litellm = model_info['model']
@@ -945,11 +1139,33 @@ def llm_invoke(
          elif verbose: # No api_key_name_from_csv in CSV for this model
              logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")

-         # Add api_base if present in CSV
+         # Add base_url/api_base override if present in CSV
          api_base = model_info.get('base_url')
          if pd.notna(api_base) and api_base:
+             # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+             litellm_kwargs["base_url"] = str(api_base)
              litellm_kwargs["api_base"] = str(api_base)

+         # Provider-specific defaults (e.g., LM Studio)
+         model_name_lower = str(model_name_litellm).lower()
+         provider_lower_for_model = provider.lower()
+         is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+         if is_lm_studio:
+             # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+             if not litellm_kwargs.get("base_url"):
+                 lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                 litellm_kwargs["base_url"] = lm_studio_base
+                 litellm_kwargs["api_base"] = lm_studio_base
+                 if verbose:
+                     logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+             # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+             if not litellm_kwargs.get("api_key"):
+                 lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                 litellm_kwargs["api_key"] = lm_studio_key
+                 if verbose:
+                     logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
          # Handle Structured Output (JSON Mode / Pydantic)
          if output_pydantic:
              # Check if model supports structured output based on CSV flag or LiteLLM check
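
Note (outside the diff): the LM Studio defaults above only apply when nothing is configured; a small sketch of the environment they assume, with illustrative values.

    import os

    # Matches the fallbacks in the diff: a local server and any non-empty token.
    os.environ.setdefault("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
    os.environ.setdefault("LM_STUDIO_API_KEY", "lm-studio")
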
@@ -964,7 +1180,8 @@ def llm_invoke(
                  logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
                  # Pass the Pydantic model directly if supported, else use json_object
                  # LiteLLM handles passing Pydantic models for supported providers
-                 litellm_kwargs["response_format"] = output_pydantic
+                 response_format = output_pydantic
+                 litellm_kwargs["response_format"] = response_format
                  # As a fallback, one could use:
                  # litellm_kwargs["response_format"] = {"type": "json_object"}
                  # And potentially enable client-side validation:
@@ -986,7 +1203,9 @@ def llm_invoke(
                  # Currently known: Anthropic uses 'thinking'
                  # Model name comparison is more robust than provider string
                  if provider == 'anthropic': # Check provider column instead of model prefix
-                     litellm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+                     thinking_param = {"type": "enabled", "budget_tokens": budget}
+                     litellm_kwargs["thinking"] = thinking_param
+                     time_kwargs["thinking"] = thinking_param
                      if verbose:
                          logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
                  else:
@@ -1004,10 +1223,32 @@ def llm_invoke(
                      effort = "high"
                  elif time > 0.3:
                      effort = "medium"
-                 # Use the common 'reasoning_effort' param LiteLLM provides
-                 litellm_kwargs["reasoning_effort"] = effort
-                 if verbose:
-                     logger.info(f"[INFO] Requesting reasoning_effort='{effort}' (effort type) for {model_name_litellm} based on time={time}")
+
+                 # Map effort parameter per-provider/model family
+                 model_lower = str(model_name_litellm).lower()
+                 provider_lower = str(provider).lower()
+
+                 if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                     # OpenAI 5-series uses Responses API with nested 'reasoning'
+                     reasoning_obj = {"effort": effort, "summary": "auto"}
+                     litellm_kwargs["reasoning"] = reasoning_obj
+                     time_kwargs["reasoning"] = reasoning_obj
+                     if verbose:
+                         logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+                 elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                     # Historical o* models may use LiteLLM's generic reasoning_effort param
+                     litellm_kwargs["reasoning_effort"] = effort
+                     time_kwargs["reasoning_effort"] = effort
+                     if verbose:
+                         logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+                 else:
+                     # Fallback to LiteLLM generic param when supported by provider adapter
+                     litellm_kwargs["reasoning_effort"] = effort
+                     time_kwargs["reasoning_effort"] = effort
+                     if verbose:
+                         logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")

              elif reasoning_type == 'none':
                  if verbose:
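
Note (outside the diff): a small sketch of the time-to-effort thresholds used above; the "low" starting value is an assumption, since the line that initializes `effort` sits outside this hunk.

    def _effort_from_time(time: float, default: str = "low") -> str:
        # Thresholds mirror the diff: > 0.7 -> "high", > 0.3 -> "medium".
        effort = default
        if time > 0.7:
            effort = "high"
        elif time > 0.3:
            effort = "medium"
        return effort

    # _effort_from_time(0.5) -> "medium"; _effort_from_time(0.9) -> "high"
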
@@ -1039,6 +1280,142 @@ def llm_invoke(
              logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")


+         # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+         model_lower_for_call = str(model_name_litellm).lower()
+         provider_lower_for_call = str(provider).lower()
+
+         if (
+             not use_batch_mode
+             and provider_lower_for_call == 'openai'
+             and model_lower_for_call.startswith('gpt-5')
+         ):
+             if verbose:
+                 logger.info(f"[INFO] Calling OpenAI Responses API for {model_name_litellm}...")
+             try:
+                 # Build input text from messages
+                 if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                     input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                 else:
+                     # Fallback: string cast
+                     input_text = str(formatted_messages)
+
+                 # Derive effort mapping already computed in time_kwargs
+                 reasoning_param = time_kwargs.get("reasoning")
+
+                 # Optional text settings; keep simple
+                 text_block = {"format": {"type": "text"}}
+
+                 # If structured output requested, attempt JSON schema via Pydantic
+                 # GPT-5 Responses API does not support temperature; omit it here.
+                 responses_kwargs = {
+                     "model": model_name_litellm,
+                     "input": input_text,
+                     "text": text_block,
+                 }
+                 if verbose and temperature not in (None, 0, 0.0):
+                     logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                 if reasoning_param is not None:
+                     responses_kwargs["reasoning"] = reasoning_param
+
+                 if output_pydantic:
+                     try:
+                         schema = output_pydantic.model_json_schema()
+                         if _openai_responses_supports_response_format():
+                             responses_kwargs["response_format"] = {
+                                 "type": "json_schema",
+                                 "json_schema": {
+                                     "name": output_pydantic.__name__,
+                                     "schema": schema,
+                                     "strict": True,
+                                 },
+                             }
+                             # When enforcing JSON schema, omit text formatting
+                             responses_kwargs.pop("text", None)
+                         else:
+                             if verbose:
+                                 logger.info("[INFO] OpenAI SDK lacks Responses.response_format; will validate JSON client-side with Pydantic.")
+                     except Exception as schema_e:
+                         logger.warning(f"[WARN] Failed to derive JSON schema from Pydantic: {schema_e}. Proceeding without structured response_format.")
+
+                 # Initialize OpenAI client with explicit key if provided
+                 try:
+                     from openai import OpenAI as _OpenAIClient
+                 except Exception:
+                     _OpenAIClient = None
+                 if _OpenAIClient is None:
+                     raise RuntimeError("OpenAI SDK not available to call Responses API.")
+
+                 api_key_to_use = litellm_kwargs.get("api_key") or os.getenv("OPENAI_API_KEY")
+                 client = _OpenAIClient(api_key=api_key_to_use) if api_key_to_use else _OpenAIClient()
+
+                 # Make the Responses API call, with graceful fallback if SDK
+                 # doesn't support certain newer kwargs (e.g., response_format)
+                 try:
+                     resp = client.responses.create(**responses_kwargs)
+                 except TypeError as te:
+                     msg = str(te)
+                     if 'response_format' in responses_kwargs and ('unexpected keyword argument' in msg or 'got an unexpected keyword argument' in msg):
+                         logger.warning("[WARN] OpenAI SDK doesn't support response_format; retrying without it.")
+                         responses_kwargs.pop('response_format', None)
+                         resp = client.responses.create(**responses_kwargs)
+                     else:
+                         raise
+
+                 # Extract text result
+                 result_text = getattr(resp, "output_text", None)
+                 if result_text is None:
+                     try:
+                         # Fallback parse
+                         outputs = getattr(resp, "output", []) or getattr(resp, "outputs", [])
+                         if outputs:
+                             first = outputs[0]
+                             content = getattr(first, "content", [])
+                             if content and hasattr(content[0], "text"):
+                                 result_text = content[0].text
+                     except Exception:
+                         result_text = None
+
+                 # Calculate cost using usage + CSV rates
+                 usage = getattr(resp, "usage", None)
+                 total_cost = 0.0
+                 if usage is not None:
+                     in_tok = getattr(usage, "input_tokens", 0) or 0
+                     out_tok = getattr(usage, "output_tokens", 0) or 0
+                     in_rate = model_info.get('input', 0.0) or 0.0
+                     out_rate = model_info.get('output', 0.0) or 0.0
+                     total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                 final_result = None
+                 if output_pydantic and result_text:
+                     try:
+                         final_result = output_pydantic.model_validate_json(result_text)
+                     except Exception as e:
+                         logger.error(f"[ERROR] Pydantic parse failed on Responses output: {e}")
+                         final_result = result_text
+                 else:
+                     final_result = result_text
+
+                 if verbose:
+                     logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                     logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                 return {
+                     'result': final_result,
+                     'cost': total_cost,
+                     'model_name': model_name_litellm,
+                     'thinking_output': None,
+                 }
+             except Exception as e:
+                 last_exception = e
+                 logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                 # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                 if "reasoning" in litellm_kwargs:
+                     try:
+                         litellm_kwargs.pop("reasoning", None)
+                     except Exception:
+                         pass
+                 # Fall through to LiteLLM path as a fallback
+
          if use_batch_mode:
              if verbose:
                  logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1105,7 +1482,6 @@ def llm_invoke(
                      messages=retry_messages,
                      temperature=temperature,
                      response_format=response_format,
-                     max_completion_tokens=max_tokens,
                      **time_kwargs
                  )
                  # Re-enable cache - restore original configured cache (restore to original state, even if None)
@@ -1149,26 +1525,39 @@ def llm_invoke(
                  elif isinstance(raw_result, str):
                      json_string_to_parse = raw_result # Start with the raw string
                      try:
-                         # Look for first { and last }
-                         start_brace = json_string_to_parse.find('{')
-                         end_brace = json_string_to_parse.rfind('}')
-                         if start_brace != -1 and end_brace != -1 and end_brace > start_brace:
-                             potential_json = json_string_to_parse[start_brace:end_brace+1]
-                             # Basic check if it looks like JSON
-                             if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                                 if verbose:
-                                     logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                                 parsed_result = output_pydantic.model_validate_json(potential_json)
-                             else:
-                                 # If block extraction fails, try cleaning markdown next
-                                 raise ValueError("Extracted block doesn't look like JSON")
+                         # 1) Prefer fenced ```json blocks
+                         fenced = _extract_fenced_json_block(raw_result)
+                         candidates: List[str] = []
+                         if fenced:
+                             candidates.append(fenced)
                          else:
-                             # If no braces found, try cleaning markdown next
-                             raise ValueError("Could not find enclosing {}")
+                             # 2) Fall back to scanning for balanced JSON objects
+                             candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                         if not candidates:
+                             raise ValueError("No JSON-like content found")
+
+                         parse_err: Optional[Exception] = None
+                         for cand in candidates:
+                             try:
+                                 if verbose:
+                                     logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+                                 parsed_result = output_pydantic.model_validate_json(cand)
+                                 json_string_to_parse = cand
+                                 parse_err = None
+                                 break
+                             except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                                 parse_err = pe
+
+                         if parsed_result is None:
+                             # If none of the candidates parsed, raise last error
+                             if parse_err is not None:
+                                 raise parse_err
+                             raise ValueError("Unable to parse any JSON candidates")
                      except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
                          if verbose:
-                             logger.debug(f"[DEBUG] JSON block extraction/validation failed ('{extraction_error}'). Trying markdown cleaning.")
-                         # Fallback: Clean markdown fences and retry JSON validation
+                             logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                         # Last resort: strip any leading/trailing code fences and retry
                          cleaned_result_str = raw_result.strip()
                          if cleaned_result_str.startswith("```json"):
                              cleaned_result_str = cleaned_result_str[7:]
@@ -1177,15 +1566,13 @@ def llm_invoke(
                          if cleaned_result_str.endswith("```"):
                              cleaned_result_str = cleaned_result_str[:-3]
                          cleaned_result_str = cleaned_result_str.strip()
-                         # Check again if it looks like JSON before parsing
                          if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
                              if verbose:
-                                 logger.debug(f"[DEBUG] Attempting parse after cleaning markdown fences. Cleaned string: '{cleaned_result_str}'")
-                             json_string_to_parse = cleaned_result_str # Update string for error reporting
+                                 logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                             json_string_to_parse = cleaned_result_str
                              parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
                          else:
-                             # If still doesn't look like JSON, raise error
-                             raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                             raise ValueError("Content after cleaning doesn't look like JSON")


                  # Check if any parsing attempt succeeded
@@ -1319,7 +1706,7 @@ if __name__ == "__main__":
          response = llm_invoke(
              prompt="Tell me a short joke about {topic}.",
              input_json={"topic": "programmers"},
-             strength=0.5, # Use base model (gpt-4.1-nano)
+             strength=0.5, # Use base model (gpt-5-nano)
              temperature=0.7,
              verbose=True
          )
@@ -1400,7 +1787,7 @@ if __name__ == "__main__":
              {"role": "system", "content": "You are a helpful assistant."},
              {"role": "user", "content": "What is the capital of France?"}
          ]
-         # Strength 0.5 should select gpt-4.1-nano
+         # Strength 0.5 should select gpt-5-nano
          response_messages = llm_invoke(
              messages=custom_messages,
              strength=0.5,