edsl 0.1.58__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +23 -4
- edsl/agents/agent_list.py +36 -6
- edsl/base/data_transfer_models.py +5 -0
- edsl/base/enums.py +7 -2
- edsl/coop/coop.py +103 -1
- edsl/dataset/dataset.py +74 -0
- edsl/dataset/dataset_operations_mixin.py +69 -64
- edsl/inference_services/services/__init__.py +3 -1
- edsl/inference_services/services/open_ai_service_v2.py +243 -0
- edsl/inference_services/services/test_service.py +1 -1
- edsl/interviews/exception_tracking.py +66 -20
- edsl/invigilators/invigilators.py +5 -1
- edsl/invigilators/prompt_constructor.py +299 -136
- edsl/jobs/data_structures.py +3 -0
- edsl/jobs/html_table_job_logger.py +18 -1
- edsl/jobs/jobs_pricing_estimation.py +6 -2
- edsl/jobs/jobs_remote_inference_logger.py +2 -0
- edsl/jobs/remote_inference.py +34 -7
- edsl/key_management/key_lookup_builder.py +25 -3
- edsl/language_models/language_model.py +41 -3
- edsl/language_models/raw_response_handler.py +126 -7
- edsl/prompts/prompt.py +1 -0
- edsl/questions/question_list.py +76 -20
- edsl/results/result.py +37 -0
- edsl/results/results.py +9 -1
- edsl/scenarios/file_store.py +8 -12
- edsl/scenarios/scenario.py +50 -2
- edsl/scenarios/scenario_list.py +34 -12
- edsl/surveys/survey.py +4 -0
- edsl/tasks/task_history.py +180 -6
- edsl/utilities/wikipedia.py +194 -0
- {edsl-0.1.58.dist-info → edsl-0.1.60.dist-info}/METADATA +5 -4
- {edsl-0.1.58.dist-info → edsl-0.1.60.dist-info}/RECORD +37 -35
- {edsl-0.1.58.dist-info → edsl-0.1.60.dist-info}/LICENSE +0 -0
- {edsl-0.1.58.dist-info → edsl-0.1.60.dist-info}/WHEEL +0 -0
- {edsl-0.1.58.dist-info → edsl-0.1.60.dist-info}/entry_points.txt +0 -0
edsl/jobs/data_structures.py
CHANGED
@@ -213,6 +213,9 @@ class Answers(UserDict):
         if comment:
             self[question.question_name + "_comment"] = comment
 
+        if getattr(response, "reasoning_summary", None):
+            self[question.question_name + "_reasoning_summary"] = response.reasoning_summary
+
     def replace_missing_answers_with_none(self, survey: "Survey") -> None:
         """
         Replace missing answers with None for all questions in the survey.
edsl/jobs/html_table_job_logger.py
CHANGED
@@ -217,6 +217,17 @@ class HTMLTableJobLogger(JobLogger):
         )
         total_cost = total_input_cost + total_output_cost
 
+        # Calculate credit totals
+        total_input_credits = sum(
+            cost.input_cost_credits_with_cache or 0
+            for cost in self.jobs_info.model_costs
+        )
+        total_output_credits = sum(
+            cost.output_cost_credits_with_cache or 0
+            for cost in self.jobs_info.model_costs
+        )
+        total_credits = total_input_credits + total_output_credits
+
         # Generate cost rows HTML with class names for right alignment
         cost_rows = "".join(
             f"""
@@ -228,6 +239,7 @@ class HTMLTableJobLogger(JobLogger):
             <td class='token-count'>{cost.output_tokens:,}</td>
             <td class='cost-value'>${cost.output_cost_usd:.4f}</td>
             <td class='cost-value'>${(cost.input_cost_usd or 0) + (cost.output_cost_usd or 0):.4f}</td>
+            <td class='cost-value'>{(cost.input_cost_credits_with_cache or 0) + (cost.output_cost_credits_with_cache or 0):,.2f}</td>
             </tr>
             """
             for cost in self.jobs_info.model_costs
@@ -242,6 +254,7 @@ class HTMLTableJobLogger(JobLogger):
             <td class='token-count'>{total_output_tokens:,}</td>
             <td class='cost-value'>${total_output_cost:.4f}</td>
             <td class='cost-value'>${total_cost:.4f}</td>
+            <td class='cost-value'>{total_credits:,.2f}</td>
             </tr>
         """
 
@@ -249,7 +262,7 @@ class HTMLTableJobLogger(JobLogger):
             <div class="model-costs-section">
                 <div class="model-costs-header" onclick="{self._collapse(f'model-costs-content-{self.log_id}', f'model-costs-arrow-{self.log_id}')}">
                     <span id="model-costs-arrow-{self.log_id}" class="expand-toggle">⌃</span>
-                    <span>Model Costs (${total_cost:.4f} total)</span>
+                    <span>Model Costs (${total_cost:.4f} / {total_credits:,.2f} credits total)</span>
                     <span style="flex-grow: 1;"></span>
                 </div>
                 <div id="model-costs-content-{self.log_id}" class="model-costs-content">
@@ -263,6 +276,7 @@ class HTMLTableJobLogger(JobLogger):
                             <th class="cost-header">Output Tokens</th>
                             <th class="cost-header">Output Cost</th>
                             <th class="cost-header">Total Cost</th>
+                            <th class="cost-header">Total Credits</th>
                         </tr>
                     </thead>
                     <tbody>
@@ -270,6 +284,9 @@ class HTMLTableJobLogger(JobLogger):
                         {total_row}
                     </tbody>
                 </table>
+                <p style="font-style: italic; margin-top: 8px; font-size: 0.85em; color: #4b5563;">
+                    You can obtain the total credit cost by multiplying the total USD cost by 100. A lower credit cost indicates that you saved money by retrieving responses from the universal remote cache.
+                </p>
             </div>
         </div>
         """
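The footnote added above fixes the conversion at 100 credits per USD, and the new credit columns use the "with cache" cost fields, so they drop whenever responses come back from the universal remote cache instead of the provider. A minimal sketch of that arithmetic, with made-up rows standing in for edsl's model-cost entries:

from dataclasses import dataclass

# Sketch only: stand-ins for the cost rows rendered by HTMLTableJobLogger.
@dataclass
class CostRow:
    input_cost_usd: float
    output_cost_usd: float
    input_cost_credits_with_cache: float   # credits actually billed (cache hits excluded)
    output_cost_credits_with_cache: float

def usd_to_credits(usd: float) -> float:
    return usd * 100  # conversion stated in the footnote above

rows = [CostRow(0.0120, 0.0300, 0.60, 1.50), CostRow(0.0050, 0.0100, 0.50, 1.00)]
total_usd = sum(r.input_cost_usd + r.output_cost_usd for r in rows)
total_credits = sum(r.input_cost_credits_with_cache + r.output_cost_credits_with_cache for r in rows)

print(f"Model Costs (${total_usd:.4f} / {total_credits:,.2f} credits total)")
# total_credits below usd_to_credits(total_usd) means part of the job was served from cache.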
edsl/jobs/jobs_pricing_estimation.py
CHANGED
@@ -88,7 +88,6 @@ class PromptCostEstimator:
 
 
 class JobsPrompts:
-
     relevant_keys = [
         "user_prompt",
         "system_prompt",
@@ -171,13 +170,18 @@ class JobsPrompts:
             cost = prompt_cost["cost_usd"]
 
             # Generate cache keys for each iteration
+            files_list = prompts.get("files_list", None)
+            if files_list:
+                files_hash = "+".join([str(hash(file)) for file in files_list])
+                user_prompt_with_hashes = user_prompt + f" {files_hash}"
            cache_keys = []
+
             for iteration in range(iterations):
                 cache_key = CacheEntry.gen_key(
                     model=model,
                     parameters=invigilator.model.parameters,
                     system_prompt=system_prompt,
-                    user_prompt=user_prompt,
+                    user_prompt=user_prompt_with_hashes if files_list else user_prompt,
                     iteration=iteration,
                 )
                 cache_keys.append(cache_key)
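The change above makes the estimated cache keys sensitive to attachments: when a prompt carries files, a hash of each file is appended to the user prompt before the key is generated, so identical text with different attachments maps to different cache entries. A rough sketch of the idea, using hashlib in place of edsl's own hashing and CacheEntry.gen_key:

import hashlib

# Sketch only: illustrates why file hashes are folded into the user prompt
# before a cache key is computed; hashlib stands in for edsl's hashing.
def gen_key(model: str, system_prompt: str, user_prompt: str, iteration: int) -> str:
    blob = f"{model}|{system_prompt}|{user_prompt}|{iteration}".encode()
    return hashlib.sha256(blob).hexdigest()

def key_for(model, system_prompt, user_prompt, files_list, iteration=0):
    if files_list:
        files_hash = "+".join(hashlib.sha256(f).hexdigest() for f in files_list)
        user_prompt = user_prompt + f" {files_hash}"
    return gen_key(model, system_prompt, user_prompt, iteration)

k1 = key_for("gpt-4o", "You are helpful.", "Describe the attached image.", [b"image-bytes-A"])
k2 = key_for("gpt-4o", "You are helpful.", "Describe the attached image.", [b"image-bytes-B"])
assert k1 != k2  # same text, different attachments -> different cache entries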
edsl/jobs/remote_inference.py
CHANGED
@@ -279,9 +279,7 @@ class JobsRemoteInferenceHandler:
             )
             time.sleep(self.poll_interval)
 
-    def _get_expenses_from_results(
-        self, results: "Results", include_cached_responses_in_cost: bool = False
-    ) -> dict:
+    def _get_expenses_from_results(self, results: "Results") -> dict:
         """
         Calculates expenses from Results object.
 
@@ -309,10 +307,6 @@ class JobsRemoteInferenceHandler:
                 question_name = key.removesuffix("_cost")
                 cache_used = result["cache_used_dict"][question_name]
 
-                # Skip if we're excluding cached responses and this was cached
-                if not include_cached_responses_in_cost and cache_used:
-                    continue
-
                 # Get expense keys for input and output tokens
                 input_key = (
                     result["model"]._inference_service_,
@@ -332,6 +326,7 @@ class JobsRemoteInferenceHandler:
                     expenses[input_key] = {
                         "tokens": 0,
                         "cost_usd": 0,
+                        "cost_usd_with_cache": 0,
                     }
 
                 input_price_per_million_tokens = input_key[3]
@@ -341,11 +336,15 @@ class JobsRemoteInferenceHandler:
                 expenses[input_key]["tokens"] += input_tokens
                 expenses[input_key]["cost_usd"] += input_cost
 
+                if not cache_used:
+                    expenses[input_key]["cost_usd_with_cache"] += input_cost
+
                 # Update output token expenses
                 if output_key not in expenses:
                     expenses[output_key] = {
                         "tokens": 0,
                         "cost_usd": 0,
+                        "cost_usd_with_cache": 0,
                     }
 
                 output_price_per_million_tokens = output_key[3]
@@ -357,6 +356,9 @@ class JobsRemoteInferenceHandler:
                 expenses[output_key]["tokens"] += output_tokens
                 expenses[output_key]["cost_usd"] += output_cost
 
+                if not cache_used:
+                    expenses[output_key]["cost_usd_with_cache"] += output_cost
+
         expenses_by_model = {}
         for expense_key, expense_usage in expenses.items():
             service, model, token_type, _ = expense_key
@@ -368,8 +370,10 @@ class JobsRemoteInferenceHandler:
                     "model": model,
                     "input_tokens": 0,
                    "input_cost_usd": 0,
+                    "input_cost_usd_with_cache": 0,
                     "output_tokens": 0,
                     "output_cost_usd": 0,
+                    "output_cost_usd_with_cache": 0,
                 }
 
             if token_type == "input":
@@ -377,14 +381,22 @@ class JobsRemoteInferenceHandler:
                 expenses_by_model[model_key]["input_cost_usd"] += expense_usage[
                     "cost_usd"
                 ]
+                expenses_by_model[model_key][
+                    "input_cost_usd_with_cache"
+                ] += expense_usage["cost_usd_with_cache"]
             elif token_type == "output":
                 expenses_by_model[model_key]["output_tokens"] += expense_usage["tokens"]
                 expenses_by_model[model_key]["output_cost_usd"] += expense_usage[
                     "cost_usd"
                 ]
+                expenses_by_model[model_key][
+                    "output_cost_usd_with_cache"
+                ] += expense_usage["cost_usd_with_cache"]
 
         converter = CostConverter()
         for model_key, model_cost_dict in expenses_by_model.items():
+
+            # Handle full cost (without cache)
            input_cost = model_cost_dict["input_cost_usd"]
            output_cost = model_cost_dict["output_cost_usd"]
            model_cost_dict["input_cost_credits"] = converter.usd_to_credits(input_cost)
@@ -399,6 +411,15 @@ class JobsRemoteInferenceHandler:
                model_cost_dict["output_cost_credits"]
            )
 
+            # Handle cost with cache
+            input_cost_with_cache = model_cost_dict["input_cost_usd_with_cache"]
+            output_cost_with_cache = model_cost_dict["output_cost_usd_with_cache"]
+            model_cost_dict["input_cost_credits_with_cache"] = converter.usd_to_credits(
+                input_cost_with_cache
+            )
+            model_cost_dict["output_cost_credits_with_cache"] = (
+                converter.usd_to_credits(output_cost_with_cache)
+            )
         return list(expenses_by_model.values())
 
     def _fetch_results_and_log(
@@ -423,6 +444,12 @@ class JobsRemoteInferenceHandler:
                     input_cost_usd=model_cost_dict.get("input_cost_usd"),
                     output_tokens=model_cost_dict.get("output_tokens"),
                     output_cost_usd=model_cost_dict.get("output_cost_usd"),
+                    input_cost_credits_with_cache=model_cost_dict.get(
+                        "input_cost_credits_with_cache"
+                    ),
+                    output_cost_credits_with_cache=model_cost_dict.get(
+                        "output_cost_credits_with_cache"
+                    ),
                 )
                 for model_cost_dict in model_cost_dicts
             ]
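The removed include_cached_responses_in_cost flag is replaced by a second running total: cost_usd still counts every response, while cost_usd_with_cache only accumulates responses that were not served from the cache, i.e. what was actually billed. A small illustration with invented per-question costs:

# Sketch only: mirrors the dual accumulation above with invented numbers.
questions = [
    {"input_cost": 0.002, "output_cost": 0.004, "cache_used": False},
    {"input_cost": 0.002, "output_cost": 0.004, "cache_used": True},
]

totals = {"cost_usd": 0.0, "cost_usd_with_cache": 0.0}
for q in questions:
    cost = q["input_cost"] + q["output_cost"]
    totals["cost_usd"] += cost                 # full price, cache hits included
    if not q["cache_used"]:
        totals["cost_usd_with_cache"] += cost  # only what was actually charged

print(totals)  # roughly {'cost_usd': 0.012, 'cost_usd_with_cache': 0.006}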
edsl/key_management/key_lookup_builder.py
CHANGED
@@ -363,13 +363,35 @@ class KeyLookupBuilder:
         >>> builder._add_api_key("OPENAI_API_KEY", "sk-1234", "env")
         >>> 'sk-1234' == builder.key_data["openai"][-1].value
         True
+        >>> 'sk-1234' == builder.key_data["openai_v2"][-1].value
+        True
         """
         service = api_keyname_to_service[key]
         new_entry = APIKeyEntry(service=service, name=key, value=value, source=source)
-
-
+
+        # Special case for OPENAI_API_KEY - add to both openai and openai_v2
+        if key == "OPENAI_API_KEY":
+            # Add to openai service
+            openai_service = "openai"
+            openai_entry = APIKeyEntry(service=openai_service, name=key, value=value, source=source)
+            if openai_service not in self.key_data:
+                self.key_data[openai_service] = [openai_entry]
+            else:
+                self.key_data[openai_service].append(openai_entry)
+
+            # Add to openai_v2 service
+            openai_v2_service = "openai_v2"
+            openai_v2_entry = APIKeyEntry(service=openai_v2_service, name=key, value=value, source=source)
+            if openai_v2_service not in self.key_data:
+                self.key_data[openai_v2_service] = [openai_v2_entry]
+            else:
+                self.key_data[openai_v2_service].append(openai_v2_entry)
         else:
-
+            # Normal case for all other API keys
+            if service not in self.key_data:
+                self.key_data[service] = [new_entry]
+            else:
+                self.key_data[service].append(new_entry)
 
     def update_from_dict(self, d: dict) -> None:
         """
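The effect of the special case is that a single OPENAI_API_KEY entry now feeds both the openai and the new openai_v2 service, so the Responses-API-based service works without extra configuration. A toy version of the bookkeeping, with a plain dict and namedtuple standing in for KeyLookupBuilder.key_data and APIKeyEntry:

from collections import namedtuple

# Sketch only: toy stand-ins, not edsl's KeyLookupBuilder.
APIKeyEntry = namedtuple("APIKeyEntry", "service name value source")
key_data = {}

def add_api_key(key, value, source, service):
    # OPENAI_API_KEY is fanned out to both the classic and the v2 service.
    services = ["openai", "openai_v2"] if key == "OPENAI_API_KEY" else [service]
    for svc in services:
        key_data.setdefault(svc, []).append(APIKeyEntry(svc, key, value, source))

add_api_key("OPENAI_API_KEY", "sk-1234", "env", service="openai")
assert key_data["openai"][-1].value == key_data["openai_v2"][-1].value == "sk-1234"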
edsl/language_models/language_model.py
CHANGED
@@ -174,7 +174,8 @@ class LanguageModel(
         """
         key_sequence = cls.key_sequence
         usage_sequence = cls.usage_sequence if hasattr(cls, "usage_sequence") else None
-
+        reasoning_sequence = cls.reasoning_sequence if hasattr(cls, "reasoning_sequence") else None
+        return RawResponseHandler(key_sequence, usage_sequence, reasoning_sequence)
 
     def __init__(
         self,
@@ -769,8 +770,45 @@ class LanguageModel(
         params["question_name"] = invigilator.question.question_name
         # Get timeout from configuration
         from ..config import CONFIG
-
-
+        import logging
+
+        logger = logging.getLogger(__name__)
+        base_timeout = float(CONFIG.get("EDSL_API_TIMEOUT"))
+
+        # Adjust timeout if files are present
+        import time
+
+        start = time.time()
+        if files_list:
+            # Calculate total size of attached files in MB
+            file_sizes = []
+            for file in files_list:
+                # Try different attributes that might contain the file content
+                if hasattr(file, "base64_string") and file.base64_string:
+                    file_sizes.append(len(file.base64_string) / (1024 * 1024))
+                elif hasattr(file, "content") and file.content:
+                    file_sizes.append(len(file.content) / (1024 * 1024))
+                elif hasattr(file, "data") and file.data:
+                    file_sizes.append(len(file.data) / (1024 * 1024))
+                else:
+                    # Default minimum size if we can't determine actual size
+                    file_sizes.append(1)  # Assume at least 1MB
+            total_size_mb = sum(file_sizes)
+
+            # Increase timeout proportionally to file size
+            # For each MB of file size, add 10 seconds to the timeout (adjust as needed)
+            size_adjustment = total_size_mb * 10
+
+            # Cap the maximum timeout adjustment at 5 minutes (300 seconds)
+            size_adjustment = min(size_adjustment, 300)
+
+            TIMEOUT = base_timeout + size_adjustment
+
+            logger.info(
+                f"Adjusted timeout for API call with {len(files_list)} files (total size: {total_size_mb:.2f}MB). Base timeout: {base_timeout}s, New timeout: {TIMEOUT}s"
+            )
+        else:
+            TIMEOUT = base_timeout
 
         # Execute the model call with timeout
         response = await asyncio.wait_for(f(**params), timeout=TIMEOUT)
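The timeout adjustment above is linear in the size of the base64 payload with a hard cap: 10 extra seconds per MB of attachments, but never more than 300 extra seconds on top of EDSL_API_TIMEOUT. A worked example of the rule; the 60-second base is an assumed value, not the edsl default:

# Sketch only: the size-based timeout rule, with an assumed base timeout.
def adjusted_timeout(base_timeout: float, file_sizes_mb: list) -> float:
    if not file_sizes_mb:
        return base_timeout
    size_adjustment = min(sum(file_sizes_mb) * 10, 300)  # +10 s per MB, capped at 300 s
    return base_timeout + size_adjustment

print(adjusted_timeout(60.0, []))          # 60.0  (no attachments)
print(adjusted_timeout(60.0, [2.5, 1.5]))  # 100.0 (4 MB -> +40 s)
print(adjusted_timeout(60.0, [50.0]))      # 360.0 (cap reached: at most +300 s)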
edsl/language_models/raw_response_handler.py
CHANGED
@@ -1,5 +1,5 @@
 import json
-from typing import Optional, Any
+from typing import Optional, Any, List
 from .exceptions import (
     LanguageModelBadResponseError,
     LanguageModelTypeError,
@@ -41,10 +41,13 @@ def _extract_item_from_raw_response(data, sequence):
            current_data = current_data[key]
        except Exception as e:
            path = " -> ".join(map(str, sequence[: i + 1]))
-
-
+
+            # Create a safe error message that won't be None
+            if "error" in data and data["error"] is not None:
+                msg = str(data["error"])
            else:
                msg = f"Error accessing path: {path}. {str(e)}. Full response is: '{data}'"
+
            raise LanguageModelBadResponseError(message=msg, response_json=data)
    if isinstance(current_data, str):
        return current_data.strip()
@@ -55,17 +58,127 @@ def _extract_item_from_raw_response(data, sequence):
 class RawResponseHandler:
     """Class to handle raw responses from language models."""
 
-    def __init__(self, key_sequence: list, usage_sequence: Optional[list] = None):
+    def __init__(self, key_sequence: list, usage_sequence: Optional[list] = None, reasoning_sequence: Optional[list] = None):
         self.key_sequence = key_sequence
         self.usage_sequence = usage_sequence
+        self.reasoning_sequence = reasoning_sequence
 
     def get_generated_token_string(self, raw_response):
-
+        try:
+            return _extract_item_from_raw_response(raw_response, self.key_sequence)
+        except (LanguageModelKeyError, LanguageModelIndexError, LanguageModelTypeError, LanguageModelBadResponseError) as e:
+            # For non-reasoning models or reasoning models with different response formats,
+            # try to extract text directly from common response formats
+            if isinstance(raw_response, dict):
+                # Responses API format for non-reasoning models
+                if 'output' in raw_response and isinstance(raw_response['output'], list):
+                    # Try to get first message content
+                    if len(raw_response['output']) > 0:
+                        item = raw_response['output'][0]
+                        if isinstance(item, dict) and 'content' in item:
+                            if isinstance(item['content'], list) and len(item['content']) > 0:
+                                first_content = item['content'][0]
+                                if isinstance(first_content, dict) and 'text' in first_content:
+                                    return first_content['text']
+                            elif isinstance(item['content'], str):
+                                return item['content']
+
+                # OpenAI completions format
+                if 'choices' in raw_response and isinstance(raw_response['choices'], list) and len(raw_response['choices']) > 0:
+                    choice = raw_response['choices'][0]
+                    if isinstance(choice, dict):
+                        if 'text' in choice:
+                            return choice['text']
+                        elif 'message' in choice and isinstance(choice['message'], dict) and 'content' in choice['message']:
+                            return choice['message']['content']
+
+                # Text directly in response
+                if 'text' in raw_response:
+                    return raw_response['text']
+                elif 'content' in raw_response:
+                    return raw_response['content']
+
+                # Error message - try to return a coherent error for debugging
+                if 'message' in raw_response:
+                    return f"[ERROR: {raw_response['message']}]"
+
+            # If we get a string directly, return it
+            if isinstance(raw_response, str):
+                return raw_response
+
+            # As a last resort, convert the whole response to string
+            try:
+                return f"[ERROR: Could not extract text. Raw response: {str(raw_response)}]"
+            except:
+                return "[ERROR: Could not extract text from response]"
 
     def get_usage_dict(self, raw_response):
         if self.usage_sequence is None:
             return {}
-
+        try:
+            return _extract_item_from_raw_response(raw_response, self.usage_sequence)
+        except (LanguageModelKeyError, LanguageModelIndexError, LanguageModelTypeError, LanguageModelBadResponseError):
+            # For non-reasoning models, try to extract usage from common response formats
+            if isinstance(raw_response, dict):
+                # Standard OpenAI usage format
+                if 'usage' in raw_response:
+                    return raw_response['usage']
+
+                # Look for nested usage info
+                if 'choices' in raw_response and len(raw_response['choices']) > 0:
+                    choice = raw_response['choices'][0]
+                    if isinstance(choice, dict) and 'usage' in choice:
+                        return choice['usage']
+
+            # If no usage info found, return empty dict
+            return {}
+
+    def get_reasoning_summary(self, raw_response):
+        """
+        Extract reasoning summary from the model response.
+
+        Handles various response structures:
+        1. Standard path extraction using self.reasoning_sequence
+        2. Direct access to output[0]['summary'] for OpenAI responses
+        3. List responses where the first item contains the output structure
+        """
+        if self.reasoning_sequence is None:
+            return None
+
+        try:
+            # First try the standard extraction path
+            summary_data = _extract_item_from_raw_response(raw_response, self.reasoning_sequence)
+
+            # If summary_data is a list of dictionaries with 'text' and 'type' fields
+            # (as in OpenAI's response format), combine them into a single string
+            if isinstance(summary_data, list) and all(isinstance(item, dict) and 'text' in item for item in summary_data):
+                return '\n\n'.join(item['text'] for item in summary_data)
+
+            return summary_data
+        except Exception:
+            # Fallback approaches for different response structures
+            try:
+                # Case 1: Direct dict with 'output' field (common OpenAI format)
+                if isinstance(raw_response, dict) and 'output' in raw_response:
+                    output = raw_response['output']
+                    if isinstance(output, list) and len(output) > 0 and 'summary' in output[0]:
+                        summary_data = output[0]['summary']
+                        if isinstance(summary_data, list) and all(isinstance(item, dict) and 'text' in item for item in summary_data):
+                            return '\n\n'.join(item['text'] for item in summary_data)

+                # Case 2: List where the first item is a dict with 'output' field
+                if isinstance(raw_response, list) and len(raw_response) > 0:
+                    first_item = raw_response[0]
+                    if isinstance(first_item, dict) and 'output' in first_item:
+                        output = first_item['output']
+                        if isinstance(output, list) and len(output) > 0 and 'summary' in output[0]:
+                            summary_data = output[0]['summary']
+                            if isinstance(summary_data, list) and all(isinstance(item, dict) and 'text' in item for item in summary_data):
+                                return '\n\n'.join(item['text'] for item in summary_data)
+            except Exception:
+                pass
+
+        return None
 
     def parse_response(self, raw_response: dict[str, Any]) -> Any:
         """Parses the API response and returns the response text."""
@@ -73,7 +186,11 @@ class RawResponseHandler:
         from edsl.data_transfer_models import EDSLOutput
 
         generated_token_string = self.get_generated_token_string(raw_response)
+        # Ensure generated_token_string is a string before using string methods
+        if not isinstance(generated_token_string, str):
+            generated_token_string = str(generated_token_string)
         last_newline = generated_token_string.rfind("\n")
+        reasoning_summary = self.get_reasoning_summary(raw_response)
 
         if last_newline == -1:
             # There is no comment
@@ -81,12 +198,14 @@ class RawResponseHandler:
                 "answer": self.convert_answer(generated_token_string),
                 "generated_tokens": generated_token_string,
                 "comment": None,
+                "reasoning_summary": reasoning_summary,
             }
         else:
             edsl_dict = {
                 "answer": self.convert_answer(generated_token_string[:last_newline]),
-                "comment": generated_token_string[last_newline + 1
+                "comment": generated_token_string[last_newline + 1:].strip(),
                 "generated_tokens": generated_token_string,
+                "reasoning_summary": reasoning_summary,
             }
         return EDSLOutput(**edsl_dict)
 
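For OpenAI's Responses API the reasoning summary arrives as a list of {'text': ..., 'type': ...} fragments, which the handler joins with blank lines; the generated text itself is still split into answer and comment at the last newline. A compact illustration with synthetic data rather than a real API payload:

# Sketch only: synthetic data showing the summary join and the answer/comment split.
summary_data = [
    {"type": "summary_text", "text": "Considered both options."},
    {"type": "summary_text", "text": "Chose the cheaper one."},
]
reasoning_summary = "\n\n".join(item["text"] for item in summary_data)

generated = "42\nBecause the question asked for a single number."
last_newline = generated.rfind("\n")
edsl_dict = {
    "answer": generated[:last_newline],                # "42"
    "comment": generated[last_newline + 1:].strip(),   # text after the last newline
    "reasoning_summary": reasoning_summary,
}
print(edsl_dict["answer"], "|", edsl_dict["comment"])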
edsl/prompts/prompt.py
CHANGED
edsl/questions/question_list.py
CHANGED
@@ -299,23 +299,24 @@ class ListResponseValidator(ResponseValidatorABC):
         # This method can now be removed since validation is handled in the Pydantic model
         pass
 
-    def fix(self, response, verbose=False):
+    def fix(self, response, verbose=False) -> dict[str, Any]:
         """
         Fix common issues in list responses by splitting strings into lists.
 
         Examples:
             >>> from edsl import QuestionList
-            >>>
-            >>>
+            >>> q_constrained = QuestionList.example(min_list_items=2, max_list_items=4)
+            >>> validator_constrained = q_constrained.response_validator
 
+            >>> q_permissive = QuestionList.example(permissive=True)
+            >>> validator_permissive = q_permissive.response_validator
+
             >>> # Fix a string that should be a list
             >>> bad_response = {"answer": "apple,banana,cherry"}
-            >>>
-
-
-
-            ... validated = validator.validate(fixed)
-            ... validated # Show full response
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['apple', 'banana', 'cherry']}
+            >>> validator_constrained.validate(fixed) # Show full response after validation
             {'answer': ['apple', 'banana', 'cherry'], 'comment': None, 'generated_tokens': None}
 
             >>> # Fix using generated_tokens when answer is invalid
@@ -323,12 +324,10 @@ class ListResponseValidator(ResponseValidatorABC):
             ... "answer": None,
             ... "generated_tokens": "pizza, pasta, salad"
             ... }
-            >>>
-
-
-
-            ... validated = validator.validate(fixed)
-            ... validated
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['pizza', ' pasta', ' salad']}
+            >>> validator_constrained.validate(fixed)
             {'answer': ['pizza', ' pasta', ' salad'], 'comment': None, 'generated_tokens': None}
 
             >>> # Preserve comments during fixing
@@ -336,17 +335,74 @@ class ListResponseValidator(ResponseValidatorABC):
             ... "answer": "red,blue,green",
             ... "comment": "These are colors"
             ... }
-            >>>
-            >>>
+            >>> fixed_output = validator_constrained.fix(bad_response)
+            >>> fixed_output
+            {'answer': ['red', 'blue', 'green'], 'comment': 'These are colors'}
+            >>> validated_output = validator_constrained.validate(fixed_output)
+            >>> validated_output == {
             ... "answer": ["red", "blue", "green"],
-            ... "comment": "These are colors"
+            ... "comment": "These are colors",
+            ... "generated_tokens": None
             ... }
             True
+
+            >>> # Fix an empty string answer
+            >>> bad_response = {"answer": ""}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': []}
+            >>> validator_permissive.validate(fixed)
+            {'answer': [], 'comment': None, 'generated_tokens': None}
+
+            >>> # Fix a single item string answer (no commas)
+            >>> bad_response = {"answer": "single_item"}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['single_item']}
+            >>> validator_permissive.validate(fixed)
+            {'answer': ['single_item'], 'comment': None, 'generated_tokens': None}
+
+            >>> # Fix when answer is None and no generated_tokens
+            >>> bad_response = {"answer": None}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': []}
+            >>> validator_permissive.validate(fixed)
+            {'answer': [], 'comment': None, 'generated_tokens': None}
+
+            >>> # Fix when answer key is missing but generated_tokens is present
+            >>> bad_response = {"generated_tokens": "token1,token2"}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['token1', 'token2']}
+            >>> validator_constrained.validate(fixed) # 2 items, OK for constrained validator
+            {'answer': ['token1', 'token2'], 'comment': None, 'generated_tokens': None}
+
+            >>> # Fix when answer key is missing and generated_tokens is an empty string
+            >>> bad_response = {"generated_tokens": ""}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': []}
+            >>> validator_permissive.validate(fixed)
+            {'answer': [], 'comment': None, 'generated_tokens': None}
+
+            >>> # Fix when answer key is missing and generated_tokens is a single item
+            >>> bad_response = {"generated_tokens": "single_token"}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['single_token']}
+            >>> validator_permissive.validate(fixed)
+            {'answer': ['single_token'], 'comment': None, 'generated_tokens': None}
         """
         if verbose:
             print(f"Fixing list response: {response}")
         answer = str(response.get("answer") or response.get("generated_tokens", ""))
-
+        if "," in answer:
+            result = {"answer": answer.split(",")}
+        elif answer == "":
+            result = {"answer": []}
+        else:
+            result = {"answer": [answer]}
         if "comment" in response:
             result["comment"] = response["comment"]
         return result
@@ -395,7 +451,7 @@ class QuestionList(QuestionBase):
 
         self.include_comment = include_comment
         self.answering_instructions = answering_instructions
-        self.
+        self.question_presentation = question_presentation
 
     def create_response_model(self):
         return create_model(self.min_list_items, self.max_list_items, self.permissive)