edsl 0.1.51__py3-none-any.whl → 0.1.53__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
edsl/__init__.py CHANGED
@@ -15,39 +15,42 @@ from edsl import logger
  # Set up logger with configuration from environment/config
  # (We'll configure the logger after CONFIG is initialized below)

- __all__ = ['logger']
+ __all__ = ["logger"]

  # Define modules to import
  modules_to_import = [
- 'dataset',
- 'agents',
- 'surveys',
- 'questions',
- 'scenarios',
- 'language_models',
- 'results',
- 'caching',
- 'notebooks',
- 'coop',
- 'instructions',
- 'jobs'
+ "dataset",
+ "agents",
+ "surveys",
+ "questions",
+ "scenarios",
+ "language_models",
+ "results",
+ "caching",
+ "notebooks",
+ "coop",
+ "instructions",
+ "jobs",
+ "conversation",
  ]

  # Dynamically import modules and extend __all__
  for module_name in modules_to_import:
  try:
  # Import the module
- module = importlib.import_module(f'.{module_name}', package='edsl')
-
+ module = importlib.import_module(f".{module_name}", package="edsl")
+
  # Get the module's __all__ attribute
- module_all = getattr(module, '__all__', [])
-
+ module_all = getattr(module, "__all__", [])
+
  # Import all names from the module
  exec(f"from .{module_name} import *")
-
+
  # Extend __all__ with the module's __all__
  if module_all:
- logger.debug(f"Adding {len(module_all)} items from {module_name} to __all__")
+ logger.debug(
+ f"Adding {len(module_all)} items from {module_name} to __all__"
+ )
  __all__.extend(module_all)
  else:
  logger.warning(f"Module {module_name} does not have __all__ defined")
@@ -61,39 +64,43 @@ for module_name in modules_to_import:
  try:
  from edsl.load_plugins import load_plugins
  from edsl.plugins import get_plugin_manager, get_exports
-
+
  # Load all plugins
  plugins = load_plugins()
  logger.info(f"Loaded {len(plugins)} plugins")
-
+
  # Add plugins to globals and __all__
  for plugin_name, plugin in plugins.items():
  globals()[plugin_name] = plugin
  __all__.append(plugin_name)
  logger.info(f"Registered plugin {plugin_name} in global namespace")
-
+
  # Get exports from plugins and add them to globals
  exports = get_exports()
  logger.info(f"Found {len(exports)} exported objects from plugins")
-
+
  for name, obj in exports.items():
  globals()[name] = obj
  __all__.append(name)
  logger.info(f"Added plugin export: {name}")
-
+
  # Add placeholders for expected exports that are missing
  # This maintains backward compatibility for common plugins
  PLUGIN_PLACEHOLDERS = {
  # No placeholders - removed Conjure for cleaner namespace
  }
-
+
  for placeholder_name, github_url in PLUGIN_PLACEHOLDERS.items():
  if placeholder_name not in globals():
  # Create a placeholder class
- placeholder_class = type(placeholder_name, (), {
- "__getattr__": lambda self, name: self._not_installed(name),
- "_not_installed": lambda self, name: self._raise_import_error(),
- "_raise_import_error": lambda self: exec(f"""
+ placeholder_class = type(
+ placeholder_name,
+ (),
+ {
+ "__getattr__": lambda self, name: self._not_installed(name),
+ "_not_installed": lambda self, name: self._raise_import_error(),
+ "_raise_import_error": lambda self: exec(
+ f"""
  msg = (
  "The {placeholder_name} plugin is not installed. "
  "To use {placeholder_name} with EDSL, install it using:\\n"
@@ -104,13 +111,17 @@ msg = (
  )
  logger.warning(msg)
  raise ImportError(msg)
- """)
- })
-
+ """
+ ),
+ },
+ )
+
  # Register the placeholder
  globals()[placeholder_name] = placeholder_class()
  __all__.append(placeholder_name)
- logger.info(f"Added placeholder for {placeholder_name} with installation instructions")
+ logger.info(
+ f"Added placeholder for {placeholder_name} with installation instructions"
+ )

  except ImportError as e:
  # Modules not available
@@ -127,8 +138,8 @@ logger.configure_from_config()

  # Installs a custom exception handling routine for edsl exceptions
  from .base.base_exception import BaseException
+
  BaseException.install_exception_hook()

  # Log the total number of items in __all__ for debugging
  logger.debug(f"EDSL initialization complete with {len(__all__)} items in __all__")
-
edsl/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.51"
+ __version__ = "0.1.53"
@@ -4,7 +4,7 @@ import inspect
  from typing import Optional, Callable, TYPE_CHECKING
  from .. import QuestionFreeText, Results, AgentList, ScenarioList, Scenario, Model
  from ..questions import QuestionBase
- from ..results.Result import Result
+ from ..results.result import Result
  from jinja2 import Template
  from ..caching import Cache

@@ -124,6 +124,7 @@ What do you say next?"""
  and "{{ round_message }}" not in next_statement_question.question_text
  ):
  from .exceptions import ConversationValueError
+
  raise ConversationValueError(
  "If you pass in a per_round_message_template, you must include {{ round_message }} in the question_text."
  )
edsl/coop/coop.py CHANGED
@@ -666,6 +666,8 @@ class Coop(CoopFunctionsMixin):
  )
  edsl_class = ObjectRegistry.object_type_to_edsl_class.get(object_type)
  object = edsl_class.from_dict(json.loads(json_string))
+ if object_type == "results":
+ object.initialize_cache_from_results()
  return object

  def get_all(self, object_type: ObjectType) -> list[dict[str, Any]]:
@@ -26,7 +26,6 @@ class RetryConfig:


  class SkipHandler:
-
  def __init__(self, interview: "Interview"):
  self.interview = interview
  self.question_index = self.interview.to_index
@@ -47,7 +46,7 @@ class SkipHandler:

  def _current_info_env(self) -> dict[str, Any]:
  """
- - The current answers are "generated_tokens" and "comment"
+ - The current answers are "generated_tokens" and "comment"
  - The scenario should have "scenario." added to the keys
  - The agent traits should have "agent." added to the keys
  """
@@ -65,10 +64,14 @@ class SkipHandler:
  processed_answers[f"{key}.answer"] = value

  # Process scenario dictionary
- processed_scenario = {f"scenario.{k}": v for k, v in self.interview.scenario.items()}
+ processed_scenario = {
+ f"scenario.{k}": v for k, v in self.interview.scenario.items()
+ }

  # Process agent traits
- processed_agent = {f"agent.{k}": v for k, v in self.interview.agent["traits"].items()}
+ processed_agent = {
+ f"agent.{k}": v for k, v in self.interview.agent["traits"].items()
+ }

  return processed_answers | processed_scenario | processed_agent

@@ -85,21 +88,22 @@ class SkipHandler:
  # )

  # Get the index of the next question, which could also be the end of the survey
- next_question: Union[int, EndOfSurvey] = (
- self.interview.survey.rule_collection.next_question(
- q_now=current_question_index,
- answers=answers,
- )
+ next_question: Union[
+ int, EndOfSurvey
+ ] = self.interview.survey.rule_collection.next_question(
+ q_now=current_question_index,
+ answers=answers,
  )

-
  def cancel_between(start, end):
  """Cancel the tasks for questions between the start and end indices."""
  for i in range(start, end):
- #print(f"Cancelling task {i}")
- #self.interview.tasks[i].cancel()
- #self.interview.tasks[i].set_result("skipped")
- self.interview.skip_flags[self.interview.survey.questions[i].question_name] = True
+ # print(f"Cancelling task {i}")
+ # self.interview.tasks[i].cancel()
+ # self.interview.tasks[i].set_result("skipped")
+ self.interview.skip_flags[
+ self.interview.survey.questions[i].question_name
+ ] = True

  if (next_question_index := next_question.next_q) == EndOfSurvey:
  cancel_between(
@@ -111,8 +115,6 @@ class SkipHandler:
  cancel_between(current_question_index + 1, next_question_index)


-
-
  class AnswerQuestionFunctionConstructor:
  """Constructs a function that answers a question and records the answer."""

@@ -137,7 +139,6 @@ class AnswerQuestionFunctionConstructor:
  ):
  """Handle an exception that occurred while answering a question."""

-
  answers = copy.copy(
  self.interview.answers
  ) # copy to freeze the answers here for logging
@@ -171,7 +172,6 @@ class AnswerQuestionFunctionConstructor:
  question: "QuestionBase",
  task=None,
  ) -> "EDSLResultObjectInput":
-
  from tenacity import (
  RetryError,
  retry,
@@ -196,7 +196,6 @@ class AnswerQuestionFunctionConstructor:
  return invigilator.get_failed_task_result(
  failure_reason="Question skipped."
  )
-
  if self.skip_handler.should_skip(question):
  return invigilator.get_failed_task_result(
  failure_reason="Question skipped."
@@ -240,7 +239,6 @@ class AnswerQuestionFunctionConstructor:
  raise LanguageModelNoResponseError(
  f"Language model did not return a response for question '{question.question_name}.'"
  )
-
  if (
  question.question_name in self.interview.exceptions
  and had_language_model_no_response_error
@@ -250,7 +248,8 @@ class AnswerQuestionFunctionConstructor:
  return response

  try:
- return await attempt_answer()
+ out = await attempt_answer()
+ return out
  except RetryError as retry_error:
  original_error = retry_error.last_attempt.exception()
  self._handle_exception(
@@ -81,6 +81,7 @@ class InterviewExceptionEntry:
  raise_validation_errors=True,
  disable_remote_cache=True,
  disable_remote_inference=True,
+ cache=False,
  )
  return results.task_history.exceptions[0]["how_are_you"][0]

@@ -92,13 +93,13 @@ class InterviewExceptionEntry:
  def code(self, run=True):
  """Return the code to reproduce the exception."""
  lines = []
- lines.append("from .. import Question, Model, Scenario, Agent")
+ lines.append("from edsl import Question, Model, Scenario, Agent")

  lines.append(f"q = {repr(self.invigilator.question)}")
  lines.append(f"scenario = {repr(self.invigilator.scenario)}")
  lines.append(f"agent = {repr(self.invigilator.agent)}")
- lines.append(f"m = Model('{self.invigilator.model.model}')")
- lines.append("results = q.by(m).by(agent).by(scenario).run()")
+ lines.append(f"model = {repr(self.invigilator.model)}")
+ lines.append("results = q.by(model).by(agent).by(scenario).run()")
  code_str = "\n".join(lines)

  if run:
@@ -24,6 +24,7 @@ class InterviewTaskManager:
  for index, question_name in enumerate(self.survey.question_names)
  }
  self._task_status_log_dict = InterviewStatusLog()
+ self.survey_dag = None

  def build_question_tasks(
  self, answer_func, token_estimator, model_buckets
@@ -46,8 +47,9 @@ class InterviewTaskManager:
  self, existing_tasks: list[asyncio.Task], question: "QuestionBase"
  ) -> list[asyncio.Task]:
  """Get tasks that must be completed before the given question."""
- dag = self.survey.dag(textify=True)
- parents = dag.get(question.question_name, [])
+ if self.survey_dag is None:
+ self.survey_dag = self.survey.dag(textify=True)
+ parents = self.survey_dag.get(question.question_name, [])
  return [existing_tasks[self.to_index[parent_name]] for parent_name in parents]

  def _create_single_task(
@@ -100,4 +102,5 @@ class InterviewTaskManager:

  if __name__ == "__main__":
  import doctest
+
  doctest.testmod()
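
Note on the InterviewTaskManager change above: the survey DAG is now built once and cached on self.survey_dag instead of recomputing self.survey.dag(textify=True) for every question. A minimal sketch of the same lazy-caching pattern is below; it is illustrative only (the class and method names here are hypothetical, not part of the package), with survey.dag(textify=True) assumed to be the expensive call, as in the diff.

    class DagCacheSketch:
        """Illustrative only: build an expensive DAG once, reuse it per question."""

        def __init__(self, survey):
            self.survey = survey
            self.survey_dag = None  # populated on first use

        def prerequisites(self, question_name: str) -> list:
            if self.survey_dag is None:
                # Runs once per interview rather than once per question.
                self.survey_dag = self.survey.dag(textify=True)
            return self.survey_dag.get(question_name, [])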
@@ -1,6 +1,101 @@
  from ..jobs.fetch_invigilator import FetchInvigilator
  from ..scenarios import FileStore

+ import math
+
+ # Model configs: base tokens and tile tokens only
+ VISION_MODELS = {
+ "gpt-4o": {
+ "base_tokens": 85,
+ "tile_tokens": 170,
+ },
+ "gpt-4o-mini": {
+ "base_tokens": 2833,
+ "tile_tokens": 5667,
+ },
+ "o1": {
+ "base_tokens": 75,
+ "tile_tokens": 150,
+ },
+ }
+
+
+ def approximate_image_tokens_google(width: int, height: int) -> int:
+ """
+ Approximates the token usage for an image based on its dimensions.
+
+ This calculation is based on the rules described for Gemini 2.0 models
+ in the provided text:
+ - Images with both dimensions <= 384px cost 258 tokens.
+ - Larger images are processed in 768x768 tiles, each costing 258 tokens.
+
+ Note: This is an *approximation*. The exact cropping, scaling, and tiling
+ strategy used by the actual Gemini API might differ slightly.
+
+ Args:
+ width: The width of the image in pixels.
+ height: The height of the image in pixels.
+
+ Returns:
+ An estimated integer token count for the image.
+
+ Raises:
+ ValueError: If width or height are not positive integers.
+ """
+ SMALL_IMAGE_THRESHOLD = 384 # Max dimension for fixed token count
+ FIXED_TOKEN_COST_SMALL = 258 # Token cost for small images (<= 384x384)
+ TILE_SIZE = 768 # Dimension of tiles for larger images
+ TOKEN_COST_PER_TILE = 258 # Token cost per 768x768 tile
+ if (
+ not isinstance(width, int)
+ or not isinstance(height, int)
+ or width <= 0
+ or height <= 0
+ ):
+ raise ValueError("Image width and height must be positive integers.")
+
+ # Case 1: Small image (both dimensions <= threshold)
+ if width <= SMALL_IMAGE_THRESHOLD and height <= SMALL_IMAGE_THRESHOLD:
+ return FIXED_TOKEN_COST_SMALL
+
+ # Case 2: Larger image (at least one dimension > threshold)
+ else:
+ # Calculate how many tiles are needed to cover the width and height
+ # Use ceiling division to ensure full coverage
+ tiles_wide = math.ceil(width / TILE_SIZE)
+ tiles_high = math.ceil(height / TILE_SIZE)
+
+ # Total number of tiles is the product of tiles needed in each dimension
+ total_tiles = tiles_wide * tiles_high
+
+ # Total token cost is the number of tiles times the cost per tile
+ estimated_tokens = total_tiles * TOKEN_COST_PER_TILE
+ return estimated_tokens
+
+
+ def estimate_tokens(model_name, width, height):
+ if model_name == "test":
+ return 10 # for testing purposes
+ if "gemini" in model_name:
+ out = approximate_image_tokens_google(width, height)
+ return out
+ if "claude" in model_name:
+ total_tokens = width * height / 750
+ return total_tokens
+ if model_name not in VISION_MODELS:
+ total_tokens = width * height / 750
+ return total_tokens
+
+ config = VISION_MODELS[model_name]
+ TILE_SIZE = 512
+
+ tiles_x = math.ceil(width / TILE_SIZE)
+ tiles_y = math.ceil(height / TILE_SIZE)
+ total_tiles = tiles_x * tiles_y
+
+ total_tokens = config["base_tokens"] + config["tile_tokens"] * total_tiles
+ return total_tokens
+

  class RequestTokenEstimator:
  """Estimate the number of tokens that will be required to run the focal task."""
@@ -24,15 +119,22 @@ class RequestTokenEstimator:
  elif isinstance(prompt, list):
  for file in prompt:
  if isinstance(file, FileStore):
- file_tokens += file.size * 0.25
+ if file.is_image():
+ model_name = self.interview.model.model
+ width, height = file.get_image_dimensions()
+ token_usage = estimate_tokens(model_name, width, height)
+ file_tokens += token_usage
+ else:
+ file_tokens += file.size * 0.25
  else:
  from .exceptions import InterviewTokenError
+
  raise InterviewTokenError(f"Prompt is of type {type(prompt)}")
  result: float = len(combined_text) / 4.0 + file_tokens
  return result


-
  if __name__ == "__main__":
  import doctest
+
  doctest.testmod(optionflags=doctest.ELLIPSIS)
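
For orientation, here is what the estimator added above returns for a hypothetical 1024x768 image. The specific model-name strings are illustrative only; the dispatch in estimate_tokens checks for the substrings "gemini" or "claude", an exact key in VISION_MODELS, or falls back to the area-based rule.

    estimate_tokens("gpt-4o", 1024, 768)             # 2x2 tiles of 512px: 85 + 170 * 4 = 765
    estimate_tokens("gemini-1.5-flash", 1024, 768)   # 2x1 tiles of 768px: 2 * 258 = 516
    estimate_tokens("claude-3-5-sonnet", 1024, 768)  # area rule: 1024 * 768 / 750 ≈ 1048.6
    estimate_tokens("some-other-model", 1024, 768)   # unknown models use the same area rule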
@@ -24,11 +24,11 @@ if TYPE_CHECKING:
  from ..key_management import KeyLookup


-
  PromptType = Literal["user_prompt", "system_prompt", "encoded_image", "files_list"]

  NA = "Not Applicable"

+
  class InvigilatorBase(ABC):
  """An invigiator (someone who administers an exam) is a class that is responsible for administering a question to an agent.

@@ -261,13 +261,14 @@ class InvigilatorBase(ABC):
  current_answers=current_answers,
  )

+
  class InvigilatorAI(InvigilatorBase):
  """An invigilator that uses an AI model to answer questions."""

  def get_prompts(self) -> Dict[PromptType, "Prompt"]:
  """Return the prompts used."""
  return self.prompt_constructor.get_prompts()
-
+
  def get_captured_variables(self) -> dict:
  """Get the captured variables."""
  return self.prompt_constructor.get_captured_variables()
@@ -281,6 +282,7 @@ class InvigilatorAI(InvigilatorBase):
  if "encoded_image" in prompts:
  params["encoded_image"] = prompts["encoded_image"]
  from .exceptions import InvigilatorNotImplementedError
+
  raise InvigilatorNotImplementedError("encoded_image not implemented")

  if "files_list" in prompts:
@@ -307,7 +309,8 @@ class InvigilatorAI(InvigilatorBase):
  """
  agent_response_dict: AgentResponseDict = await self.async_get_agent_response()
  self.store_response(agent_response_dict)
- return self._extract_edsl_result_entry_and_validate(agent_response_dict)
+ out = self._extract_edsl_result_entry_and_validate(agent_response_dict)
+ return out

  def _remove_from_cache(self, cache_key) -> None:
  """Remove an entry from the cache."""
@@ -389,6 +392,35 @@ class InvigilatorAI(InvigilatorBase):
  edsl_dict = agent_response_dict.edsl_dict._asdict()
  exception_occurred = None
  validated = False
+
+ if agent_response_dict.model_outputs.cache_used:
+ data = {
+ "answer": agent_response_dict.edsl_dict.answer
+ if type(agent_response_dict.edsl_dict.answer) is str
+ or type(agent_response_dict.edsl_dict.answer) is dict
+ or type(agent_response_dict.edsl_dict.answer) is list
+ or type(agent_response_dict.edsl_dict.answer) is int
+ or type(agent_response_dict.edsl_dict.answer) is float
+ or type(agent_response_dict.edsl_dict.answer) is bool
+ else "",
+ "comment": agent_response_dict.edsl_dict.comment
+ if agent_response_dict.edsl_dict.comment
+ else "",
+ "generated_tokens": agent_response_dict.edsl_dict.generated_tokens,
+ "question_name": self.question.question_name,
+ "prompts": self.get_prompts(),
+ "cached_response": agent_response_dict.model_outputs.cached_response,
+ "raw_model_response": agent_response_dict.model_outputs.response,
+ "cache_used": agent_response_dict.model_outputs.cache_used,
+ "cache_key": agent_response_dict.model_outputs.cache_key,
+ "validated": True,
+ "exception_occurred": exception_occurred,
+ "cost": agent_response_dict.model_outputs.cost,
+ }
+
+ result = EDSLResultObjectInput(**data)
+ return result
+
  try:
  # if the question has jinja parameters, it is easier to make a new question with the parameters
  if self.question.parameters:
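
A side note on the cache short-circuit added above: the chained type(...) is ... checks whitelist plain JSON-like answer types before a cached response is reused without re-validation. A roughly equivalent, more idiomatic check is sketched below; it is illustrative only and not the package's code (note that isinstance also accepts subclasses, e.g. a str subclass, which the type(...) is form rejects).

    # Illustrative sketch: keep a cached answer only if it is a JSON-like value, else use "".
    ALLOWED_ANSWER_TYPES = (str, dict, list, int, float, bool)

    def normalize_cached_answer(answer):
        # isinstance-based whitelist; unlike `type(x) is T`, this also admits subclasses.
        return answer if isinstance(answer, ALLOWED_ANSWER_TYPES) else ""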
@@ -405,7 +437,7 @@ class InvigilatorAI(InvigilatorBase):
  self.question.question_options = new_question_options

  question_with_validators = self.question.render(
- self.scenario | prior_answers_dict | {'agent':self.agent.traits}
+ self.scenario | prior_answers_dict | {"agent": self.agent.traits}
  )
  question_with_validators.use_code = self.question.use_code
  else:
@@ -426,6 +458,7 @@ class InvigilatorAI(InvigilatorBase):
  exception_occurred = non_validation_error
  finally:
  # even if validation failes, we still return the result
+
  data = {
  "answer": answer,
  "comment": comment,