edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/base/data_transfer_models.py +15 -4
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/dataset/dataset_operations_mixin.py +216 -180
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +7 -3
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +94 -5
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/interviews/request_token_estimator.py +8 -0
- edsl/invigilators/invigilators.py +35 -13
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_pricing_estimation.py +154 -113
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +110 -12
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +176 -71
- edsl/language_models/registry.py +5 -0
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_dict.py +201 -16
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +115 -46
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +150 -9
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
edsl/jobs/async_interview_runner.py
CHANGED
@@ -5,8 +5,9 @@ This module provides functionality to run multiple interviews in parallel
 with controlled concurrency, supporting both error handling and result collection.
 """
 
-from collections.abc import AsyncGenerator
-from …
+from collections.abc import AsyncGenerator, AsyncIterator
+from contextlib import asynccontextmanager
+from typing import List, Generator, Tuple, TYPE_CHECKING, AsyncIterator
 from dataclasses import dataclass
 import asyncio
 from ..data_transfer_models import EDSLResultObjectInput
@@ -22,19 +23,15 @@ if TYPE_CHECKING:
     from ..jobs import Jobs
 
 @dataclass
-class InterviewResult:
-    """Container for …
-
-        …
-        …
-        interview: The Interview object used to conduct the interview
-        order: The original position of this interview in the processing queue
-    """
-
-    result: Result
-    interview: Interview
-    order: int
+class InterviewBatch:
+    """Container for a batch of interviews being processed."""
+    chunks: List[Tuple[int, Interview]]
+    results: List[Tuple[Result, Interview, int]]
+    failed: List[Tuple[int, Interview, Exception]]
 
+    @classmethod
+    def create(cls, chunks: List[Tuple[int, Interview]]) -> 'InterviewBatch':
+        return cls(chunks=chunks, results=[], failed=[])
 
 class AsyncInterviewRunner:
     """
@@ -68,6 +65,110 @@ class AsyncInterviewRunner:
         self.run_config = run_config
         self._initialized = asyncio.Event()
 
+    @asynccontextmanager
+    async def _manage_tasks(self, tasks: List[asyncio.Task]) -> AsyncIterator[None]:
+        """Context manager for handling task lifecycle and cleanup."""
+        try:
+            yield
+        finally:
+            for task in tasks:
+                if not task.done():
+                    task.cancel()
+
+    @asynccontextmanager
+    async def _interview_batch_processor(self) -> AsyncIterator[AsyncGenerator[tuple[Result, Interview, int], None]]:
+        """Context manager for processing batches of interviews.
+
+        Handles initialization, cleanup, and error management for the entire
+        interview processing lifecycle.
+        """
+        self._initialized.set()
+        self._current_idx = 0
+        interview_generator = self._expand_interviews()
+
+        try:
+            async def process_batches() -> AsyncGenerator[tuple[Result, Interview, int], None]:
+                while True:
+                    chunk = self._get_next_chunk(interview_generator)
+                    if not chunk:
+                        break
+
+                    async with self._process_chunk(chunk) as results:
+                        for result_tuple in results:
+                            # Yield the full tuple (result, interview, idx)
+                            yield result_tuple
+
+                    # Clean up chunk to help with garbage collection
+                    for idx, interview in chunk:
+                        # Explicitly clear any interview references when done with the chunk
+                        if hasattr(interview, 'clear_references'):
+                            interview.clear_references()
+                    del chunk
+
+            yield process_batches()
+
+        finally:
+            # Cleanup code to help garbage collection
+            self._current_idx = 0
+            self._initialized.clear()
+            # Clear the generator to avoid references
+            if 'interview_generator' in locals():
+                del interview_generator
+
+    async def _run_single_interview(
+        self, interview: Interview, idx: int
+    ) -> Tuple[Result, Interview, int]:
+        """Execute a single interview with error handling."""
+        try:
+            await interview.async_conduct_interview(self.run_config)
+            # Create result and explicitly break reference to interview
+            result = Result.from_interview(interview)
+            # Update the status
+            self.run_config.environment.jobs_runner_status.add_completed_interview(
+                interview
+            )
+            # Return tuple that keeps the interview reference
+            return (result, interview, idx)
+        except Exception as e:
+            if self.run_config.parameters.stop_on_exception:
+                raise
+            # Could log the error here if needed
+            return None
+
+    @asynccontextmanager
+    async def _process_chunk(
+        self, chunk: List[Tuple[int, Interview]]
+    ) -> AsyncIterator[List[Tuple[Result, Interview, int]]]:
+        """Process a chunk of interviews concurrently."""
+        tasks = [
+            asyncio.create_task(self._run_single_interview(interview, idx))
+            for idx, interview in chunk
+        ]
+
+        async with self._manage_tasks(tasks):
+            results = await asyncio.gather(
+                *tasks,
+                return_exceptions=not self.run_config.parameters.stop_on_exception
+            )
+            # Filter out None results and yield a new list to avoid keeping the original tuple references
+            valid_results = []
+            for r in results:
+                if r is not None:
+                    result, interview, idx = r
+                    # Create a new tuple to break reference to the original
+                    new_tuple = (result, interview, idx)
+                    valid_results.append(new_tuple)
+
+                    # Clear original tuple to help GC
+                    del r
+
+            yield valid_results
+
+            # Manually clean up the valid_results list and its contents to help garbage collection
+            for tup in valid_results:
+                del tup
+            del valid_results
+
     def _expand_interviews(self) -> Generator["Interview", None, None]:
         """
         Create multiple copies of each interview based on the run configuration.
@@ -101,104 +202,45 @@ class AsyncInterviewRunner:
             interview.cache = self.run_config.environment.cache
             yield interview
 
-    async def …(
-        self, interview: "Interview"
-    ) -> Tuple["Result", "Interview"]:
-        """
-        Asynchronously conduct a single interview.
-
-        This method performs the interview and creates a Result object with
-        the extracted answers and model responses.
-
-        Args:
-            interview: The interview to conduct
-
-        Returns:
-            Tuple containing the Result object and the Interview object
-
-        Notes:
-            'extracted_answers' contains the processed and validated answers
-            from the interview, which may differ from the raw model output.
-        """
-        extracted_answers: dict[str, str]
-        model_response_objects: List[EDSLResultObjectInput]
-
-        extracted_answers, model_response_objects = (
-            await interview.async_conduct_interview(self.run_config)
-        )
-        result = Result.from_interview(
-            interview=interview,
-            extracted_answers=extracted_answers,
-            model_response_objects=model_response_objects,
-        )
-        return result, interview
-
-    async def run(
+    def _get_next_chunk(
         self,
-        …
+        gen: Generator[Interview, None, None]
+    ) -> List[Tuple[int, Interview]]:
+        """Take interviews from the generator up to MAX_CONCURRENT."""
+        chunk = []
+        while len(chunk) < self.MAX_CONCURRENT:
+            try:
+                interview = next(gen)
+                chunk.append((self._current_idx, interview))
+                self._current_idx += 1
+            except StopIteration:
+                break
+        return chunk
+
+    async def run(self) -> AsyncGenerator[tuple[Result, Interview, int], None]:
         """
         Run all interviews asynchronously and yield results as they complete.
 
-        This method …
-        maintaining controlled concurrency …
+        This method orchestrates the parallel execution of interviews while
+        maintaining controlled concurrency. Results are yielded as soon as
        they become available.
 
         Yields:
-            Tuples of (Result, Interview) as interviews complete
-
-            …
-            …
-            …
-            - Ensures task cleanup even in case of failures
+            Tuples of (Result, Interview, idx) as interviews complete, where idx is the
+            original position index of the interview.
+
+        Raises:
+            Exception: If stop_on_exception is True and any interview fails
         """
-
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-                    interview
-                )
-                result.order = idx
-                return InterviewResult(result, interview, idx)
-            except Exception:
-                if self.run_config.parameters.stop_on_exception:
-                    raise
-                return None
-
-        # Process interviews in chunks
-        for i in range(0, len(interviews), self.MAX_CONCURRENT):
-            chunk = interviews[i : i + self.MAX_CONCURRENT]
-            tasks = [
-                asyncio.create_task(_process_single_interview(interview, idx))
-                for idx, interview in enumerate(chunk, start=i)
-            ]
-
-            try:
-                # Wait for all tasks in the chunk to complete
-                results = await asyncio.gather(
-                    *tasks,
-                    return_exceptions=not self.run_config.parameters.stop_on_exception
-                )
-
-                # Process successful results
-                for result in (r for r in results if r is not None):
-                    yield result.result, result.interview
-
-            except Exception:
-                if self.run_config.parameters.stop_on_exception:
-                    raise
-                continue
-
-            finally:
-                # Clean up any remaining tasks
-                for task in tasks:
-                    if not task.done():
-                        task.cancel()
-
+        async with self._interview_batch_processor() as processor:
+            async for result_tuple in processor:
+                # For each result tuple in the processor
+                result, interview, idx = result_tuple
+                # Yield a new tuple to break reference to the original tuple
+                yield result, interview, idx
+
+                # Help garbage collection by removing references
+                del result_tuple
 
 if __name__ == "__main__":
     import doctest
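The rewritten runner replaces the old list-based chunk loop with a generator (`_expand_interviews` plus `_get_next_chunk`) feeding `asyncio.gather` batches of at most `MAX_CONCURRENT` tasks, with cancellation handled by `_manage_tasks`. A minimal, self-contained sketch of that pattern, stripped of the EDSL types — `fake_interview` and the `MAX_CONCURRENT` value here are placeholders, not EDSL APIs:

    import asyncio
    from itertools import islice

    MAX_CONCURRENT = 5  # placeholder; the real cap lives on AsyncInterviewRunner

    async def fake_interview(name: str) -> str:
        # Stand-in for Interview.async_conduct_interview
        await asyncio.sleep(0.01)
        return f"result for {name}"

    async def run_chunked(names):
        """Yield (idx, result) pairs, mirroring _get_next_chunk + _process_chunk."""
        it = enumerate(names)
        while True:
            chunk = list(islice(it, MAX_CONCURRENT))  # like _get_next_chunk
            if not chunk:
                break
            tasks = [asyncio.create_task(fake_interview(n)) for _, n in chunk]
            try:
                # return_exceptions=True mirrors running with stop_on_exception=False
                results = await asyncio.gather(*tasks, return_exceptions=True)
            finally:
                for t in tasks:  # same cleanup _manage_tasks performs
                    if not t.done():
                        t.cancel()
            for (idx, _), res in zip(chunk, results):
                if not isinstance(res, Exception):
                    yield idx, res

    async def main():
        async for idx, res in run_chunked(["a", "b", "c"]):
            print(idx, res)

    asyncio.run(main())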
edsl/jobs/data_structures.py
CHANGED
@@ -1,11 +1,11 @@
-from typing import Optional, Literal, TYPE_CHECKING
+from typing import Optional, Literal, TYPE_CHECKING, Any
 from dataclasses import dataclass, asdict
 from collections import UserDict
 from ..data_transfer_models import EDSLResultObjectInput
 
 # from edsl.data_transfer_models import VisibilityType
 from ..caching import Cache
-from ..buckets import BucketCollection
+# Import BucketCollection lazily to avoid circular imports
 from ..key_management import KeyLookup
 from ..base import Base
 
@@ -14,6 +14,7 @@ from .jobs_runner_status import JobsRunnerStatus
 if TYPE_CHECKING:
     from ..questions.question_base import QuestionBase
     from ..surveys import Survey
+    from ..buckets import BucketCollection
 
 VisibilityType = Literal["private", "public", "unlisted"]
 
@@ -33,7 +34,7 @@ class RunEnvironment:
         jobs_runner_status (JobsRunnerStatus, optional): Tracker for job execution progress
     """
     cache: Optional[Cache] = None
-    bucket_collection: Optional[BucketCollection] = None
+    bucket_collection: Optional[Any] = None  # Using Any to avoid circular import of BucketCollection
     key_lookup: Optional[KeyLookup] = None
    jobs_runner_status: Optional["JobsRunnerStatus"] = None
 
@@ -82,6 +83,7 @@ class RunParameters(Base):
     disable_remote_inference: bool = False
     job_uuid: Optional[str] = None
     fresh: bool = False  # if True, will not use cache and will save new results to cache
+    memory_threshold: Optional[int] = None  # Threshold in bytes for Results SQLList memory management
 
     def to_dict(self, add_edsl_version=False) -> dict:
         d = asdict(self)
@@ -131,7 +133,7 @@ class RunConfig:
         """
         self.environment = environment
 
-    def add_bucket_collection(self, bucket_collection: BucketCollection) -> None:
+    def add_bucket_collection(self, bucket_collection: "BucketCollection") -> None:
         """
         Set or replace the bucket collection in the environment.
 
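The `BucketCollection` changes above are the standard recipe for breaking an import cycle: move the import under `TYPE_CHECKING` so it only runs for type checkers, and fall back to `Any` or a quoted annotation at runtime. A minimal sketch of the idiom — the `mypackage.buckets` path is hypothetical:

    from typing import TYPE_CHECKING, Any, Optional
    from dataclasses import dataclass

    if TYPE_CHECKING:
        # Seen only by type checkers; never executed at runtime,
        # so the circular import never happens.
        from mypackage.buckets import BucketCollection  # hypothetical path

    @dataclass
    class Environment:
        # Any at runtime; a comment (or quoted annotation) preserves the intent.
        bucket_collection: Optional[Any] = None

        def add_bucket_collection(self, bucket_collection: "BucketCollection") -> None:
            # The quoted annotation is resolved lazily, so the real class is
            # only needed when a type checker evaluates it.
            self.bucket_collection = bucket_collection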
edsl/jobs/decorators.py
CHANGED
@@ -1,6 +1,67 @@
 from functools import wraps
 from threading import RLock
 import inspect
+from typing import Optional, Union, TypeVar, Callable, cast
+
+try:
+    from typing import ParamSpec
+except ImportError:
+    from typing_extensions import ParamSpec
+
+from ..jobs.data_structures import RunEnvironment, RunParameters, RunConfig
+
+
+P = ParamSpec("P")
+T = TypeVar("T")
+
+
+def with_config(f: Callable[P, T]) -> Callable[P, T]:
+    """
+    Decorator that processes function parameters to match the RunConfig dataclass structure.
+
+    This decorator is used primarily with the run() and run_async() methods to provide
+    a consistent interface for job configuration while maintaining a clean API.
+
+    The decorator:
+    1. Extracts environment-related parameters into a RunEnvironment instance
+    2. Extracts execution-related parameters into a RunParameters instance
+    3. Combines both into a single RunConfig object
+    4. Passes this RunConfig to the decorated function as a keyword argument
+
+    Parameters:
+        f (Callable): The function to decorate, typically run() or run_async()
+
+    Returns:
+        Callable: A wrapped function that accepts all RunConfig parameters directly
+
+    Example:
+        @with_config
+        def run(self, *, config: RunConfig) -> Results:
+            # Function can now access config.parameters and config.environment
+    """
+    parameter_fields = {
+        name: field.default
+        for name, field in RunParameters.__dataclass_fields__.items()
+    }
+    environment_fields = {
+        name: field.default
+        for name, field in RunEnvironment.__dataclass_fields__.items()
+    }
+    # Combined fields dict used for reference during development
+    # combined = {**parameter_fields, **environment_fields}
+
+    @wraps(f)
+    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
+        environment = RunEnvironment(
+            **{k: v for k, v in kwargs.items() if k in environment_fields}
+        )
+        parameters = RunParameters(
+            **{k: v for k, v in kwargs.items() if k in parameter_fields}
+        )
+        config = RunConfig(environment=environment, parameters=parameters)
+        return f(*args, config=config)
+
+    return cast(Callable[P, T], wrapper)
 
 
 def synchronized_class(wrapped_class):
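A sketch of how a method wrapped with `with_config` would be called, assuming the field names visible in this diff (`stop_on_exception` and `fresh` on RunParameters, `cache` on RunEnvironment) and that all fields carry defaults, as the ones shown here do; `MyJobs` is a stand-in class for illustration, not edsl's `Jobs`:

    from edsl.jobs.decorators import with_config
    from edsl.jobs.data_structures import RunConfig

    class MyJobs:  # stand-in class, not edsl's Jobs
        @with_config
        def run(self, *, config: RunConfig):
            # Every keyword argument has been routed into one RunConfig
            print(config.parameters.stop_on_exception, config.parameters.fresh)
            print(config.environment.cache)

    # Callers pass flat keyword arguments; the wrapper sorts each one into
    # RunParameters or RunEnvironment by dataclass field name.
    MyJobs().run(stop_on_exception=True, fresh=True, cache=None)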
edsl/jobs/fetch_invigilator.py
CHANGED
@@ -1,10 +1,11 @@
 from typing import Dict, Any, Optional, TYPE_CHECKING
+import weakref
 
 if TYPE_CHECKING:
     from ..questions import QuestionBase
     from ..agents import InvigilatorBase
-    from ..…
-    from …
+    from ..key_management import KeyLookup
+    from ..interviews import Interview
 
 
 class FetchInvigilator:
@@ -14,34 +15,76 @@ class FetchInvigilator:
         current_answers: Optional[Dict[str, Any]] = None,
         key_lookup: Optional["KeyLookup"] = None,
     ):
-        …
-        …
-        …
-        …
-        …
+        # Store a weak reference to the interview instead of a strong reference
+        self._interview_ref = weakref.ref(interview)
+
+        # Store external parameters that don't create reference cycles
+        self._current_answers = current_answers
         self.key_lookup = key_lookup
 
+    @property
+    def interview(self):
+        """Access the interview via weak reference if it still exists."""
+        interview = self._interview_ref()
+        if interview is None:
+            raise RuntimeError("Interview has been garbage collected")
+        return interview
+
+    @property
+    def _scenario(self):
+        return self.interview.scenario
+
+    @property
+    def _model(self):
+        return self.interview.model
+
+    @property
+    def _survey(self):
+        return self.interview.survey
+
+    @property
+    def _agent(self):
+        return self.interview.agent
+
+    @property
+    def _iteration(self):
+        return self.interview.iteration
+
+    @property
+    def _cache(self):
+        return self.interview.cache
+
+    @property
+    def _raise_validation_errors(self):
+        return self.interview.raise_validation_errors
+
+    @property
+    def current_answers(self):
+        if self._current_answers is not None:
+            return self._current_answers
+        return self.interview.answers
+
     def get_invigilator(self, question: "QuestionBase") -> "InvigilatorBase":
         """Return an invigilator for the given question.
 
         :param question: the question to be answered
         :param debug: whether to use debug mode, in which case `InvigilatorDebug` is used.
         """
-        …
-        invigilator = self.…
+        # Use cached properties instead of accessing through the interview reference
+        invigilator = self._agent.create_invigilator(
             question=question,
-            scenario=self.…
-            model=self.…
-            survey=self.…
-            memory_plan=self.…
-            current_answers=self.current_answers,
-            iteration=self.…
-            cache=self.…
-            raise_validation_errors=self.…
+            scenario=self._scenario,
+            model=self._model,
+            survey=self._survey,
+            memory_plan=self._survey.memory_plan,
+            current_answers=self.current_answers,
+            iteration=self._iteration,
+            cache=self._cache,
+            raise_validation_errors=self._raise_validation_errors,
             key_lookup=self.key_lookup,
         )
-        """Return an invigilator for the given question."""
         return invigilator
 
     def __call__(self, question):
         return self.get_invigilator(question)
+
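`FetchInvigilator` now reaches the interview through `weakref.ref`, so an invigilator can no longer keep an interview (and everything it references) alive on its own. A standalone sketch of the pattern with placeholder classes, not EDSL types:

    import weakref

    class Owner:
        """Stand-in for Interview: a long-lived object that helpers point back to."""
        def __init__(self, name: str):
            self.name = name

    class Helper:
        """Stand-in for FetchInvigilator: holds only a weak reference to its owner."""
        def __init__(self, owner: Owner):
            self._owner_ref = weakref.ref(owner)

        @property
        def owner(self) -> Owner:
            owner = self._owner_ref()
            if owner is None:
                # The owner was garbage collected; the helper must not resurrect it.
                raise RuntimeError("Owner has been garbage collected")
            return owner

    o = Owner("interview-0")
    h = Helper(o)
    print(h.owner.name)  # "interview-0" while the owner is alive
    del o
    # h.owner would now raise RuntimeError instead of silently keeping
    # the owner alive the way a strong reference would.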
edsl/jobs/html_table_job_logger.py
CHANGED
@@ -362,7 +362,11 @@ class HTMLTableJobLogger(JobLogger):
         other_fields = []
 
         for field, _ in self.jobs_info.__annotations__.items():
-            if field …
+            if field not in [
+                "pretty_names",
+                "completed_interviews",
+                "failed_interviews",
+            ]:
                 value = getattr(self.jobs_info, field)
                 if not value:
                     continue
@@ -522,6 +526,14 @@ class HTMLTableJobLogger(JobLogger):
 
         display_style = "block" if self.is_expanded else "none"
 
+        header_status_text = status_text
+        if (
+            current_status == JobsStatus.PARTIALLY_FAILED
+            and self.jobs_info.completed_interviews is not None
+            and self.jobs_info.failed_interviews is not None
+        ):
+            header_status_text += f" ({self.jobs_info.completed_interviews:,} completed, {self.jobs_info.failed_interviews:,} failed)"
+
         return f"""
         {css}
         <div class="jobs-container">
@@ -539,7 +551,7 @@ class HTMLTableJobLogger(JobLogger):
                 <span id="arrow-{self.log_id}" class="expand-toggle">{'⌃' if self.is_expanded else '⌄'}</span>
                 Job Status 🦜
             </div>
-            <div class="{status_class}">{status_text}</div>
+            <div class="{status_class}">{header_status_text}</div>
         </div>
         <div id="content-{self.log_id}" class="jobs-content" style="display: {display_style};">
             {content_html}
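The new header text uses Python's `,` format specifier to group thousands in the interview counts, e.g.:

    completed, failed = 12345, 67
    print(f"({completed:,} completed, {failed:,} failed)")
    # -> (12,345 completed, 67 failed)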