PyPI - edsl - Versions diffs - 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl - Mend

edsl 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

edsl/Base.py +24 -14
edsl/__init__.py +1 -0
edsl/__version__.py +1 -1
edsl/agents/Agent.py +6 -6
edsl/agents/Invigilator.py +28 -6
edsl/agents/InvigilatorBase.py +8 -27
edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +150 -182
edsl/agents/prompt_helpers.py +129 -0
edsl/config.py +26 -34
edsl/coop/coop.py +14 -4
edsl/data_transfer_models.py +26 -73
edsl/enums.py +2 -0
edsl/inference_services/AnthropicService.py +5 -2
edsl/inference_services/AwsBedrock.py +5 -2
edsl/inference_services/AzureAI.py +5 -2
edsl/inference_services/GoogleService.py +108 -33
edsl/inference_services/InferenceServiceABC.py +44 -13
edsl/inference_services/MistralAIService.py +5 -2
edsl/inference_services/OpenAIService.py +10 -6
edsl/inference_services/TestService.py +34 -16
edsl/inference_services/TogetherAIService.py +170 -0
edsl/inference_services/registry.py +2 -0
edsl/jobs/Jobs.py +109 -18
edsl/jobs/buckets/BucketCollection.py +24 -15
edsl/jobs/buckets/TokenBucket.py +64 -10
edsl/jobs/interviews/Interview.py +130 -49
edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +16 -0
edsl/jobs/interviews/InterviewExceptionEntry.py +2 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +119 -173
edsl/jobs/runners/JobsRunnerStatus.py +332 -0
edsl/jobs/tasks/QuestionTaskCreator.py +1 -13
edsl/jobs/tasks/TaskHistory.py +17 -0
edsl/language_models/LanguageModel.py +36 -38
edsl/language_models/registry.py +13 -9
edsl/language_models/utilities.py +5 -2
edsl/questions/QuestionBase.py +74 -16
edsl/questions/QuestionBaseGenMixin.py +28 -0
edsl/questions/QuestionBudget.py +93 -41
edsl/questions/QuestionCheckBox.py +1 -1
edsl/questions/QuestionFreeText.py +6 -0
edsl/questions/QuestionMultipleChoice.py +13 -24
edsl/questions/QuestionNumerical.py +5 -4
edsl/questions/Quick.py +41 -0
edsl/questions/ResponseValidatorABC.py +11 -6
edsl/questions/derived/QuestionLinearScale.py +4 -1
edsl/questions/derived/QuestionTopK.py +4 -1
edsl/questions/derived/QuestionYesNo.py +8 -2
edsl/questions/descriptors.py +12 -11
edsl/questions/templates/budget/__init__.py +0 -0
edsl/questions/templates/budget/answering_instructions.jinja +7 -0
edsl/questions/templates/budget/question_presentation.jinja +7 -0
edsl/questions/templates/extract/__init__.py +0 -0
edsl/questions/templates/numerical/answering_instructions.jinja +0 -1
edsl/questions/templates/rank/__init__.py +0 -0
edsl/questions/templates/yes_no/answering_instructions.jinja +2 -2
edsl/results/DatasetExportMixin.py +5 -1
edsl/results/Result.py +1 -1
edsl/results/Results.py +4 -1
edsl/scenarios/FileStore.py +178 -34
edsl/scenarios/Scenario.py +76 -37
edsl/scenarios/ScenarioList.py +19 -2
edsl/scenarios/ScenarioListPdfMixin.py +150 -4
edsl/study/Study.py +32 -0
edsl/surveys/DAG.py +62 -0
edsl/surveys/MemoryPlan.py +26 -0
edsl/surveys/Rule.py +34 -1
edsl/surveys/RuleCollection.py +55 -5
edsl/surveys/Survey.py +189 -10
edsl/surveys/base.py +4 -0
edsl/templates/error_reporting/interview_details.html +6 -1
edsl/utilities/utilities.py +9 -1
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/METADATA +3 -1
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/RECORD +75 -69
edsl/jobs/interviews/retry_management.py +0 -39
edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
edsl/scenarios/ScenarioImageMixin.py +0 -100
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/LICENSE +0 -0
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/WHEEL +0 -0

edsl/jobs/interviews/retry_management.py DELETED Viewed

@@ -1,39 +0,0 @@
-from edsl import CONFIG
-from tenacity import (
-    retry,
-    wait_exponential,
-    stop_after_attempt,
-    retry_if_exception_type,
-    before_sleep,
-)
-EDSL_BACKOFF_START_SEC = float(CONFIG.get("EDSL_BACKOFF_START_SEC"))
-EDSL_MAX_BACKOFF_SEC = float(CONFIG.get("EDSL_MAX_BACKOFF_SEC"))
-EDSL_MAX_ATTEMPTS = int(CONFIG.get("EDSL_MAX_ATTEMPTS"))
-def print_retry(retry_state, print_to_terminal=True):
-    "Prints details on tenacity retries."
-    attempt_number = retry_state.attempt_number
-    exception = retry_state.outcome.exception()
-    wait_time = retry_state.next_action.sleep
-    exception_name = type(exception).__name__
-    if print_to_terminal:
-        print(
-            f"Attempt {attempt_number} failed with exception '{exception_name}':"
-            f"{exception}",
-            f"now waiting {wait_time:.2f} seconds before retrying."
-            f"Parameters: start={EDSL_BACKOFF_START_SEC}, max={EDSL_MAX_BACKOFF_SEC}, max_attempts={EDSL_MAX_ATTEMPTS}."
-            "\n\n",
-        )
-retry_strategy = retry(
-    wait=wait_exponential(
-        multiplier=EDSL_BACKOFF_START_SEC, max=EDSL_MAX_BACKOFF_SEC
-    ),  # Exponential back-off starting at 1s, doubling, maxing out at 60s
-    stop=stop_after_attempt(EDSL_MAX_ATTEMPTS),  # Stop after 5 attempts
-    # retry=retry_if_exception_type(Exception),  # Customize this as per your specific retry-able exception
-    before_sleep=print_retry,  # Use custom print function for retries
-)

edsl/jobs/runners/JobsRunnerStatusMixin.py DELETED Viewed

@@ -1,333 +0,0 @@
-from __future__ import annotations
-from typing import List, DefaultDict
-import asyncio
-from typing import Type
-from collections import defaultdict
-from typing import Literal, List, Type, DefaultDict
-from collections import UserDict, defaultdict
-from edsl.jobs.interviews.InterviewStatusDictionary import InterviewStatusDictionary
-from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
-from edsl.jobs.tokens.TokenUsage import TokenUsage
-from edsl.enums import get_token_pricing
-from edsl.jobs.tasks.task_status_enum import TaskStatus
-InterviewTokenUsageMapping = DefaultDict[str, InterviewTokenUsage]
-from edsl.jobs.interviews.InterviewStatistic import InterviewStatistic
-from edsl.jobs.interviews.InterviewStatisticsCollection import (
-    InterviewStatisticsCollection,
-)
-from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
-# return {"cache_status": token_usage_type, "details": details, "cost": f"${token_usage.cost(prices):.5f}"}
-from dataclasses import dataclass, asdict
-from rich.text import Text
-from rich.box import SIMPLE
-from rich.table import Table
-@dataclass
-class ModelInfo:
-    model_name: str
-    TPM_limit_k: float
-    RPM_limit_k: float
-    num_tasks_waiting: int
-    token_usage_info: dict
-@dataclass
-class ModelTokenUsageStats:
-    token_usage_type: str
-    details: List[dict]
-    cost: str
-class Stats:
-    def elapsed_time(self):
-        InterviewStatistic("elapsed_time", value=elapsed_time, digits=1, units="sec.")
-class JobsRunnerStatusMixin:
-    # @staticmethod
-    # def status_dict(interviews: List[Type["Interview"]]) -> List[Type[InterviewStatusDictionary]]:
-    #     """
-    #     >>> from edsl.jobs.interviews.Interview import Interview
-    #     >>> interviews = [Interview.example()]
-    #     >>> JobsRunnerStatusMixin().status_dict(interviews)
-    #     [InterviewStatusDictionary({<TaskStatus.NOT_STARTED: 1>: 0, <TaskStatus.WAITING_FOR_DEPENDENCIES: 2>: 0, <TaskStatus.CANCELLED: 3>: 0, <TaskStatus.PARENT_FAILED: 4>: 0, <TaskStatus.WAITING_FOR_REQUEST_CAPACITY: 5>: 0, <TaskStatus.WAITING_FOR_TOKEN_CAPACITY: 6>: 0, <TaskStatus.API_CALL_IN_PROGRESS: 7>: 0, <TaskStatus.SUCCESS: 8>: 0, <TaskStatus.FAILED: 9>: 0, 'number_from_cache': 0})]
-    #     """
-    #     return [interview.interview_status for interview in interviews]
-    def _compute_statistic(stat_name: str, completed_tasks, elapsed_time, interviews):
-        stat_definitions = {
-            "elapsed_time": lambda: InterviewStatistic(
-                "elapsed_time", value=elapsed_time, digits=1, units="sec."
-            ),
-            "total_interviews_requested": lambda: InterviewStatistic(
-                "total_interviews_requested", value=len(interviews), units=""
-            ),
-            "completed_interviews": lambda: InterviewStatistic(
-                "completed_interviews", value=len(completed_tasks), units=""
-            ),
-            "percent_complete": lambda: InterviewStatistic(
-                "percent_complete",
-                value=(
-                    len(completed_tasks) / len(interviews) * 100
-                    if len(interviews) > 0
-                    else "NA"
-                ),
-                digits=0,
-                units="%",
-            ),
-            "average_time_per_interview": lambda: InterviewStatistic(
-                "average_time_per_interview",
-                value=elapsed_time / len(completed_tasks) if completed_tasks else "NA",
-                digits=1,
-                units="sec.",
-            ),
-            "task_remaining": lambda: InterviewStatistic(
-                "task_remaining", value=len(interviews) - len(completed_tasks), units=""
-            ),
-            "estimated_time_remaining": lambda: InterviewStatistic(
-                "estimated_time_remaining",
-                value=(
-                    (len(interviews) - len(completed_tasks))
-                    * (elapsed_time / len(completed_tasks))
-                    if len(completed_tasks) > 0
-                    else "NA"
-                ),
-                digits=1,
-                units="sec.",
-            ),
-        }
-        if stat_name not in stat_definitions:
-            raise ValueError(
-                f"Invalid stat_name: {stat_name}. The valid stat_names are: {list(stat_definitions.keys())}"
-            )
-        return stat_definitions[stat_name]()
-    @staticmethod
-    def _job_level_info(
-        completed_tasks: List[Type[asyncio.Task]],
-        elapsed_time: float,
-        interviews: List[Type["Interview"]],
-    ) -> InterviewStatisticsCollection:
-        interview_statistics = InterviewStatisticsCollection()
-        default_statistics = [
-            "elapsed_time",
-            "total_interviews_requested",
-            "completed_interviews",
-            "percent_complete",
-            "average_time_per_interview",
-            "task_remaining",
-            "estimated_time_remaining",
-        ]
-        for stat_name in default_statistics:
-            interview_statistics.add_stat(
-                JobsRunnerStatusMixin._compute_statistic(
-                    stat_name, completed_tasks, elapsed_time, interviews
-                )
-            )
-        return interview_statistics
-    @staticmethod
-    def _get_model_queues_info(interviews):
-        models_to_tokens = defaultdict(InterviewTokenUsage)
-        model_to_status = defaultdict(InterviewStatusDictionary)
-        waiting_dict = defaultdict(int)
-        for interview in interviews:
-            models_to_tokens[interview.model] += interview.token_usage
-            model_to_status[interview.model] += interview.interview_status
-            waiting_dict[interview.model] += interview.interview_status.waiting
-        for model, num_waiting in waiting_dict.items():
-            yield JobsRunnerStatusMixin._get_model_info(
-                model, num_waiting, models_to_tokens
-            )
-    @staticmethod
-    def generate_status_summary(
-        completed_tasks: List[Type[asyncio.Task]],
-        elapsed_time: float,
-        interviews: List[Type["Interview"]],
-        include_model_queues=False,
-    ) -> InterviewStatisticsCollection:
-        """Generate a summary of the status of the job runner.
-        :param completed_tasks: list of completed tasks
-        :param elapsed_time: time elapsed since the start of the job
-        :param interviews: list of interviews to be conducted
-        >>> from edsl.jobs.interviews.Interview import Interview
-        >>> interviews = [Interview.example()]
-        >>> completed_tasks = []
-        >>> elapsed_time = 0
-        >>> JobsRunnerStatusMixin().generate_status_summary(completed_tasks, elapsed_time, interviews)
-        {'Elapsed time': '0.0 sec.', 'Total interviews requested': '1 ', 'Completed interviews': '0 ', 'Percent complete': '0 %', 'Average time per interview': 'NA', 'Task remaining': '1 ', 'Estimated time remaining': 'NA'}
-        """
-        interview_status_summary: InterviewStatisticsCollection = (
-            JobsRunnerStatusMixin._job_level_info(
-                completed_tasks=completed_tasks,
-                elapsed_time=elapsed_time,
-                interviews=interviews,
-            )
-        )
-        if include_model_queues:
-            interview_status_summary.model_queues = list(
-                JobsRunnerStatusMixin._get_model_queues_info(interviews)
-            )
-        else:
-            interview_status_summary.model_queues = None
-        return interview_status_summary
-    @staticmethod
-    def _get_model_info(
-        model: str,
-        num_waiting: int,
-        models_to_tokens: InterviewTokenUsageMapping,
-    ) -> dict:
-        """Get the status of a model.
-        :param model: the model name
-        :param num_waiting: the number of tasks waiting for capacity
-        :param models_to_tokens: a mapping of models to token usage
-        >>> from edsl.jobs.interviews.Interview import Interview
-        >>> interviews = [Interview.example()]
-        >>> models_to_tokens = defaultdict(InterviewTokenUsage)
-        >>> model = interviews[0].model
-        >>> num_waiting = 0
-        >>> JobsRunnerStatusMixin()._get_model_info(model, num_waiting, models_to_tokens)
-        ModelInfo(model_name='...', TPM_limit_k=..., RPM_limit_k=..., num_tasks_waiting=0, token_usage_info=[ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000'), ModelTokenUsageStats(token_usage_type='cached_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')])
-        """
-        ## TODO: This should probably be a coop method
-        prices = get_token_pricing(model.model)
-        token_usage_info = []
-        for token_usage_type in ["new_token_usage", "cached_token_usage"]:
-            token_usage_info.append(
-                JobsRunnerStatusMixin._get_token_usage_info(
-                    token_usage_type, models_to_tokens, model, prices
-                )
-            )
-        return ModelInfo(
-            **{
-                "model_name": model.model,
-                "TPM_limit_k": model.TPM / 1000,
-                "RPM_limit_k": model.RPM / 1000,
-                "num_tasks_waiting": num_waiting,
-                "token_usage_info": token_usage_info,
-            }
-        )
-    @staticmethod
-    def _get_token_usage_info(
-        token_usage_type: Literal["new_token_usage", "cached_token_usage"],
-        models_to_tokens: InterviewTokenUsageMapping,
-        model: str,
-        prices: "TokenPricing",
-    ) -> ModelTokenUsageStats:
-        """Get the token usage info for a model.
-        >>> from edsl.jobs.interviews.Interview import Interview
-        >>> interviews = [Interview.example()]
-        >>> models_to_tokens = defaultdict(InterviewTokenUsage)
-        >>> model = interviews[0].model
-        >>> prices = get_token_pricing(model.model)
-        >>> cache_status = "new_token_usage"
-        >>> JobsRunnerStatusMixin()._get_token_usage_info(cache_status, models_to_tokens, model, prices)
-        ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')
-        """
-        all_token_usage: InterviewTokenUsage = models_to_tokens[model]
-        token_usage: TokenUsage = getattr(all_token_usage, token_usage_type)
-        details = [
-            {"type": token_type, "tokens": getattr(token_usage, token_type)}
-            for token_type in ["prompt_tokens", "completion_tokens"]
-        ]
-        return ModelTokenUsageStats(
-            token_usage_type=token_usage_type,
-            details=details,
-            cost=f"${token_usage.cost(prices):.5f}",
-        )
-    @staticmethod
-    def _add_statistics_to_table(table, status_summary):
-        table.add_column("Statistic", style="dim", no_wrap=True, width=50)
-        table.add_column("Value", width=10)
-        for key, value in status_summary.items():
-            if key != "model_queues":
-                table.add_row(key, value)
-    @staticmethod
-    def display_status_table(status_summary: InterviewStatisticsCollection) -> "Table":
-        table = Table(
-            title="Job Status",
-            show_header=True,
-            header_style="bold magenta",
-            box=SIMPLE,
-        )
-        ### Job-level statistics
-        JobsRunnerStatusMixin._add_statistics_to_table(table, status_summary)
-        ## Model-level statistics
-        spacing = " "
-        if status_summary.model_queues is not None:
-            table.add_row(Text("Model Queues", style="bold red"), "")
-            for model_info in status_summary.model_queues:
-                model_name = model_info.model_name
-                tpm = f"TPM (k)={model_info.TPM_limit_k}"
-                rpm = f"RPM (k)= {model_info.RPM_limit_k}"
-                pretty_model_name = model_name + ";" + tpm + ";" + rpm
-                table.add_row(Text(pretty_model_name, style="blue"), "")
-                table.add_row(
-                    "Number question tasks waiting for capacity",
-                    str(model_info.num_tasks_waiting),
-                )
-                # Token usage and cost info
-                for token_usage_info in model_info.token_usage_info:
-                    token_usage_type = token_usage_info.token_usage_type
-                    table.add_row(
-                        Text(
-                            spacing + token_usage_type.replace("_", " "), style="bold"
-                        ),
-                        "",
-                    )
-                    for detail in token_usage_info.details:
-                        token_type = detail["type"]
-                        tokens = detail["tokens"]
-                        table.add_row(spacing + f"{token_type}", f"{tokens:,}")
-                    # table.add_row(spacing + "cost", cache_info["cost"])
-        return table
-    def status_table(self, completed_tasks: List[asyncio.Task], elapsed_time: float):
-        summary_data = JobsRunnerStatusMixin.generate_status_summary(
-            completed_tasks=completed_tasks,
-            elapsed_time=elapsed_time,
-            interviews=self.total_interviews,
-        )
-        return self.display_status_table(summary_data)
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod(optionflags=doctest.ELLIPSIS)

edsl/scenarios/ScenarioImageMixin.py DELETED Viewed

@@ -1,100 +0,0 @@
-import base64
-import os
-import requests
-import tempfile
-import mimetypes
-from urllib.parse import urlparse
-class ScenarioImageMixin:
-    def add_image(self, image_path: str):
-        """Add an image to a scenario.
-        >>> from edsl.scenarios.Scenario import Scenario
-        >>> s = Scenario({"food": "wood chips"})
-        >>> s.add_image(Scenario.example_image())
-        Scenario({'food': 'wood chips', 'file_path': '...', 'encoded_image': '...'})
-        """
-        new_scenario = self.from_image(image_path)
-        return self + new_scenario
-    @staticmethod
-    def example_image():
-        """Return an example image path."""
-        import os
-        base_path = os.path.dirname(os.path.abspath(__file__))
-        return os.path.join(base_path, "../../static/logo.png")
-    @classmethod
-    def from_image(cls, image_path: str) -> "Scenario":
-        """Creates a scenario with a base64 encoding of an image.
-        >>> from edsl.scenarios.Scenario import Scenario
-        >>> s = Scenario.from_image(Scenario.example_image())
-        >>> s
-        Scenario({'file_path': '...', 'encoded_image': '...'})
-        """
-        if image_path.startswith("http://") or image_path.startswith("https://"):
-            return cls._from_url_image(image_path)
-        else:
-            return cls._from_filepath_image(image_path)
-    @classmethod
-    def _from_url_image(cls, image_url: str) -> "Scenario":
-        """Handles downloading and encoding an image from a URL."""
-        response = requests.get(image_url)
-        if response.status_code == 200:
-            # Try to extract the file extension from the URL
-            parsed_url = urlparse(image_url)
-            file_name = parsed_url.path.split("/")[-1]
-            file_extension = file_name.split(".")[-1] if "." in file_name else None
-            # If the file extension is not found in the URL, use the content type
-            if not file_extension:
-                content_type = response.headers.get("Content-Type")
-                file_extension = mimetypes.guess_extension(content_type)
-            # If still no file extension, use a generic binary extension
-            if not file_extension:
-                file_extension = ".bin"
-            # Create a temporary file with the appropriate extension
-            with tempfile.NamedTemporaryFile(
-                delete=False, suffix=file_extension
-            ) as temp_file:
-                # Write the image content to the temporary file
-                temp_file.write(response.content)
-                temp_file_name = temp_file.name
-        else:
-            raise ValueError("Failed to download the image.")
-        scenario = cls._from_filepath_image(temp_file_name)
-        os.remove(temp_file_name)
-        return scenario
-    @classmethod
-    def _from_filepath_image(cls, image_path: str) -> "Scenario":
-        """Handles encoding an image from a local file path."""
-        with open(image_path, "rb") as image_file:
-            s = cls(
-                {
-                    "file_path": image_path,
-                    "encoded_image": base64.b64encode(image_file.read()).decode(
-                        "utf-8"
-                    ),
-                }
-            )
-            s._has_image = True
-            return s
-    def __repr__(self):
-        return f"Scenario({self.data})"
-if __name__ == "__main__":
-    import doctest
-    from edsl.scenarios.Scenario import Scenario
-    doctest.testmod(extraglobs={"Scenario": Scenario}, optionflags=doctest.ELLIPSIS)

{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/LICENSE RENAMED Viewed

File without changes

{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/WHEEL RENAMED Viewed

File without changes

edsl 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl

edsl 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl