edsl 0.1.33.dev1__py3-none-any.whl → 0.1.33.dev2__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +8 -4
- edsl/agents/Agent.py +46 -14
- edsl/agents/AgentList.py +43 -0
- edsl/agents/Invigilator.py +125 -212
- edsl/agents/InvigilatorBase.py +140 -32
- edsl/agents/PromptConstructionMixin.py +43 -66
- edsl/agents/__init__.py +1 -0
- edsl/auto/AutoStudy.py +117 -0
- edsl/auto/StageBase.py +230 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +73 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +224 -0
- edsl/config.py +38 -39
- edsl/coop/PriceFetcher.py +58 -0
- edsl/coop/coop.py +39 -5
- edsl/data/Cache.py +35 -1
- edsl/data_transfer_models.py +120 -38
- edsl/enums.py +2 -0
- edsl/exceptions/language_models.py +25 -1
- edsl/exceptions/questions.py +62 -5
- edsl/exceptions/results.py +4 -0
- edsl/inference_services/AnthropicService.py +13 -11
- edsl/inference_services/AwsBedrock.py +19 -17
- edsl/inference_services/AzureAI.py +37 -20
- edsl/inference_services/GoogleService.py +16 -12
- edsl/inference_services/GroqService.py +2 -0
- edsl/inference_services/InferenceServiceABC.py +24 -0
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OpenAIService.py +41 -50
- edsl/inference_services/TestService.py +71 -0
- edsl/inference_services/models_available_cache.py +0 -6
- edsl/inference_services/registry.py +4 -0
- edsl/jobs/Answers.py +10 -12
- edsl/jobs/FailedQuestion.py +78 -0
- edsl/jobs/Jobs.py +18 -13
- edsl/jobs/buckets/TokenBucket.py +39 -14
- edsl/jobs/interviews/Interview.py +297 -77
- edsl/jobs/interviews/InterviewExceptionEntry.py +83 -19
- edsl/jobs/interviews/interview_exception_tracking.py +0 -70
- edsl/jobs/interviews/retry_management.py +3 -1
- edsl/jobs/runners/JobsRunnerAsyncio.py +116 -70
- edsl/jobs/runners/JobsRunnerStatusMixin.py +1 -1
- edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
- edsl/jobs/tasks/TaskHistory.py +131 -213
- edsl/language_models/LanguageModel.py +239 -129
- edsl/language_models/ModelList.py +2 -2
- edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/registry.py +15 -2
- edsl/language_models/repair.py +0 -19
- edsl/language_models/utilities.py +61 -0
- edsl/prompts/Prompt.py +52 -2
- edsl/questions/AnswerValidatorMixin.py +23 -26
- edsl/questions/QuestionBase.py +273 -242
- edsl/questions/QuestionBaseGenMixin.py +133 -0
- edsl/questions/QuestionBasePromptsMixin.py +266 -0
- edsl/questions/QuestionBudget.py +6 -0
- edsl/questions/QuestionCheckBox.py +227 -35
- edsl/questions/QuestionExtract.py +98 -27
- edsl/questions/QuestionFreeText.py +46 -29
- edsl/questions/QuestionFunctional.py +7 -0
- edsl/questions/QuestionList.py +141 -22
- edsl/questions/QuestionMultipleChoice.py +173 -64
- edsl/questions/QuestionNumerical.py +87 -46
- edsl/questions/QuestionRank.py +182 -24
- edsl/questions/RegisterQuestionsMeta.py +31 -12
- edsl/questions/ResponseValidatorABC.py +169 -0
- edsl/questions/__init__.py +3 -4
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +10 -5
- edsl/questions/derived/QuestionLinearScale.py +11 -1
- edsl/questions/derived/QuestionTopK.py +6 -0
- edsl/questions/derived/QuestionYesNo.py +16 -1
- edsl/questions/descriptors.py +43 -7
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_registry.py +6 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/Dataset.py +20 -0
- edsl/results/DatasetExportMixin.py +41 -47
- edsl/results/DatasetTree.py +145 -0
- edsl/results/Result.py +32 -5
- edsl/results/Results.py +131 -45
- edsl/results/ResultsDBMixin.py +3 -3
- edsl/results/Selector.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/Scenario.py +10 -4
- edsl/scenarios/ScenarioList.py +348 -39
- edsl/scenarios/ScenarioListExportMixin.py +9 -0
- edsl/study/SnapShot.py +8 -1
- edsl/surveys/RuleCollection.py +2 -2
- edsl/surveys/Survey.py +634 -315
- edsl/surveys/SurveyExportMixin.py +71 -9
- edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
- edsl/surveys/SurveyQualtricsImport.py +75 -4
- edsl/surveys/instructions/ChangeInstruction.py +47 -0
- edsl/surveys/instructions/Instruction.py +34 -0
- edsl/surveys/instructions/InstructionCollection.py +77 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +111 -0
- edsl/templates/error_reporting/interviews.html +10 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/METADATA +4 -2
- edsl-0.1.33.dev2.dist-info/RECORD +289 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
- edsl/utilities/gcp_bucket/simple_example.py +0 -9
- edsl-0.1.33.dev1.dist-info/RECORD +0 -209
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/LICENSE +0 -0
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/WHEEL +0 -0
--- a/edsl/jobs/interviews/InterviewExceptionEntry.py
+++ b/edsl/jobs/interviews/InterviewExceptionEntry.py
@@ -2,36 +2,98 @@ import traceback
 import datetime
 import time
 from collections import UserDict
-
-# traceback=traceback.format_exc(),
-# traceback = frame_summary_to_dict(traceback.extract_tb(e.__traceback__))
-# traceback = [frame_summary_to_dict(f) for f in traceback.extract_tb(e.__traceback__)]
+from edsl.jobs.FailedQuestion import FailedQuestion
 
 
 class InterviewExceptionEntry:
-    """Class to record an exception that occurred during the interview.
-
-
-
-
-
-
-
+    """Class to record an exception that occurred during the interview."""
+
+    def __init__(
+        self,
+        *,
+        exception: Exception,
+        # failed_question: FailedQuestion,
+        invigilator: "Invigilator",
+        traceback_format="text",
+    ):
         self.time = datetime.datetime.now().isoformat()
         self.exception = exception
+        # self.failed_question = failed_question
+        self.invigilator = invigilator
         self.traceback_format = traceback_format
 
+    @property
+    def question_type(self):
+        # return self.failed_question.question.question_type
+        return self.invigilator.question.question_type
+
+    @property
+    def name(self):
+        return repr(self.exception)
+
+    @property
+    def rendered_prompts(self):
+        return self.invigilator.get_prompts()
+
+    @property
+    def key_sequence(self):
+        return self.invigilator.model.key_sequence
+
+    @property
+    def generated_token_string(self):
+        # return "POO"
+        if self.invigilator.raw_model_response is None:
+            return "No raw model response available."
+        else:
+            return self.invigilator.model.get_generated_token_string(
+                self.invigilator.raw_model_response
+            )
+
+    @property
+    def raw_model_response(self):
+        import json
+
+        if self.invigilator.raw_model_response is None:
+            return "No raw model response available."
+        return json.dumps(self.invigilator.raw_model_response, indent=2)
+
     def __getitem__(self, key):
         # Support dict-like access obj['a']
         return str(getattr(self, key))
 
     @classmethod
     def example(cls):
-
-
-
-
-
+        from edsl import QuestionFreeText
+        from edsl.language_models import LanguageModel
+
+        m = LanguageModel.example(test_model=True)
+        q = QuestionFreeText.example(exception_to_throw=ValueError)
+        results = q.by(m).run(
+            skip_retry=True, print_exceptions=False, raise_validation_errors=True
+        )
+        return results.task_history.exceptions[0]["how_are_you"][0]
+
+    @property
+    def code_to_reproduce(self):
+        return self.code(run=False)
+
+    def code(self, run=True):
+        lines = []
+        lines.append("from edsl import Question, Model, Scenario, Agent")
+
+        lines.append(f"q = {repr(self.invigilator.question)}")
+        lines.append(f"scenario = {repr(self.invigilator.scenario)}")
+        lines.append(f"agent = {repr(self.invigilator.agent)}")
+        lines.append(f"m = Model('{self.invigilator.model.model}')")
+        lines.append("results = q.by(m).by(agent).by(scenario).run()")
+        code_str = "\n".join(lines)
+
+        if run:
+            # Create a new namespace to avoid polluting the global namespace
+            namespace = {}
+            exec(code_str, namespace)
+            return namespace["results"]
+        return code_str
 
     @property
     def traceback(self):
@@ -78,13 +140,15 @@ class InterviewExceptionEntry:
 
         >>> entry = InterviewExceptionEntry.example()
         >>> entry.to_dict()['exception']
-
+        ValueError()
 
         """
         return {
-            "exception":
+            "exception": self.exception,
             "time": self.time,
             "traceback": self.traceback,
+            # "failed_question": self.failed_question.to_dict(),
+            "invigilator": self.invigilator.to_dict(),
         }
 
     def push(self):
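Note: the hunk above gives each recorded exception a self-contained reproduction path (rendered_prompts, code_to_reproduce, and a richer to_dict). A minimal usage sketch, assuming an environment where the example() classmethod shown in the diff works as written; output details will vary by edsl version:

```python
# Hedged sketch of the InterviewExceptionEntry helpers added in this diff.
# It only drives the example() classmethod shown above, so everything here is
# exercised by the diff itself.
from edsl.jobs.interviews.InterviewExceptionEntry import InterviewExceptionEntry

entry = InterviewExceptionEntry.example()  # runs a deliberately failing question on the test model

print(entry.name)               # repr() of the underlying exception, e.g. ValueError()
print(entry.question_type)      # pulled from the invigilator's question
print(entry.rendered_prompts)   # the prompts the invigilator actually built
print(entry.code_to_reproduce)  # standalone Question/Scenario/Agent/Model script that re-runs it
entry.to_dict()                 # now also serializes the invigilator
```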
--- a/edsl/jobs/interviews/interview_exception_tracking.py
+++ b/edsl/jobs/interviews/interview_exception_tracking.py
@@ -1,71 +1,7 @@
-import traceback
-import datetime
-import time
 from collections import UserDict
 
 from edsl.jobs.interviews.InterviewExceptionEntry import InterviewExceptionEntry
 
-# #traceback=traceback.format_exc(),
-# #traceback = frame_summary_to_dict(traceback.extract_tb(e.__traceback__))
-# #traceback = [frame_summary_to_dict(f) for f in traceback.extract_tb(e.__traceback__)]
-
-# class InterviewExceptionEntry:
-#     """Class to record an exception that occurred during the interview.
-
-#     >>> entry = InterviewExceptionEntry.example()
-#     >>> entry.to_dict()['exception']
-#     "ValueError('An error occurred.')"
-#     """
-
-#     def __init__(self, exception: Exception):
-#         self.time = datetime.datetime.now().isoformat()
-#         self.exception = exception
-
-#     def __getitem__(self, key):
-#         # Support dict-like access obj['a']
-#         return str(getattr(self, key))
-
-#     @classmethod
-#     def example(cls):
-#         try:
-#             raise ValueError("An error occurred.")
-#         except Exception as e:
-#             entry = InterviewExceptionEntry(e)
-#         return entry
-
-#     @property
-#     def traceback(self):
-#         """Return the exception as HTML."""
-#         e = self.exception
-#         tb_str = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
-#         return tb_str
-
-
-#     @property
-#     def html(self):
-#         from rich.console import Console
-#         from rich.table import Table
-#         from rich.traceback import Traceback
-
-#         from io import StringIO
-#         html_output = StringIO()
-
-#         console = Console(file=html_output, record=True)
-#         tb = Traceback(show_locals=True)
-#         console.print(tb)
-
-#         tb = Traceback.from_exception(type(self.exception), self.exception, self.exception.__traceback__, show_locals=True)
-#         console.print(tb)
-#         return html_output.getvalue()
-
-#     def to_dict(self) -> dict:
-#         """Return the exception as a dictionary."""
-#         return {
-#             'exception': repr(self.exception),
-#             'time': self.time,
-#             'traceback': self.traceback
-#         }
-
 
 class InterviewExceptionCollection(UserDict):
     """A collection of exceptions that occurred during the interview."""
@@ -80,12 +16,6 @@ class InterviewExceptionCollection(UserDict):
     def to_dict(self, include_traceback=True) -> dict:
         """Return the collection of exceptions as a dictionary."""
         newdata = {k: [e.to_dict() for e in v] for k, v in self.data.items()}
-        # if not include_traceback:
-        #     for question in newdata:
-        #         for exception in newdata[question]:
-        #             exception[
-        #                 "traceback"
-        #             ] = "Traceback removed. Set include_traceback=True to include."
         return newdata
 
     def _repr_html_(self) -> str:
--- a/edsl/jobs/interviews/retry_management.py
+++ b/edsl/jobs/interviews/retry_management.py
@@ -18,9 +18,11 @@ def print_retry(retry_state, print_to_terminal=True):
     attempt_number = retry_state.attempt_number
     exception = retry_state.outcome.exception()
     wait_time = retry_state.next_action.sleep
+    exception_name = type(exception).__name__
     if print_to_terminal:
         print(
-            f"Attempt {attempt_number} failed with exception
+            f"Attempt {attempt_number} failed with exception '{exception_name}':"
+            f"{exception}",
             f"now waiting {wait_time:.2f} seconds before retrying."
             f"Parameters: start={EDSL_BACKOFF_START_SEC}, max={EDSL_MAX_BACKOFF_SEC}, max_attempts={EDSL_MAX_ATTEMPTS}."
             "\n\n",
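Note: the hunk above only changes the message that print_retry builds from the retry state. For orientation, a hedged sketch of how such a callback is typically wired up with tenacity; the decorator values below are illustrative placeholders, not edsl's actual EDSL_BACKOFF_START_SEC / EDSL_MAX_BACKOFF_SEC / EDSL_MAX_ATTEMPTS settings, and the wiring itself is an assumption since this diff does not show it:

```python
# Hedged sketch: a print_retry-style before_sleep callback for tenacity.
# retry_state is a tenacity.RetryCallState; the attributes read here are the
# same ones the diff reads. The @retry parameters are placeholders.
from tenacity import retry, stop_after_attempt, wait_exponential


def print_retry(retry_state, print_to_terminal=True):
    attempt_number = retry_state.attempt_number    # attempt that just failed
    exception = retry_state.outcome.exception()    # the exception it raised
    wait_time = retry_state.next_action.sleep      # how long tenacity will sleep
    exception_name = type(exception).__name__
    if print_to_terminal:
        print(
            f"Attempt {attempt_number} failed with exception '{exception_name}': "
            f"{exception}, now waiting {wait_time:.2f} seconds before retrying."
        )


@retry(
    wait=wait_exponential(multiplier=1, max=60),  # placeholder backoff
    stop=stop_after_attempt(5),                   # placeholder attempt cap
    before_sleep=print_retry,
)
def flaky_call():
    ...
```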
--- a/edsl/jobs/runners/JobsRunnerAsyncio.py
+++ b/edsl/jobs/runners/JobsRunnerAsyncio.py
@@ -1,10 +1,11 @@
 from __future__ import annotations
 import time
+import math
 import asyncio
-import
+import functools
+from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator
 from contextlib import contextmanager
-
-from typing import Coroutine, List, AsyncGenerator, Optional, Union
+from collections import UserList
 
 from edsl import shared_globals
 from edsl.jobs.interviews.Interview import Interview
@@ -12,12 +13,15 @@ from edsl.jobs.runners.JobsRunnerStatusMixin import JobsRunnerStatusMixin
 from edsl.jobs.tasks.TaskHistory import TaskHistory
 from edsl.jobs.buckets.BucketCollection import BucketCollection
 from edsl.utilities.decorators import jupyter_nb_handler
-
-import
-import
+from edsl.data.Cache import Cache
+from edsl.results.Result import Result
+from edsl.results.Results import Results
+from edsl.jobs.FailedQuestion import FailedQuestion
 
 
 def cache_with_timeout(timeout):
+    """ "Used to keep the generate table from being run too frequetly."""
+
     def decorator(func):
         cached_result = {}
         last_computation_time = [0]  # Using list to store mutable value
@@ -35,10 +39,6 @@ def cache_with_timeout(timeout):
     return decorator
 
 
-# from queue import Queue
-from collections import UserList
-
-
 class StatusTracker(UserList):
     def __init__(self, total_tasks: int):
         self.total_tasks = total_tasks
@@ -55,7 +55,7 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
     The Jobs object is a collection of interviews that are to be run.
     """
 
-    def __init__(self, jobs: Jobs):
+    def __init__(self, jobs: "Jobs"):
         self.jobs = jobs
         # this creates the interviews, which can take a while
         self.interviews: List["Interview"] = jobs.interviews()
@@ -66,81 +66,69 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         self,
         cache: "Cache",
         n: int = 1,
-        debug: bool = False,
         stop_on_exception: bool = False,
-        sidecar_model: "LanguageModel" = None,
+        sidecar_model: Optional["LanguageModel"] = None,
         total_interviews: Optional[List["Interview"]] = None,
+        raise_validation_errors: bool = False,
     ) -> AsyncGenerator["Result", None]:
         """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
 
         Completed tasks are yielded as they are completed.
 
         :param n: how many times to run each interview
-        :param debug:
         :param stop_on_exception: Whether to stop the interview if an exception is raised
         :param sidecar_model: a language model to use in addition to the interview's model
         :param total_interviews: A list of interviews to run can be provided instead.
         """
         tasks = []
-        if total_interviews:
+        if total_interviews:  # was already passed in total interviews
             self.total_interviews = total_interviews
         else:
-            self.
-                n=n
+            self.total_interviews = list(
+                self._populate_total_interviews(n=n)
             )  # Populate self.total_interviews before creating tasks
 
+        # print("Interviews created")
+
         for interview in self.total_interviews:
             interviewing_task = self._build_interview_task(
                 interview=interview,
-                debug=debug,
                 stop_on_exception=stop_on_exception,
                 sidecar_model=sidecar_model,
+                raise_validation_errors=raise_validation_errors,
             )
             tasks.append(asyncio.create_task(interviewing_task))
 
+        # print("Tasks created")
+
         for task in asyncio.as_completed(tasks):
+            # print(f"Task {task} completed")
            result = await task
            yield result
 
-    def _populate_total_interviews(
+    def _populate_total_interviews(
+        self, n: int = 1
+    ) -> Generator["Interview", None, None]:
         """Populates self.total_interviews with n copies of each interview.
 
         :param n: how many times to run each interview.
         """
-        # TODO: Why not return a list of interviews instead of modifying the object?
-
-        self.total_interviews = []
         for interview in self.interviews:
             for iteration in range(n):
                 if iteration > 0:
-
-                        iteration=iteration, cache=self.cache
-                    )
-                    self.total_interviews.append(new_interview)
+                    yield interview.duplicate(iteration=iteration, cache=self.cache)
                 else:
-                    interview.cache =
-
-                    )  # set the cache for the first interview
-                    self.total_interviews.append(interview)
-
-    async def run_async(self, cache=None, n=1) -> Results:
-        from edsl.results.Results import Results
+                    interview.cache = self.cache
+                    yield interview
 
-
-
-
-        if cache is None:
-            self.cache = Cache()
-        else:
-            self.cache = cache
+    async def run_async(self, cache: Optional["Cache"] = None, n: int = 1) -> Results:
+        self.cache = Cache() if cache is None else cache
         data = []
         async for result in self.run_async_generator(cache=self.cache, n=n):
             data.append(result)
         return Results(survey=self.jobs.survey, data=data)
 
     def simple_run(self):
-        from edsl.results.Results import Results
-
         data = asyncio.run(self.run_async())
         return Results(survey=self.jobs.survey, data=data)
 
@@ -148,14 +136,13 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         self,
         *,
         interview: Interview,
-        debug: bool,
         stop_on_exception: bool = False,
-        sidecar_model: Optional[LanguageModel] = None,
-
+        sidecar_model: Optional["LanguageModel"] = None,
+        raise_validation_errors: bool = False,
+    ) -> "Result":
         """Conducts an interview and returns the result.
 
         :param interview: the interview to conduct
-        :param debug: prints debug messages
         :param stop_on_exception: stops the interview if an exception is raised
         :param sidecar_model: a language model to use in addition to the interview's model
         """
@@ -164,53 +151,93 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
 
         # get the results of the interview
         answer, valid_results = await interview.async_conduct_interview(
-            debug=debug,
             model_buckets=model_buckets,
             stop_on_exception=stop_on_exception,
             sidecar_model=sidecar_model,
+            raise_validation_errors=raise_validation_errors,
         )
 
-        #
-
+        # answer_key_names = {
+        #     k
+        #     for k in set(answer.keys())
+        #     if not k.endswith("_comment") and not k.endswith("_generated_tokens")
+        # }
+
+        question_results = {}
+        for result in valid_results:
+            question_results[result.question_name] = result
+
+        answer_key_names = list(question_results.keys())
+
+        generated_tokens_dict = {
+            k + "_generated_tokens": question_results[k].generated_tokens
+            for k in answer_key_names
+        }
+        comments_dict = {
+            "k" + "_comment": question_results[k].comment for k in answer_key_names
+        }
 
+        # we should have a valid result for each question
+        answer_dict = {k: answer[k] for k in answer_key_names}
         assert len(valid_results) == len(answer_key_names)
 
+        # breakpoint()
+        # generated_tokens_dict = {
+        #     k + "_generated_tokens": v.generated_tokens
+        #     for k, v in zip(answer_key_names, valid_results)
+        # }
+
+        # comments_dict = {
+        #     k + "_comment": v.comment for k, v in zip(answer_key_names, valid_results)
+        # }
+        # breakpoint()
+
         # TODO: move this down into Interview
         question_name_to_prompts = dict({})
         for result in valid_results:
-            question_name = result
+            question_name = result.question_name
             question_name_to_prompts[question_name] = {
-                "user_prompt": result
-                "system_prompt": result
+                "user_prompt": result.prompts["user_prompt"],
+                "system_prompt": result.prompts["system_prompt"],
             }
 
         prompt_dictionary = {}
         for answer_key_name in answer_key_names:
-            prompt_dictionary[
-                answer_key_name
-
-            prompt_dictionary[
-                answer_key_name
-
+            prompt_dictionary[answer_key_name + "_user_prompt"] = (
+                question_name_to_prompts[answer_key_name]["user_prompt"]
+            )
+            prompt_dictionary[answer_key_name + "_system_prompt"] = (
+                question_name_to_prompts[answer_key_name]["system_prompt"]
+            )
 
         raw_model_results_dictionary = {}
         for result in valid_results:
-            question_name = result
-            raw_model_results_dictionary[
-
-
-
-
+            question_name = result.question_name
+            raw_model_results_dictionary[question_name + "_raw_model_response"] = (
+                result.raw_model_response
+            )
+            raw_model_results_dictionary[question_name + "_cost"] = result.cost
+            one_use_buys = (
+                "NA"
+                if isinstance(result.cost, str)
+                or result.cost == 0
+                or result.cost is None
+                else 1.0 / result.cost
+            )
+            raw_model_results_dictionary[question_name + "_one_usd_buys"] = one_use_buys
 
+        # breakpoint()
         result = Result(
             agent=interview.agent,
             scenario=interview.scenario,
             model=interview.model,
             iteration=interview.iteration,
-            answer=
+            answer=answer_dict,
             prompt=prompt_dictionary,
             raw_model_response=raw_model_results_dictionary,
             survey=interview.survey,
+            generated_tokens=generated_tokens_dict,
+            comments_dict=comments_dict,
         )
         result.interview_hash = hash(interview)
 
@@ -225,11 +252,11 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         self,
         cache: Union[Cache, False, None],
         n: int = 1,
-        debug: bool = False,
         stop_on_exception: bool = False,
         progress_bar: bool = False,
         sidecar_model: Optional[LanguageModel] = None,
         print_exceptions: bool = True,
+        raise_validation_errors: bool = False,
     ) -> "Coroutine":
         """Runs a collection of interviews, handling both async and sync contexts."""
         from rich.console import Console
@@ -253,15 +280,15 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
             """Processes results from interviews."""
             async for result in self.run_async_generator(
                 n=n,
-                debug=debug,
                 stop_on_exception=stop_on_exception,
                 cache=cache,
                 sidecar_model=sidecar_model,
+                raise_validation_errors=raise_validation_errors,
             ):
                 self.results.append(result)
                 if progress_bar_context:
                     progress_bar_context.update(generate_table())
-
+            self.completed = True
 
         async def update_progress_bar(progress_bar_context):
             """Updates the progress bar at fixed intervals."""
@@ -309,7 +336,11 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
            progress_bar_context.update(generate_table())
 
         # puts results in the same order as the total interviews
-
+        interview_lookup = {
+            hash(interview): index
+            for index, interview in enumerate(self.total_interviews)
+        }
+        interview_hashes = list(interview_lookup.keys())
         self.results = sorted(
             self.results, key=lambda x: interview_hashes.index(x.interview_hash)
         )
@@ -318,8 +349,12 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         task_history = TaskHistory(self.total_interviews, include_traceback=False)
         results.task_history = task_history
 
+        results.failed_questions = {}
         results.has_exceptions = task_history.has_exceptions
 
+        # breakpoint()
+        results.bucket_collection = self.bucket_collection
+
         if results.has_exceptions:
             # put the failed interviews in the results object as a list
             failed_interviews = [
@@ -329,6 +364,15 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
                 for interview in self.total_interviews
                 if interview.has_exceptions
             ]
+
+            failed_questions = {}
+            for interview in self.total_interviews:
+                if interview.has_exceptions:
+                    index = interview_lookup[hash(interview)]
+                    failed_questions[index] = interview.failed_questions
+
+            results.failed_questions = failed_questions
+
             from edsl.jobs.Jobs import Jobs
 
             results.failed_jobs = Jobs.from_interviews(
@@ -343,7 +387,9 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
             shared_globals["edsl_runner_exceptions"] = task_history
             print(msg)
             # this is where exceptions are opening up
-            task_history.html(
+            task_history.html(
+                cta="Open report to see details.", open_in_browser=True
+            )
             print(
                 "Also see: https://docs.expectedparrot.com/en/latest/exceptions.html"
             )
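Note: across the runner hunks above, debug is removed end to end, raise_validation_errors is threaded from run() down to async_conduct_interview(), and failed questions are now indexed back to their interviews. A hedged sketch of how those keywords surface at the question level, mirroring the example() classmethod added earlier in this diff:

```python
# Hedged sketch: the new runner keywords as seen from a normal edsl run.
# This mirrors InterviewExceptionEntry.example() above; the .run() keywords shown
# (skip_retry, print_exceptions, raise_validation_errors) are the ones the diff itself passes.
from edsl import QuestionFreeText
from edsl.language_models import LanguageModel

m = LanguageModel.example(test_model=True)
q = QuestionFreeText.example(exception_to_throw=ValueError)  # a question wired to fail

results = q.by(m).run(
    skip_retry=True,
    print_exceptions=False,
    raise_validation_errors=True,  # new flag, passed through to async_conduct_interview
)

print(results.has_exceptions)           # set from task_history.has_exceptions
print(results.task_history.exceptions)  # per-question InterviewExceptionEntry objects
print(results.failed_questions)         # {interview index: interview.failed_questions}
```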
--- a/edsl/jobs/runners/JobsRunnerStatusMixin.py
+++ b/edsl/jobs/runners/JobsRunnerStatusMixin.py
@@ -208,7 +208,7 @@ class JobsRunnerStatusMixin:
         >>> model = interviews[0].model
         >>> num_waiting = 0
         >>> JobsRunnerStatusMixin()._get_model_info(model, num_waiting, models_to_tokens)
-        ModelInfo(model_name='
+        ModelInfo(model_name='...', TPM_limit_k=..., RPM_limit_k=..., num_tasks_waiting=0, token_usage_info=[ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000'), ModelTokenUsageStats(token_usage_type='cached_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')])
         """
 
         ## TODO: This should probably be a coop method