edsl 0.1.33.dev1__py3-none-any.whl → 0.1.33.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. edsl/TemplateLoader.py +24 -0
  2. edsl/__init__.py +8 -4
  3. edsl/agents/Agent.py +46 -14
  4. edsl/agents/AgentList.py +43 -0
  5. edsl/agents/Invigilator.py +125 -212
  6. edsl/agents/InvigilatorBase.py +140 -32
  7. edsl/agents/PromptConstructionMixin.py +43 -66
  8. edsl/agents/__init__.py +1 -0
  9. edsl/auto/AutoStudy.py +117 -0
  10. edsl/auto/StageBase.py +230 -0
  11. edsl/auto/StageGenerateSurvey.py +178 -0
  12. edsl/auto/StageLabelQuestions.py +125 -0
  13. edsl/auto/StagePersona.py +61 -0
  14. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  15. edsl/auto/StagePersonaDimensionValues.py +74 -0
  16. edsl/auto/StagePersonaDimensions.py +69 -0
  17. edsl/auto/StageQuestions.py +73 -0
  18. edsl/auto/SurveyCreatorPipeline.py +21 -0
  19. edsl/auto/utilities.py +224 -0
  20. edsl/config.py +38 -39
  21. edsl/coop/PriceFetcher.py +58 -0
  22. edsl/coop/coop.py +39 -5
  23. edsl/data/Cache.py +35 -1
  24. edsl/data_transfer_models.py +120 -38
  25. edsl/enums.py +2 -0
  26. edsl/exceptions/language_models.py +25 -1
  27. edsl/exceptions/questions.py +62 -5
  28. edsl/exceptions/results.py +4 -0
  29. edsl/inference_services/AnthropicService.py +13 -11
  30. edsl/inference_services/AwsBedrock.py +19 -17
  31. edsl/inference_services/AzureAI.py +37 -20
  32. edsl/inference_services/GoogleService.py +16 -12
  33. edsl/inference_services/GroqService.py +2 -0
  34. edsl/inference_services/InferenceServiceABC.py +24 -0
  35. edsl/inference_services/MistralAIService.py +120 -0
  36. edsl/inference_services/OpenAIService.py +41 -50
  37. edsl/inference_services/TestService.py +71 -0
  38. edsl/inference_services/models_available_cache.py +0 -6
  39. edsl/inference_services/registry.py +4 -0
  40. edsl/jobs/Answers.py +10 -12
  41. edsl/jobs/FailedQuestion.py +78 -0
  42. edsl/jobs/Jobs.py +18 -13
  43. edsl/jobs/buckets/TokenBucket.py +39 -14
  44. edsl/jobs/interviews/Interview.py +297 -77
  45. edsl/jobs/interviews/InterviewExceptionEntry.py +83 -19
  46. edsl/jobs/interviews/interview_exception_tracking.py +0 -70
  47. edsl/jobs/interviews/retry_management.py +3 -1
  48. edsl/jobs/runners/JobsRunnerAsyncio.py +116 -70
  49. edsl/jobs/runners/JobsRunnerStatusMixin.py +1 -1
  50. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  51. edsl/jobs/tasks/TaskHistory.py +131 -213
  52. edsl/language_models/LanguageModel.py +239 -129
  53. edsl/language_models/ModelList.py +2 -2
  54. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  55. edsl/language_models/fake_openai_call.py +15 -0
  56. edsl/language_models/fake_openai_service.py +61 -0
  57. edsl/language_models/registry.py +15 -2
  58. edsl/language_models/repair.py +0 -19
  59. edsl/language_models/utilities.py +61 -0
  60. edsl/prompts/Prompt.py +52 -2
  61. edsl/questions/AnswerValidatorMixin.py +23 -26
  62. edsl/questions/QuestionBase.py +273 -242
  63. edsl/questions/QuestionBaseGenMixin.py +133 -0
  64. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  65. edsl/questions/QuestionBudget.py +6 -0
  66. edsl/questions/QuestionCheckBox.py +227 -35
  67. edsl/questions/QuestionExtract.py +98 -27
  68. edsl/questions/QuestionFreeText.py +46 -29
  69. edsl/questions/QuestionFunctional.py +7 -0
  70. edsl/questions/QuestionList.py +141 -22
  71. edsl/questions/QuestionMultipleChoice.py +173 -64
  72. edsl/questions/QuestionNumerical.py +87 -46
  73. edsl/questions/QuestionRank.py +182 -24
  74. edsl/questions/RegisterQuestionsMeta.py +31 -12
  75. edsl/questions/ResponseValidatorABC.py +169 -0
  76. edsl/questions/__init__.py +3 -4
  77. edsl/questions/decorators.py +21 -0
  78. edsl/questions/derived/QuestionLikertFive.py +10 -5
  79. edsl/questions/derived/QuestionLinearScale.py +11 -1
  80. edsl/questions/derived/QuestionTopK.py +6 -0
  81. edsl/questions/derived/QuestionYesNo.py +16 -1
  82. edsl/questions/descriptors.py +43 -7
  83. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  84. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  85. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  86. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  87. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  88. edsl/questions/prompt_templates/question_list.jinja +17 -0
  89. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  90. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  91. edsl/questions/question_registry.py +6 -2
  92. edsl/questions/templates/__init__.py +0 -0
  93. edsl/questions/templates/checkbox/__init__.py +0 -0
  94. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  95. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  96. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  97. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  98. edsl/questions/templates/free_text/__init__.py +0 -0
  99. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  100. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  101. edsl/questions/templates/likert_five/__init__.py +0 -0
  102. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  103. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  104. edsl/questions/templates/linear_scale/__init__.py +0 -0
  105. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  106. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  107. edsl/questions/templates/list/__init__.py +0 -0
  108. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  109. edsl/questions/templates/list/question_presentation.jinja +5 -0
  110. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  111. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  112. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  113. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  114. edsl/questions/templates/numerical/__init__.py +0 -0
  115. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  116. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  117. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  118. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  119. edsl/questions/templates/top_k/__init__.py +0 -0
  120. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  121. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  122. edsl/questions/templates/yes_no/__init__.py +0 -0
  123. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  124. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  125. edsl/results/Dataset.py +20 -0
  126. edsl/results/DatasetExportMixin.py +41 -47
  127. edsl/results/DatasetTree.py +145 -0
  128. edsl/results/Result.py +32 -5
  129. edsl/results/Results.py +131 -45
  130. edsl/results/ResultsDBMixin.py +3 -3
  131. edsl/results/Selector.py +118 -0
  132. edsl/results/tree_explore.py +115 -0
  133. edsl/scenarios/Scenario.py +10 -4
  134. edsl/scenarios/ScenarioList.py +348 -39
  135. edsl/scenarios/ScenarioListExportMixin.py +9 -0
  136. edsl/study/SnapShot.py +8 -1
  137. edsl/surveys/RuleCollection.py +2 -2
  138. edsl/surveys/Survey.py +634 -315
  139. edsl/surveys/SurveyExportMixin.py +71 -9
  140. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  141. edsl/surveys/SurveyQualtricsImport.py +75 -4
  142. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  143. edsl/surveys/instructions/Instruction.py +34 -0
  144. edsl/surveys/instructions/InstructionCollection.py +77 -0
  145. edsl/surveys/instructions/__init__.py +0 -0
  146. edsl/templates/error_reporting/base.html +24 -0
  147. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  148. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  149. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  150. edsl/templates/error_reporting/interview_details.html +111 -0
  151. edsl/templates/error_reporting/interviews.html +10 -0
  152. edsl/templates/error_reporting/overview.html +5 -0
  153. edsl/templates/error_reporting/performance_plot.html +2 -0
  154. edsl/templates/error_reporting/report.css +74 -0
  155. edsl/templates/error_reporting/report.html +118 -0
  156. edsl/templates/error_reporting/report.js +25 -0
  157. {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/METADATA +4 -2
  158. edsl-0.1.33.dev2.dist-info/RECORD +289 -0
  159. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
  160. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  161. edsl-0.1.33.dev1.dist-info/RECORD +0 -209
  162. {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/LICENSE +0 -0
  163. {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/WHEEL +0 -0
@@ -2,36 +2,98 @@ import traceback
2
2
  import datetime
3
3
  import time
4
4
  from collections import UserDict
5
-
6
- # traceback=traceback.format_exc(),
7
- # traceback = frame_summary_to_dict(traceback.extract_tb(e.__traceback__))
8
- # traceback = [frame_summary_to_dict(f) for f in traceback.extract_tb(e.__traceback__)]
5
+ from edsl.jobs.FailedQuestion import FailedQuestion
9
6
 
10
7
 
11
8
  class InterviewExceptionEntry:
12
- """Class to record an exception that occurred during the interview.
13
-
14
- >>> entry = InterviewExceptionEntry.example()
15
- >>> entry.to_dict()['exception']
16
- "ValueError('An error occurred.')"
17
- """
18
-
19
- def __init__(self, exception: Exception, traceback_format="html"):
9
+ """Class to record an exception that occurred during the interview."""
10
+
11
+ def __init__(
12
+ self,
13
+ *,
14
+ exception: Exception,
15
+ # failed_question: FailedQuestion,
16
+ invigilator: "Invigilator",
17
+ traceback_format="text",
18
+ ):
20
19
  self.time = datetime.datetime.now().isoformat()
21
20
  self.exception = exception
21
+ # self.failed_question = failed_question
22
+ self.invigilator = invigilator
22
23
  self.traceback_format = traceback_format
23
24
 
25
+ @property
26
+ def question_type(self):
27
+ # return self.failed_question.question.question_type
28
+ return self.invigilator.question.question_type
29
+
30
+ @property
31
+ def name(self):
32
+ return repr(self.exception)
33
+
34
+ @property
35
+ def rendered_prompts(self):
36
+ return self.invigilator.get_prompts()
37
+
38
+ @property
39
+ def key_sequence(self):
40
+ return self.invigilator.model.key_sequence
41
+
42
+ @property
43
+ def generated_token_string(self):
44
+ # return "POO"
45
+ if self.invigilator.raw_model_response is None:
46
+ return "No raw model response available."
47
+ else:
48
+ return self.invigilator.model.get_generated_token_string(
49
+ self.invigilator.raw_model_response
50
+ )
51
+
52
+ @property
53
+ def raw_model_response(self):
54
+ import json
55
+
56
+ if self.invigilator.raw_model_response is None:
57
+ return "No raw model response available."
58
+ return json.dumps(self.invigilator.raw_model_response, indent=2)
59
+
24
60
  def __getitem__(self, key):
25
61
  # Support dict-like access obj['a']
26
62
  return str(getattr(self, key))
27
63
 
28
64
  @classmethod
29
65
  def example(cls):
30
- try:
31
- raise ValueError("An error occurred.")
32
- except Exception as e:
33
- entry = InterviewExceptionEntry(e)
34
- return entry
66
+ from edsl import QuestionFreeText
67
+ from edsl.language_models import LanguageModel
68
+
69
+ m = LanguageModel.example(test_model=True)
70
+ q = QuestionFreeText.example(exception_to_throw=ValueError)
71
+ results = q.by(m).run(
72
+ skip_retry=True, print_exceptions=False, raise_validation_errors=True
73
+ )
74
+ return results.task_history.exceptions[0]["how_are_you"][0]
75
+
76
+ @property
77
+ def code_to_reproduce(self):
78
+ return self.code(run=False)
79
+
80
+ def code(self, run=True):
81
+ lines = []
82
+ lines.append("from edsl import Question, Model, Scenario, Agent")
83
+
84
+ lines.append(f"q = {repr(self.invigilator.question)}")
85
+ lines.append(f"scenario = {repr(self.invigilator.scenario)}")
86
+ lines.append(f"agent = {repr(self.invigilator.agent)}")
87
+ lines.append(f"m = Model('{self.invigilator.model.model}')")
88
+ lines.append("results = q.by(m).by(agent).by(scenario).run()")
89
+ code_str = "\n".join(lines)
90
+
91
+ if run:
92
+ # Create a new namespace to avoid polluting the global namespace
93
+ namespace = {}
94
+ exec(code_str, namespace)
95
+ return namespace["results"]
96
+ return code_str
35
97
 
36
98
  @property
37
99
  def traceback(self):
@@ -78,13 +140,15 @@ class InterviewExceptionEntry:
78
140
 
79
141
  >>> entry = InterviewExceptionEntry.example()
80
142
  >>> entry.to_dict()['exception']
81
- "ValueError('An error occurred.')"
143
+ ValueError()
82
144
 
83
145
  """
84
146
  return {
85
- "exception": repr(self.exception),
147
+ "exception": self.exception,
86
148
  "time": self.time,
87
149
  "traceback": self.traceback,
150
+ # "failed_question": self.failed_question.to_dict(),
151
+ "invigilator": self.invigilator.to_dict(),
88
152
  }
89
153
 
90
154
  def push(self):
@@ -1,71 +1,7 @@
1
- import traceback
2
- import datetime
3
- import time
4
1
  from collections import UserDict
5
2
 
6
3
  from edsl.jobs.interviews.InterviewExceptionEntry import InterviewExceptionEntry
7
4
 
8
- # #traceback=traceback.format_exc(),
9
- # #traceback = frame_summary_to_dict(traceback.extract_tb(e.__traceback__))
10
- # #traceback = [frame_summary_to_dict(f) for f in traceback.extract_tb(e.__traceback__)]
11
-
12
- # class InterviewExceptionEntry:
13
- # """Class to record an exception that occurred during the interview.
14
-
15
- # >>> entry = InterviewExceptionEntry.example()
16
- # >>> entry.to_dict()['exception']
17
- # "ValueError('An error occurred.')"
18
- # """
19
-
20
- # def __init__(self, exception: Exception):
21
- # self.time = datetime.datetime.now().isoformat()
22
- # self.exception = exception
23
-
24
- # def __getitem__(self, key):
25
- # # Support dict-like access obj['a']
26
- # return str(getattr(self, key))
27
-
28
- # @classmethod
29
- # def example(cls):
30
- # try:
31
- # raise ValueError("An error occurred.")
32
- # except Exception as e:
33
- # entry = InterviewExceptionEntry(e)
34
- # return entry
35
-
36
- # @property
37
- # def traceback(self):
38
- # """Return the exception as HTML."""
39
- # e = self.exception
40
- # tb_str = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
41
- # return tb_str
42
-
43
-
44
- # @property
45
- # def html(self):
46
- # from rich.console import Console
47
- # from rich.table import Table
48
- # from rich.traceback import Traceback
49
-
50
- # from io import StringIO
51
- # html_output = StringIO()
52
-
53
- # console = Console(file=html_output, record=True)
54
- # tb = Traceback(show_locals=True)
55
- # console.print(tb)
56
-
57
- # tb = Traceback.from_exception(type(self.exception), self.exception, self.exception.__traceback__, show_locals=True)
58
- # console.print(tb)
59
- # return html_output.getvalue()
60
-
61
- # def to_dict(self) -> dict:
62
- # """Return the exception as a dictionary."""
63
- # return {
64
- # 'exception': repr(self.exception),
65
- # 'time': self.time,
66
- # 'traceback': self.traceback
67
- # }
68
-
69
5
 
70
6
  class InterviewExceptionCollection(UserDict):
71
7
  """A collection of exceptions that occurred during the interview."""
@@ -80,12 +16,6 @@ class InterviewExceptionCollection(UserDict):
80
16
  def to_dict(self, include_traceback=True) -> dict:
81
17
  """Return the collection of exceptions as a dictionary."""
82
18
  newdata = {k: [e.to_dict() for e in v] for k, v in self.data.items()}
83
- # if not include_traceback:
84
- # for question in newdata:
85
- # for exception in newdata[question]:
86
- # exception[
87
- # "traceback"
88
- # ] = "Traceback removed. Set include_traceback=True to include."
89
19
  return newdata
90
20
 
91
21
  def _repr_html_(self) -> str:
@@ -18,9 +18,11 @@ def print_retry(retry_state, print_to_terminal=True):
18
18
  attempt_number = retry_state.attempt_number
19
19
  exception = retry_state.outcome.exception()
20
20
  wait_time = retry_state.next_action.sleep
21
+ exception_name = type(exception).__name__
21
22
  if print_to_terminal:
22
23
  print(
23
- f"Attempt {attempt_number} failed with exception:" f"{exception}",
24
+ f"Attempt {attempt_number} failed with exception '{exception_name}':"
25
+ f"{exception}",
24
26
  f"now waiting {wait_time:.2f} seconds before retrying."
25
27
  f"Parameters: start={EDSL_BACKOFF_START_SEC}, max={EDSL_MAX_BACKOFF_SEC}, max_attempts={EDSL_MAX_ATTEMPTS}."
26
28
  "\n\n",
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
  import time
3
+ import math
3
4
  import asyncio
4
- import time
5
+ import functools
6
+ from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator
5
7
  from contextlib import contextmanager
6
-
7
- from typing import Coroutine, List, AsyncGenerator, Optional, Union
8
+ from collections import UserList
8
9
 
9
10
  from edsl import shared_globals
10
11
  from edsl.jobs.interviews.Interview import Interview
@@ -12,12 +13,15 @@ from edsl.jobs.runners.JobsRunnerStatusMixin import JobsRunnerStatusMixin
12
13
  from edsl.jobs.tasks.TaskHistory import TaskHistory
13
14
  from edsl.jobs.buckets.BucketCollection import BucketCollection
14
15
  from edsl.utilities.decorators import jupyter_nb_handler
15
-
16
- import time
17
- import functools
16
+ from edsl.data.Cache import Cache
17
+ from edsl.results.Result import Result
18
+ from edsl.results.Results import Results
19
+ from edsl.jobs.FailedQuestion import FailedQuestion
18
20
 
19
21
 
20
22
  def cache_with_timeout(timeout):
23
+ """ "Used to keep the generate table from being run too frequetly."""
24
+
21
25
  def decorator(func):
22
26
  cached_result = {}
23
27
  last_computation_time = [0] # Using list to store mutable value
@@ -35,10 +39,6 @@ def cache_with_timeout(timeout):
35
39
  return decorator
36
40
 
37
41
 
38
- # from queue import Queue
39
- from collections import UserList
40
-
41
-
42
42
  class StatusTracker(UserList):
43
43
  def __init__(self, total_tasks: int):
44
44
  self.total_tasks = total_tasks
@@ -55,7 +55,7 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
55
55
  The Jobs object is a collection of interviews that are to be run.
56
56
  """
57
57
 
58
- def __init__(self, jobs: Jobs):
58
+ def __init__(self, jobs: "Jobs"):
59
59
  self.jobs = jobs
60
60
  # this creates the interviews, which can take a while
61
61
  self.interviews: List["Interview"] = jobs.interviews()
@@ -66,81 +66,69 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
66
66
  self,
67
67
  cache: "Cache",
68
68
  n: int = 1,
69
- debug: bool = False,
70
69
  stop_on_exception: bool = False,
71
- sidecar_model: "LanguageModel" = None,
70
+ sidecar_model: Optional["LanguageModel"] = None,
72
71
  total_interviews: Optional[List["Interview"]] = None,
72
+ raise_validation_errors: bool = False,
73
73
  ) -> AsyncGenerator["Result", None]:
74
74
  """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
75
75
 
76
76
  Completed tasks are yielded as they are completed.
77
77
 
78
78
  :param n: how many times to run each interview
79
- :param debug:
80
79
  :param stop_on_exception: Whether to stop the interview if an exception is raised
81
80
  :param sidecar_model: a language model to use in addition to the interview's model
82
81
  :param total_interviews: A list of interviews to run can be provided instead.
83
82
  """
84
83
  tasks = []
85
- if total_interviews:
84
+ if total_interviews: # was already passed in total interviews
86
85
  self.total_interviews = total_interviews
87
86
  else:
88
- self._populate_total_interviews(
89
- n=n
87
+ self.total_interviews = list(
88
+ self._populate_total_interviews(n=n)
90
89
  ) # Populate self.total_interviews before creating tasks
91
90
 
91
+ # print("Interviews created")
92
+
92
93
  for interview in self.total_interviews:
93
94
  interviewing_task = self._build_interview_task(
94
95
  interview=interview,
95
- debug=debug,
96
96
  stop_on_exception=stop_on_exception,
97
97
  sidecar_model=sidecar_model,
98
+ raise_validation_errors=raise_validation_errors,
98
99
  )
99
100
  tasks.append(asyncio.create_task(interviewing_task))
100
101
 
102
+ # print("Tasks created")
103
+
101
104
  for task in asyncio.as_completed(tasks):
105
+ # print(f"Task {task} completed")
102
106
  result = await task
103
107
  yield result
104
108
 
105
- def _populate_total_interviews(self, n: int = 1) -> None:
109
+ def _populate_total_interviews(
110
+ self, n: int = 1
111
+ ) -> Generator["Interview", None, None]:
106
112
  """Populates self.total_interviews with n copies of each interview.
107
113
 
108
114
  :param n: how many times to run each interview.
109
115
  """
110
- # TODO: Why not return a list of interviews instead of modifying the object?
111
-
112
- self.total_interviews = []
113
116
  for interview in self.interviews:
114
117
  for iteration in range(n):
115
118
  if iteration > 0:
116
- new_interview = interview.duplicate(
117
- iteration=iteration, cache=self.cache
118
- )
119
- self.total_interviews.append(new_interview)
119
+ yield interview.duplicate(iteration=iteration, cache=self.cache)
120
120
  else:
121
- interview.cache = (
122
- self.cache
123
- ) # set the cache for the first interview
124
- self.total_interviews.append(interview)
125
-
126
- async def run_async(self, cache=None, n=1) -> Results:
127
- from edsl.results.Results import Results
121
+ interview.cache = self.cache
122
+ yield interview
128
123
 
129
- # breakpoint()
130
- # tracker = StatusTracker(total_tasks=len(self.interviews))
131
-
132
- if cache is None:
133
- self.cache = Cache()
134
- else:
135
- self.cache = cache
124
+ async def run_async(self, cache: Optional["Cache"] = None, n: int = 1) -> Results:
125
+ self.cache = Cache() if cache is None else cache
136
126
  data = []
137
127
  async for result in self.run_async_generator(cache=self.cache, n=n):
138
128
  data.append(result)
139
129
  return Results(survey=self.jobs.survey, data=data)
140
130
 
141
131
  def simple_run(self):
142
- from edsl.results.Results import Results
143
-
144
132
  data = asyncio.run(self.run_async())
145
133
  return Results(survey=self.jobs.survey, data=data)
146
134
 
@@ -148,14 +136,13 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
148
136
  self,
149
137
  *,
150
138
  interview: Interview,
151
- debug: bool,
152
139
  stop_on_exception: bool = False,
153
- sidecar_model: Optional[LanguageModel] = None,
154
- ) -> Result:
140
+ sidecar_model: Optional["LanguageModel"] = None,
141
+ raise_validation_errors: bool = False,
142
+ ) -> "Result":
155
143
  """Conducts an interview and returns the result.
156
144
 
157
145
  :param interview: the interview to conduct
158
- :param debug: prints debug messages
159
146
  :param stop_on_exception: stops the interview if an exception is raised
160
147
  :param sidecar_model: a language model to use in addition to the interview's model
161
148
  """
@@ -164,53 +151,93 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
164
151
 
165
152
  # get the results of the interview
166
153
  answer, valid_results = await interview.async_conduct_interview(
167
- debug=debug,
168
154
  model_buckets=model_buckets,
169
155
  stop_on_exception=stop_on_exception,
170
156
  sidecar_model=sidecar_model,
157
+ raise_validation_errors=raise_validation_errors,
171
158
  )
172
159
 
173
- # we should have a valid result for each question
174
- answer_key_names = {k for k in set(answer.keys()) if not k.endswith("_comment")}
160
+ # answer_key_names = {
161
+ # k
162
+ # for k in set(answer.keys())
163
+ # if not k.endswith("_comment") and not k.endswith("_generated_tokens")
164
+ # }
165
+
166
+ question_results = {}
167
+ for result in valid_results:
168
+ question_results[result.question_name] = result
169
+
170
+ answer_key_names = list(question_results.keys())
171
+
172
+ generated_tokens_dict = {
173
+ k + "_generated_tokens": question_results[k].generated_tokens
174
+ for k in answer_key_names
175
+ }
176
+ comments_dict = {
177
+ "k" + "_comment": question_results[k].comment for k in answer_key_names
178
+ }
175
179
 
180
+ # we should have a valid result for each question
181
+ answer_dict = {k: answer[k] for k in answer_key_names}
176
182
  assert len(valid_results) == len(answer_key_names)
177
183
 
184
+ # breakpoint()
185
+ # generated_tokens_dict = {
186
+ # k + "_generated_tokens": v.generated_tokens
187
+ # for k, v in zip(answer_key_names, valid_results)
188
+ # }
189
+
190
+ # comments_dict = {
191
+ # k + "_comment": v.comment for k, v in zip(answer_key_names, valid_results)
192
+ # }
193
+ # breakpoint()
194
+
178
195
  # TODO: move this down into Interview
179
196
  question_name_to_prompts = dict({})
180
197
  for result in valid_results:
181
- question_name = result["question_name"]
198
+ question_name = result.question_name
182
199
  question_name_to_prompts[question_name] = {
183
- "user_prompt": result["prompts"]["user_prompt"],
184
- "system_prompt": result["prompts"]["system_prompt"],
200
+ "user_prompt": result.prompts["user_prompt"],
201
+ "system_prompt": result.prompts["system_prompt"],
185
202
  }
186
203
 
187
204
  prompt_dictionary = {}
188
205
  for answer_key_name in answer_key_names:
189
- prompt_dictionary[
190
- answer_key_name + "_user_prompt"
191
- ] = question_name_to_prompts[answer_key_name]["user_prompt"]
192
- prompt_dictionary[
193
- answer_key_name + "_system_prompt"
194
- ] = question_name_to_prompts[answer_key_name]["system_prompt"]
206
+ prompt_dictionary[answer_key_name + "_user_prompt"] = (
207
+ question_name_to_prompts[answer_key_name]["user_prompt"]
208
+ )
209
+ prompt_dictionary[answer_key_name + "_system_prompt"] = (
210
+ question_name_to_prompts[answer_key_name]["system_prompt"]
211
+ )
195
212
 
196
213
  raw_model_results_dictionary = {}
197
214
  for result in valid_results:
198
- question_name = result["question_name"]
199
- raw_model_results_dictionary[
200
- question_name + "_raw_model_response"
201
- ] = result["raw_model_response"]
202
-
203
- from edsl.results.Result import Result
215
+ question_name = result.question_name
216
+ raw_model_results_dictionary[question_name + "_raw_model_response"] = (
217
+ result.raw_model_response
218
+ )
219
+ raw_model_results_dictionary[question_name + "_cost"] = result.cost
220
+ one_use_buys = (
221
+ "NA"
222
+ if isinstance(result.cost, str)
223
+ or result.cost == 0
224
+ or result.cost is None
225
+ else 1.0 / result.cost
226
+ )
227
+ raw_model_results_dictionary[question_name + "_one_usd_buys"] = one_use_buys
204
228
 
229
+ # breakpoint()
205
230
  result = Result(
206
231
  agent=interview.agent,
207
232
  scenario=interview.scenario,
208
233
  model=interview.model,
209
234
  iteration=interview.iteration,
210
- answer=answer,
235
+ answer=answer_dict,
211
236
  prompt=prompt_dictionary,
212
237
  raw_model_response=raw_model_results_dictionary,
213
238
  survey=interview.survey,
239
+ generated_tokens=generated_tokens_dict,
240
+ comments_dict=comments_dict,
214
241
  )
215
242
  result.interview_hash = hash(interview)
216
243
 
@@ -225,11 +252,11 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
225
252
  self,
226
253
  cache: Union[Cache, False, None],
227
254
  n: int = 1,
228
- debug: bool = False,
229
255
  stop_on_exception: bool = False,
230
256
  progress_bar: bool = False,
231
257
  sidecar_model: Optional[LanguageModel] = None,
232
258
  print_exceptions: bool = True,
259
+ raise_validation_errors: bool = False,
233
260
  ) -> "Coroutine":
234
261
  """Runs a collection of interviews, handling both async and sync contexts."""
235
262
  from rich.console import Console
@@ -253,15 +280,15 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
253
280
  """Processes results from interviews."""
254
281
  async for result in self.run_async_generator(
255
282
  n=n,
256
- debug=debug,
257
283
  stop_on_exception=stop_on_exception,
258
284
  cache=cache,
259
285
  sidecar_model=sidecar_model,
286
+ raise_validation_errors=raise_validation_errors,
260
287
  ):
261
288
  self.results.append(result)
262
289
  if progress_bar_context:
263
290
  progress_bar_context.update(generate_table())
264
- self.completed = True
291
+ self.completed = True
265
292
 
266
293
  async def update_progress_bar(progress_bar_context):
267
294
  """Updates the progress bar at fixed intervals."""
@@ -309,7 +336,11 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
309
336
  progress_bar_context.update(generate_table())
310
337
 
311
338
  # puts results in the same order as the total interviews
312
- interview_hashes = [hash(interview) for interview in self.total_interviews]
339
+ interview_lookup = {
340
+ hash(interview): index
341
+ for index, interview in enumerate(self.total_interviews)
342
+ }
343
+ interview_hashes = list(interview_lookup.keys())
313
344
  self.results = sorted(
314
345
  self.results, key=lambda x: interview_hashes.index(x.interview_hash)
315
346
  )
@@ -318,8 +349,12 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
318
349
  task_history = TaskHistory(self.total_interviews, include_traceback=False)
319
350
  results.task_history = task_history
320
351
 
352
+ results.failed_questions = {}
321
353
  results.has_exceptions = task_history.has_exceptions
322
354
 
355
+ # breakpoint()
356
+ results.bucket_collection = self.bucket_collection
357
+
323
358
  if results.has_exceptions:
324
359
  # put the failed interviews in the results object as a list
325
360
  failed_interviews = [
@@ -329,6 +364,15 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
329
364
  for interview in self.total_interviews
330
365
  if interview.has_exceptions
331
366
  ]
367
+
368
+ failed_questions = {}
369
+ for interview in self.total_interviews:
370
+ if interview.has_exceptions:
371
+ index = interview_lookup[hash(interview)]
372
+ failed_questions[index] = interview.failed_questions
373
+
374
+ results.failed_questions = failed_questions
375
+
332
376
  from edsl.jobs.Jobs import Jobs
333
377
 
334
378
  results.failed_jobs = Jobs.from_interviews(
@@ -343,7 +387,9 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
343
387
  shared_globals["edsl_runner_exceptions"] = task_history
344
388
  print(msg)
345
389
  # this is where exceptions are opening up
346
- task_history.html(cta="Open report to see details.")
390
+ task_history.html(
391
+ cta="Open report to see details.", open_in_browser=True
392
+ )
347
393
  print(
348
394
  "Also see: https://docs.expectedparrot.com/en/latest/exceptions.html"
349
395
  )
@@ -208,7 +208,7 @@ class JobsRunnerStatusMixin:
208
208
  >>> model = interviews[0].model
209
209
  >>> num_waiting = 0
210
210
  >>> JobsRunnerStatusMixin()._get_model_info(model, num_waiting, models_to_tokens)
211
- ModelInfo(model_name='gpt-4-1106-preview', TPM_limit_k=480.0, RPM_limit_k=4.0, num_tasks_waiting=0, token_usage_info=[ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000'), ModelTokenUsageStats(token_usage_type='cached_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')])
211
+ ModelInfo(model_name='...', TPM_limit_k=..., RPM_limit_k=..., num_tasks_waiting=0, token_usage_info=[ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000'), ModelTokenUsageStats(token_usage_type='cached_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')])
212
212
  """
213
213
 
214
214
  ## TODO: This should probably be a coop method