edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +136 -221
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +154 -85
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +48 -47
  23. edsl/conjure/Conjure.py +6 -0
  24. edsl/coop/PriceFetcher.py +58 -0
  25. edsl/coop/coop.py +50 -7
  26. edsl/data/Cache.py +35 -1
  27. edsl/data/CacheHandler.py +3 -4
  28. edsl/data_transfer_models.py +73 -38
  29. edsl/enums.py +8 -0
  30. edsl/exceptions/general.py +10 -8
  31. edsl/exceptions/language_models.py +25 -1
  32. edsl/exceptions/questions.py +62 -5
  33. edsl/exceptions/results.py +4 -0
  34. edsl/inference_services/AnthropicService.py +13 -11
  35. edsl/inference_services/AwsBedrock.py +112 -0
  36. edsl/inference_services/AzureAI.py +214 -0
  37. edsl/inference_services/DeepInfraService.py +4 -3
  38. edsl/inference_services/GoogleService.py +16 -12
  39. edsl/inference_services/GroqService.py +5 -4
  40. edsl/inference_services/InferenceServiceABC.py +58 -3
  41. edsl/inference_services/InferenceServicesCollection.py +13 -8
  42. edsl/inference_services/MistralAIService.py +120 -0
  43. edsl/inference_services/OllamaService.py +18 -0
  44. edsl/inference_services/OpenAIService.py +55 -56
  45. edsl/inference_services/TestService.py +80 -0
  46. edsl/inference_services/TogetherAIService.py +170 -0
  47. edsl/inference_services/models_available_cache.py +25 -0
  48. edsl/inference_services/registry.py +19 -1
  49. edsl/jobs/Answers.py +10 -12
  50. edsl/jobs/FailedQuestion.py +78 -0
  51. edsl/jobs/Jobs.py +137 -41
  52. edsl/jobs/buckets/BucketCollection.py +24 -15
  53. edsl/jobs/buckets/TokenBucket.py +105 -18
  54. edsl/jobs/interviews/Interview.py +393 -83
  55. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +22 -18
  56. edsl/jobs/interviews/InterviewExceptionEntry.py +167 -0
  57. edsl/jobs/runners/JobsRunnerAsyncio.py +152 -160
  58. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  59. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  60. edsl/jobs/tasks/TaskCreators.py +1 -1
  61. edsl/jobs/tasks/TaskHistory.py +205 -126
  62. edsl/language_models/LanguageModel.py +297 -177
  63. edsl/language_models/ModelList.py +2 -2
  64. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  65. edsl/language_models/fake_openai_call.py +15 -0
  66. edsl/language_models/fake_openai_service.py +61 -0
  67. edsl/language_models/registry.py +25 -8
  68. edsl/language_models/repair.py +0 -19
  69. edsl/language_models/utilities.py +61 -0
  70. edsl/notebooks/Notebook.py +20 -2
  71. edsl/prompts/Prompt.py +52 -2
  72. edsl/questions/AnswerValidatorMixin.py +23 -26
  73. edsl/questions/QuestionBase.py +330 -249
  74. edsl/questions/QuestionBaseGenMixin.py +133 -0
  75. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  76. edsl/questions/QuestionBudget.py +99 -42
  77. edsl/questions/QuestionCheckBox.py +227 -36
  78. edsl/questions/QuestionExtract.py +98 -28
  79. edsl/questions/QuestionFreeText.py +47 -31
  80. edsl/questions/QuestionFunctional.py +7 -0
  81. edsl/questions/QuestionList.py +141 -23
  82. edsl/questions/QuestionMultipleChoice.py +159 -66
  83. edsl/questions/QuestionNumerical.py +88 -47
  84. edsl/questions/QuestionRank.py +182 -25
  85. edsl/questions/Quick.py +41 -0
  86. edsl/questions/RegisterQuestionsMeta.py +31 -12
  87. edsl/questions/ResponseValidatorABC.py +170 -0
  88. edsl/questions/__init__.py +3 -4
  89. edsl/questions/decorators.py +21 -0
  90. edsl/questions/derived/QuestionLikertFive.py +10 -5
  91. edsl/questions/derived/QuestionLinearScale.py +15 -2
  92. edsl/questions/derived/QuestionTopK.py +10 -1
  93. edsl/questions/derived/QuestionYesNo.py +24 -3
  94. edsl/questions/descriptors.py +43 -7
  95. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  96. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  97. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  98. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  99. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  100. edsl/questions/prompt_templates/question_list.jinja +17 -0
  101. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  102. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  103. edsl/questions/question_registry.py +6 -2
  104. edsl/questions/templates/__init__.py +0 -0
  105. edsl/questions/templates/budget/__init__.py +0 -0
  106. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  107. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  108. edsl/questions/templates/checkbox/__init__.py +0 -0
  109. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  110. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  111. edsl/questions/templates/extract/__init__.py +0 -0
  112. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  113. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  114. edsl/questions/templates/free_text/__init__.py +0 -0
  115. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  116. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  117. edsl/questions/templates/likert_five/__init__.py +0 -0
  118. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  119. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  120. edsl/questions/templates/linear_scale/__init__.py +0 -0
  121. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  122. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  123. edsl/questions/templates/list/__init__.py +0 -0
  124. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  125. edsl/questions/templates/list/question_presentation.jinja +5 -0
  126. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  127. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  128. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  129. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  130. edsl/questions/templates/numerical/__init__.py +0 -0
  131. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  132. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  133. edsl/questions/templates/rank/__init__.py +0 -0
  134. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  135. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  136. edsl/questions/templates/top_k/__init__.py +0 -0
  137. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  138. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  139. edsl/questions/templates/yes_no/__init__.py +0 -0
  140. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  141. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  142. edsl/results/Dataset.py +20 -0
  143. edsl/results/DatasetExportMixin.py +58 -30
  144. edsl/results/DatasetTree.py +145 -0
  145. edsl/results/Result.py +32 -5
  146. edsl/results/Results.py +135 -46
  147. edsl/results/ResultsDBMixin.py +3 -3
  148. edsl/results/Selector.py +118 -0
  149. edsl/results/tree_explore.py +115 -0
  150. edsl/scenarios/FileStore.py +71 -10
  151. edsl/scenarios/Scenario.py +109 -24
  152. edsl/scenarios/ScenarioImageMixin.py +2 -2
  153. edsl/scenarios/ScenarioList.py +546 -21
  154. edsl/scenarios/ScenarioListExportMixin.py +24 -4
  155. edsl/scenarios/ScenarioListPdfMixin.py +153 -4
  156. edsl/study/SnapShot.py +8 -1
  157. edsl/study/Study.py +32 -0
  158. edsl/surveys/Rule.py +15 -3
  159. edsl/surveys/RuleCollection.py +21 -5
  160. edsl/surveys/Survey.py +707 -298
  161. edsl/surveys/SurveyExportMixin.py +71 -9
  162. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  163. edsl/surveys/SurveyQualtricsImport.py +284 -0
  164. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  165. edsl/surveys/instructions/Instruction.py +34 -0
  166. edsl/surveys/instructions/InstructionCollection.py +77 -0
  167. edsl/surveys/instructions/__init__.py +0 -0
  168. edsl/templates/error_reporting/base.html +24 -0
  169. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  170. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  171. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  172. edsl/templates/error_reporting/interview_details.html +116 -0
  173. edsl/templates/error_reporting/interviews.html +10 -0
  174. edsl/templates/error_reporting/overview.html +5 -0
  175. edsl/templates/error_reporting/performance_plot.html +2 -0
  176. edsl/templates/error_reporting/report.css +74 -0
  177. edsl/templates/error_reporting/report.html +118 -0
  178. edsl/templates/error_reporting/report.js +25 -0
  179. edsl/utilities/utilities.py +40 -1
  180. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/METADATA +8 -2
  181. edsl-0.1.33.dist-info/RECORD +295 -0
  182. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -271
  183. edsl/jobs/interviews/retry_management.py +0 -37
  184. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -303
  185. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  186. edsl-0.1.31.dev4.dist-info/RECORD +0 -204
  187. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  188. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -0,0 +1,167 @@
1
+ import traceback
2
+ import datetime
3
+ import time
4
+ from collections import UserDict
5
+ from edsl.jobs.FailedQuestion import FailedQuestion
6
+
7
+
8
+ class InterviewExceptionEntry:
9
+ """Class to record an exception that occurred during the interview."""
10
+
11
+ def __init__(
12
+ self,
13
+ *,
14
+ exception: Exception,
15
+ # failed_question: FailedQuestion,
16
+ invigilator: "Invigilator",
17
+ traceback_format="text",
18
+ answers=None,
19
+ ):
20
+ self.time = datetime.datetime.now().isoformat()
21
+ self.exception = exception
22
+ # self.failed_question = failed_question
23
+ self.invigilator = invigilator
24
+ self.traceback_format = traceback_format
25
+ self.answers = answers
26
+
27
+ @property
28
+ def question_type(self):
29
+ # return self.failed_question.question.question_type
30
+ return self.invigilator.question.question_type
31
+
32
+ @property
33
+ def name(self):
34
+ return repr(self.exception)
35
+
36
+ @property
37
+ def rendered_prompts(self):
38
+ return self.invigilator.get_prompts()
39
+
40
+ @property
41
+ def key_sequence(self):
42
+ return self.invigilator.model.key_sequence
43
+
44
+ @property
45
+ def generated_token_string(self):
46
+ # return "POO"
47
+ if self.invigilator.raw_model_response is None:
48
+ return "No raw model response available."
49
+ else:
50
+ return self.invigilator.model.get_generated_token_string(
51
+ self.invigilator.raw_model_response
52
+ )
53
+
54
+ @property
55
+ def raw_model_response(self):
56
+ import json
57
+
58
+ if self.invigilator.raw_model_response is None:
59
+ return "No raw model response available."
60
+ return json.dumps(self.invigilator.raw_model_response, indent=2)
61
+
62
+ def __getitem__(self, key):
63
+ # Support dict-like access obj['a']
64
+ return str(getattr(self, key))
65
+
66
+ @classmethod
67
+ def example(cls):
68
+ from edsl import QuestionFreeText
69
+ from edsl.language_models import LanguageModel
70
+
71
+ m = LanguageModel.example(test_model=True)
72
+ q = QuestionFreeText.example(exception_to_throw=ValueError)
73
+ results = q.by(m).run(
74
+ skip_retry=True, print_exceptions=False, raise_validation_errors=True
75
+ )
76
+ return results.task_history.exceptions[0]["how_are_you"][0]
77
+
78
+ @property
79
+ def code_to_reproduce(self):
80
+ return self.code(run=False)
81
+
82
+ def code(self, run=True):
83
+ lines = []
84
+ lines.append("from edsl import Question, Model, Scenario, Agent")
85
+
86
+ lines.append(f"q = {repr(self.invigilator.question)}")
87
+ lines.append(f"scenario = {repr(self.invigilator.scenario)}")
88
+ lines.append(f"agent = {repr(self.invigilator.agent)}")
89
+ lines.append(f"m = Model('{self.invigilator.model.model}')")
90
+ lines.append("results = q.by(m).by(agent).by(scenario).run()")
91
+ code_str = "\n".join(lines)
92
+
93
+ if run:
94
+ # Create a new namespace to avoid polluting the global namespace
95
+ namespace = {}
96
+ exec(code_str, namespace)
97
+ return namespace["results"]
98
+ return code_str
99
+
100
+ @property
101
+ def traceback(self):
102
+ """Return the exception as HTML."""
103
+ if self.traceback_format == "html":
104
+ return self.html_traceback
105
+ else:
106
+ return self.text_traceback
107
+
108
+ @property
109
+ def text_traceback(self):
110
+ """
111
+ >>> entry = InterviewExceptionEntry.example()
112
+ >>> entry.text_traceback
113
+ 'Traceback (most recent call last):...'
114
+ """
115
+ e = self.exception
116
+ tb_str = "".join(traceback.format_exception(type(e), e, e.__traceback__))
117
+ return tb_str
118
+
119
+ @property
120
+ def html_traceback(self):
121
+ from rich.console import Console
122
+ from rich.table import Table
123
+ from rich.traceback import Traceback
124
+
125
+ from io import StringIO
126
+
127
+ html_output = StringIO()
128
+
129
+ console = Console(file=html_output, record=True)
130
+
131
+ tb = Traceback.from_exception(
132
+ type(self.exception),
133
+ self.exception,
134
+ self.exception.__traceback__,
135
+ show_locals=True,
136
+ )
137
+ console.print(tb)
138
+ return html_output.getvalue()
139
+
140
+ def to_dict(self) -> dict:
141
+ """Return the exception as a dictionary.
142
+
143
+ >>> entry = InterviewExceptionEntry.example()
144
+ >>> entry.to_dict()['exception']
145
+ ValueError()
146
+
147
+ """
148
+ return {
149
+ "exception": self.exception,
150
+ "time": self.time,
151
+ "traceback": self.traceback,
152
+ # "failed_question": self.failed_question.to_dict(),
153
+ "invigilator": self.invigilator.to_dict(),
154
+ }
155
+
156
+ def push(self):
157
+ from edsl import Coop
158
+
159
+ coop = Coop()
160
+ results = coop.error_create(self.to_dict())
161
+ return results
162
+
163
+
164
+ if __name__ == "__main__":
165
+ import doctest
166
+
167
+ doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -1,141 +1,119 @@
1
1
  from __future__ import annotations
2
2
  import time
3
+ import math
3
4
  import asyncio
4
- import time
5
+ import functools
6
+ import threading
7
+ from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator
5
8
  from contextlib import contextmanager
9
+ from collections import UserList
6
10
 
7
- from typing import Coroutine, List, AsyncGenerator, Optional, Union
11
+ from edsl.results.Results import Results
12
+ from rich.live import Live
13
+ from rich.console import Console
8
14
 
9
15
  from edsl import shared_globals
10
16
  from edsl.jobs.interviews.Interview import Interview
11
- from edsl.jobs.runners.JobsRunnerStatusMixin import JobsRunnerStatusMixin
17
+ from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatus
18
+
12
19
  from edsl.jobs.tasks.TaskHistory import TaskHistory
13
20
  from edsl.jobs.buckets.BucketCollection import BucketCollection
14
21
  from edsl.utilities.decorators import jupyter_nb_handler
22
+ from edsl.data.Cache import Cache
23
+ from edsl.results.Result import Result
24
+ from edsl.results.Results import Results
15
25
 
16
- import time
17
- import functools
18
-
19
- def cache_with_timeout(timeout):
20
- def decorator(func):
21
- cached_result = {}
22
- last_computation_time = [0] # Using list to store mutable value
23
-
24
- @functools.wraps(func)
25
- def wrapper(*args, **kwargs):
26
- current_time = time.time()
27
- if (current_time - last_computation_time[0]) >= timeout:
28
- cached_result['value'] = func(*args, **kwargs)
29
- last_computation_time[0] = current_time
30
- return cached_result['value']
31
-
32
- return wrapper
33
- return decorator
34
-
35
- #from queue import Queue
36
- from collections import UserList
37
26
 
38
27
  class StatusTracker(UserList):
39
28
  def __init__(self, total_tasks: int):
40
29
  self.total_tasks = total_tasks
41
30
  super().__init__()
42
-
31
+
43
32
  def current_status(self):
44
- return print(f"Completed: {len(self.data)} of {self.total_tasks}", end = "\r")
33
+ return print(f"Completed: {len(self.data)} of {self.total_tasks}", end="\r")
45
34
 
46
- class JobsRunnerAsyncio(JobsRunnerStatusMixin):
35
+
36
+ class JobsRunnerAsyncio:
47
37
  """A class for running a collection of interviews asynchronously.
48
38
 
49
39
  It gets instaniated from a Jobs object.
50
40
  The Jobs object is a collection of interviews that are to be run.
51
41
  """
52
42
 
53
- def __init__(self, jobs: Jobs):
43
+ def __init__(self, jobs: "Jobs"):
54
44
  self.jobs = jobs
55
- # this creates the interviews, which can take a while
56
45
  self.interviews: List["Interview"] = jobs.interviews()
57
46
  self.bucket_collection: "BucketCollection" = jobs.bucket_collection
58
47
  self.total_interviews: List["Interview"] = []
59
48
 
49
+ # self.jobs_runner_status = JobsRunnerStatus(self, n=1)
50
+
60
51
  async def run_async_generator(
61
52
  self,
62
53
  cache: "Cache",
63
54
  n: int = 1,
64
- debug: bool = False,
65
55
  stop_on_exception: bool = False,
66
- sidecar_model: "LanguageModel" = None,
56
+ sidecar_model: Optional["LanguageModel"] = None,
67
57
  total_interviews: Optional[List["Interview"]] = None,
58
+ raise_validation_errors: bool = False,
68
59
  ) -> AsyncGenerator["Result", None]:
69
60
  """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
70
61
 
71
62
  Completed tasks are yielded as they are completed.
72
63
 
73
64
  :param n: how many times to run each interview
74
- :param debug:
75
65
  :param stop_on_exception: Whether to stop the interview if an exception is raised
76
66
  :param sidecar_model: a language model to use in addition to the interview's model
77
67
  :param total_interviews: A list of interviews to run can be provided instead.
68
+ :param raise_validation_errors: Whether to raise validation errors
78
69
  """
79
70
  tasks = []
80
- if total_interviews:
71
+ if total_interviews: # was already passed in total interviews
81
72
  self.total_interviews = total_interviews
82
73
  else:
83
- self._populate_total_interviews(
84
- n=n
74
+ self.total_interviews = list(
75
+ self._populate_total_interviews(n=n)
85
76
  ) # Populate self.total_interviews before creating tasks
86
77
 
87
78
  for interview in self.total_interviews:
88
79
  interviewing_task = self._build_interview_task(
89
80
  interview=interview,
90
- debug=debug,
91
81
  stop_on_exception=stop_on_exception,
92
82
  sidecar_model=sidecar_model,
83
+ raise_validation_errors=raise_validation_errors,
93
84
  )
94
85
  tasks.append(asyncio.create_task(interviewing_task))
95
86
 
96
87
  for task in asyncio.as_completed(tasks):
97
88
  result = await task
89
+ self.jobs_runner_status.add_completed_interview(result)
98
90
  yield result
99
91
 
100
- def _populate_total_interviews(self, n: int = 1) -> None:
92
+ def _populate_total_interviews(
93
+ self, n: int = 1
94
+ ) -> Generator["Interview", None, None]:
101
95
  """Populates self.total_interviews with n copies of each interview.
102
96
 
103
97
  :param n: how many times to run each interview.
104
98
  """
105
- # TODO: Why not return a list of interviews instead of modifying the object?
106
-
107
- self.total_interviews = []
108
99
  for interview in self.interviews:
109
100
  for iteration in range(n):
110
101
  if iteration > 0:
111
- new_interview = interview.duplicate(
112
- iteration=iteration, cache=self.cache
113
- )
114
- self.total_interviews.append(new_interview)
102
+ yield interview.duplicate(iteration=iteration, cache=self.cache)
115
103
  else:
116
- interview.cache = (
117
- self.cache
118
- ) # set the cache for the first interview
119
- self.total_interviews.append(interview)
120
-
121
- async def run_async(self, cache=None, n=1) -> Results:
122
- from edsl.results.Results import Results
123
-
124
- #breakpoint()
125
- #tracker = StatusTracker(total_tasks=len(self.interviews))
104
+ interview.cache = self.cache
105
+ yield interview
126
106
 
127
- if cache is None:
128
- self.cache = Cache()
129
- else:
130
- self.cache = cache
107
+ async def run_async(self, cache: Optional["Cache"] = None, n: int = 1) -> Results:
108
+ """Used for some other modules that have a non-standard way of running interviews."""
109
+ self.jobs_runner_status = JobsRunnerStatus(self, n=n)
110
+ self.cache = Cache() if cache is None else cache
131
111
  data = []
132
112
  async for result in self.run_async_generator(cache=self.cache, n=n):
133
113
  data.append(result)
134
114
  return Results(survey=self.jobs.survey, data=data)
135
115
 
136
116
  def simple_run(self):
137
- from edsl.results.Results import Results
138
-
139
117
  data = asyncio.run(self.run_async())
140
118
  return Results(survey=self.jobs.survey, data=data)
141
119
 
@@ -143,14 +121,13 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
143
121
  self,
144
122
  *,
145
123
  interview: Interview,
146
- debug: bool,
147
124
  stop_on_exception: bool = False,
148
- sidecar_model: Optional[LanguageModel] = None,
149
- ) -> Result:
125
+ sidecar_model: Optional["LanguageModel"] = None,
126
+ raise_validation_errors: bool = False,
127
+ ) -> "Result":
150
128
  """Conducts an interview and returns the result.
151
129
 
152
130
  :param interview: the interview to conduct
153
- :param debug: prints debug messages
154
131
  :param stop_on_exception: stops the interview if an exception is raised
155
132
  :param sidecar_model: a language model to use in addition to the interview's model
156
133
  """
@@ -159,24 +136,37 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
159
136
 
160
137
  # get the results of the interview
161
138
  answer, valid_results = await interview.async_conduct_interview(
162
- debug=debug,
163
139
  model_buckets=model_buckets,
164
140
  stop_on_exception=stop_on_exception,
165
141
  sidecar_model=sidecar_model,
142
+ raise_validation_errors=raise_validation_errors,
166
143
  )
167
144
 
168
- # we should have a valid result for each question
169
- answer_key_names = {k for k in set(answer.keys()) if not k.endswith("_comment")}
145
+ question_results = {}
146
+ for result in valid_results:
147
+ question_results[result.question_name] = result
148
+
149
+ answer_key_names = list(question_results.keys())
170
150
 
151
+ generated_tokens_dict = {
152
+ k + "_generated_tokens": question_results[k].generated_tokens
153
+ for k in answer_key_names
154
+ }
155
+ comments_dict = {
156
+ k + "_comment": question_results[k].comment for k in answer_key_names
157
+ }
158
+
159
+ # we should have a valid result for each question
160
+ answer_dict = {k: answer[k] for k in answer_key_names}
171
161
  assert len(valid_results) == len(answer_key_names)
172
162
 
173
163
  # TODO: move this down into Interview
174
164
  question_name_to_prompts = dict({})
175
165
  for result in valid_results:
176
- question_name = result["question_name"]
166
+ question_name = result.question_name
177
167
  question_name_to_prompts[question_name] = {
178
- "user_prompt": result["prompts"]["user_prompt"],
179
- "system_prompt": result["prompts"]["system_prompt"],
168
+ "user_prompt": result.prompts["user_prompt"],
169
+ "system_prompt": result.prompts["system_prompt"],
180
170
  }
181
171
 
182
172
  prompt_dictionary = {}
@@ -190,141 +180,143 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
190
180
 
191
181
  raw_model_results_dictionary = {}
192
182
  for result in valid_results:
193
- question_name = result["question_name"]
183
+ question_name = result.question_name
194
184
  raw_model_results_dictionary[
195
185
  question_name + "_raw_model_response"
196
- ] = result["raw_model_response"]
197
-
198
- from edsl.results.Result import Result
186
+ ] = result.raw_model_response
187
+ raw_model_results_dictionary[question_name + "_cost"] = result.cost
188
+ one_use_buys = (
189
+ "NA"
190
+ if isinstance(result.cost, str)
191
+ or result.cost == 0
192
+ or result.cost is None
193
+ else 1.0 / result.cost
194
+ )
195
+ raw_model_results_dictionary[question_name + "_one_usd_buys"] = one_use_buys
199
196
 
200
197
  result = Result(
201
198
  agent=interview.agent,
202
199
  scenario=interview.scenario,
203
200
  model=interview.model,
204
201
  iteration=interview.iteration,
205
- answer=answer,
202
+ answer=answer_dict,
206
203
  prompt=prompt_dictionary,
207
204
  raw_model_response=raw_model_results_dictionary,
208
205
  survey=interview.survey,
206
+ generated_tokens=generated_tokens_dict,
207
+ comments_dict=comments_dict,
209
208
  )
209
+ result.interview_hash = hash(interview)
210
+
210
211
  return result
211
212
 
212
213
  @property
213
214
  def elapsed_time(self):
214
215
  return time.monotonic() - self.start_time
215
216
 
217
+ def process_results(
218
+ self, raw_results: Results, cache: Cache, print_exceptions: bool
219
+ ):
220
+ interview_lookup = {
221
+ hash(interview): index
222
+ for index, interview in enumerate(self.total_interviews)
223
+ }
224
+ interview_hashes = list(interview_lookup.keys())
225
+
226
+ results = Results(
227
+ survey=self.jobs.survey,
228
+ data=sorted(
229
+ raw_results, key=lambda x: interview_hashes.index(x.interview_hash)
230
+ ),
231
+ )
232
+ results.cache = cache
233
+ results.task_history = TaskHistory(
234
+ self.total_interviews, include_traceback=False
235
+ )
236
+ results.has_unfixed_exceptions = results.task_history.has_unfixed_exceptions
237
+ results.bucket_collection = self.bucket_collection
238
+
239
+ if results.has_unfixed_exceptions and print_exceptions:
240
+ from edsl.scenarios.FileStore import HTMLFileStore
241
+ from edsl.config import CONFIG
242
+ from edsl.coop.coop import Coop
243
+
244
+ msg = f"Exceptions were raised in {len(results.task_history.indices)} out of {len(self.total_interviews)} interviews.\n"
245
+
246
+ if len(results.task_history.indices) > 5:
247
+ msg += f"Exceptions were raised in the following interviews: {results.task_history.indices}.\n"
248
+
249
+ print(msg)
250
+ # this is where exceptions are opening up
251
+ filepath = results.task_history.html(
252
+ cta="Open report to see details.",
253
+ open_in_browser=True,
254
+ return_link=True,
255
+ )
256
+
257
+ try:
258
+ coop = Coop()
259
+ user_edsl_settings = coop.edsl_settings
260
+ remote_logging = user_edsl_settings["remote_logging"]
261
+ except Exception as e:
262
+ print(e)
263
+ remote_logging = False
264
+ if remote_logging:
265
+ filestore = HTMLFileStore(filepath)
266
+ coop_details = filestore.push(description="Error report")
267
+ print(coop_details)
268
+
269
+ print("Also see: https://docs.expectedparrot.com/en/latest/exceptions.html")
270
+
271
+ return results
272
+
216
273
  @jupyter_nb_handler
217
274
  async def run(
218
275
  self,
219
276
  cache: Union[Cache, False, None],
220
277
  n: int = 1,
221
- debug: bool = False,
222
278
  stop_on_exception: bool = False,
223
279
  progress_bar: bool = False,
224
280
  sidecar_model: Optional[LanguageModel] = None,
225
281
  print_exceptions: bool = True,
282
+ raise_validation_errors: bool = False,
226
283
  ) -> "Coroutine":
227
284
  """Runs a collection of interviews, handling both async and sync contexts."""
228
- from rich.console import Console
229
285
 
230
- console = Console()
231
286
  self.results = []
232
287
  self.start_time = time.monotonic()
233
288
  self.completed = False
234
289
  self.cache = cache
235
290
  self.sidecar_model = sidecar_model
236
291
 
237
- from edsl.results.Results import Results
238
- from rich.live import Live
239
- from rich.console import Console
240
-
241
- @cache_with_timeout(1)
242
- def generate_table():
243
- return self.status_table(self.results, self.elapsed_time)
292
+ self.jobs_runner_status = JobsRunnerStatus(self, n=n)
244
293
 
245
- async def process_results(cache, progress_bar_context = None):
294
+ async def process_results(cache):
246
295
  """Processes results from interviews."""
247
296
  async for result in self.run_async_generator(
248
297
  n=n,
249
- debug=debug,
250
298
  stop_on_exception=stop_on_exception,
251
299
  cache=cache,
252
300
  sidecar_model=sidecar_model,
301
+ raise_validation_errors=raise_validation_errors,
253
302
  ):
254
303
  self.results.append(result)
255
- if progress_bar_context:
256
- progress_bar_context.update(generate_table())
257
- self.completed = True
258
-
259
- async def update_progress_bar(progress_bar_context):
260
- """Updates the progress bar at fixed intervals."""
261
- if progress_bar_context is None:
262
- return
263
-
264
- while True:
265
- progress_bar_context.update(generate_table())
266
- await asyncio.sleep(0.1) # Update interval
267
- if self.completed:
268
- break
269
-
270
- @contextmanager
271
- def conditional_context(condition, context_manager):
272
- if condition:
273
- with context_manager as cm:
274
- yield cm
275
- else:
276
- yield
277
-
278
- with conditional_context(progress_bar, Live(generate_table(), console=console, refresh_per_second=1)) as progress_bar_context:
279
-
280
- with cache as c:
281
-
282
- progress_task = asyncio.create_task(update_progress_bar(progress_bar_context))
283
-
284
- try:
285
- await asyncio.gather(progress_task, process_results(cache = c, progress_bar_context = progress_bar_context))
286
- except asyncio.CancelledError:
287
- pass
288
- finally:
289
- progress_task.cancel() # Cancel the progress_task when process_results is done
290
- await progress_task
291
-
292
- await asyncio.sleep(1) # short delay to show the final status
293
-
294
- if progress_bar_context:
295
- progress_bar_context.update(generate_table())
296
-
297
-
298
- results = Results(survey=self.jobs.survey, data=self.results)
299
- task_history = TaskHistory(self.total_interviews, include_traceback=False)
300
- results.task_history = task_history
301
-
302
- results.has_exceptions = task_history.has_exceptions
303
-
304
- if results.has_exceptions:
305
- failed_interviews = [
306
- interview.duplicate(
307
- iteration=interview.iteration, cache=interview.cache
308
- )
309
- for interview in self.total_interviews
310
- if interview.has_exceptions
311
- ]
312
- from edsl.jobs.Jobs import Jobs
313
-
314
- results.failed_jobs = Jobs.from_interviews(
315
- [interview for interview in failed_interviews]
316
- )
317
- if print_exceptions:
318
- msg = f"Exceptions were raised in {len(results.task_history.indices)} out of {len(self.total_interviews)} interviews.\n"
304
+ self.completed = True
319
305
 
320
- if len(results.task_history.indices) > 5:
321
- msg += f"Exceptions were raised in the following interviews: {results.task_history.indices}.\n"
306
+ def run_progress_bar():
307
+ """Runs the progress bar in a separate thread."""
308
+ self.jobs_runner_status.update_progress()
322
309
 
323
- shared_globals["edsl_runner_exceptions"] = task_history
324
- print(msg)
325
- task_history.html(cta="Open report to see details.")
326
- print(
327
- "Also see: https://docs.expectedparrot.com/en/latest/exceptions.html"
328
- )
310
+ if progress_bar:
311
+ progress_thread = threading.Thread(target=run_progress_bar)
312
+ progress_thread.start()
329
313
 
330
- return results
314
+ with cache as c:
315
+ await process_results(cache=c)
316
+
317
+ if progress_bar:
318
+ progress_thread.join()
319
+
320
+ return self.process_results(
321
+ raw_results=self.results, cache=cache, print_exceptions=print_exceptions
322
+ )