edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. edsl/Base.py +116 -197
  2. edsl/__init__.py +7 -15
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +147 -351
  5. edsl/agents/AgentList.py +73 -211
  6. edsl/agents/Invigilator.py +50 -101
  7. edsl/agents/InvigilatorBase.py +70 -62
  8. edsl/agents/PromptConstructor.py +225 -143
  9. edsl/agents/__init__.py +1 -0
  10. edsl/agents/prompt_helpers.py +3 -3
  11. edsl/auto/AutoStudy.py +5 -18
  12. edsl/auto/StageBase.py +40 -53
  13. edsl/auto/StageQuestions.py +1 -2
  14. edsl/auto/utilities.py +6 -0
  15. edsl/config.py +2 -22
  16. edsl/conversation/car_buying.py +1 -2
  17. edsl/coop/PriceFetcher.py +1 -1
  18. edsl/coop/coop.py +47 -125
  19. edsl/coop/utils.py +14 -14
  20. edsl/data/Cache.py +27 -45
  21. edsl/data/CacheEntry.py +15 -12
  22. edsl/data/CacheHandler.py +12 -31
  23. edsl/data/RemoteCacheSync.py +46 -154
  24. edsl/data/__init__.py +3 -4
  25. edsl/data_transfer_models.py +1 -2
  26. edsl/enums.py +0 -27
  27. edsl/exceptions/__init__.py +50 -50
  28. edsl/exceptions/agents.py +0 -12
  29. edsl/exceptions/questions.py +6 -24
  30. edsl/exceptions/scenarios.py +0 -7
  31. edsl/inference_services/AnthropicService.py +19 -38
  32. edsl/inference_services/AwsBedrock.py +2 -0
  33. edsl/inference_services/AzureAI.py +2 -0
  34. edsl/inference_services/GoogleService.py +12 -7
  35. edsl/inference_services/InferenceServiceABC.py +85 -18
  36. edsl/inference_services/InferenceServicesCollection.py +79 -120
  37. edsl/inference_services/MistralAIService.py +3 -0
  38. edsl/inference_services/OpenAIService.py +35 -47
  39. edsl/inference_services/PerplexityService.py +3 -0
  40. edsl/inference_services/TestService.py +10 -11
  41. edsl/inference_services/TogetherAIService.py +3 -5
  42. edsl/jobs/Answers.py +14 -1
  43. edsl/jobs/Jobs.py +431 -356
  44. edsl/jobs/JobsChecks.py +10 -35
  45. edsl/jobs/JobsPrompts.py +4 -6
  46. edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
  47. edsl/jobs/buckets/BucketCollection.py +3 -44
  48. edsl/jobs/buckets/TokenBucket.py +21 -53
  49. edsl/jobs/interviews/Interview.py +408 -143
  50. edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
  51. edsl/jobs/runners/JobsRunnerStatus.py +165 -133
  52. edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
  53. edsl/jobs/tasks/TaskHistory.py +18 -38
  54. edsl/jobs/tasks/task_status_enum.py +2 -0
  55. edsl/language_models/KeyLookup.py +30 -0
  56. edsl/language_models/LanguageModel.py +236 -194
  57. edsl/language_models/ModelList.py +19 -28
  58. edsl/language_models/__init__.py +2 -1
  59. edsl/language_models/registry.py +190 -0
  60. edsl/language_models/repair.py +2 -2
  61. edsl/language_models/unused/ReplicateBase.py +83 -0
  62. edsl/language_models/utilities.py +4 -5
  63. edsl/notebooks/Notebook.py +14 -19
  64. edsl/prompts/Prompt.py +39 -29
  65. edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
  66. edsl/questions/QuestionBase.py +214 -68
  67. edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
  68. edsl/questions/QuestionBasePromptsMixin.py +3 -7
  69. edsl/questions/QuestionBudget.py +1 -1
  70. edsl/questions/QuestionCheckBox.py +3 -3
  71. edsl/questions/QuestionExtract.py +7 -5
  72. edsl/questions/QuestionFreeText.py +3 -2
  73. edsl/questions/QuestionList.py +18 -10
  74. edsl/questions/QuestionMultipleChoice.py +23 -67
  75. edsl/questions/QuestionNumerical.py +4 -2
  76. edsl/questions/QuestionRank.py +17 -7
  77. edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
  78. edsl/questions/SimpleAskMixin.py +3 -4
  79. edsl/questions/__init__.py +1 -2
  80. edsl/questions/derived/QuestionLinearScale.py +3 -6
  81. edsl/questions/derived/QuestionTopK.py +1 -1
  82. edsl/questions/descriptors.py +3 -17
  83. edsl/questions/question_registry.py +1 -1
  84. edsl/results/CSSParameterizer.py +1 -1
  85. edsl/results/Dataset.py +7 -170
  86. edsl/results/DatasetExportMixin.py +305 -168
  87. edsl/results/DatasetTree.py +8 -28
  88. edsl/results/Result.py +206 -298
  89. edsl/results/Results.py +131 -149
  90. edsl/results/ResultsDBMixin.py +238 -0
  91. edsl/results/ResultsExportMixin.py +0 -2
  92. edsl/results/{results_selector.py → Selector.py} +13 -23
  93. edsl/results/TableDisplay.py +171 -98
  94. edsl/results/__init__.py +1 -1
  95. edsl/scenarios/FileStore.py +239 -150
  96. edsl/scenarios/Scenario.py +193 -90
  97. edsl/scenarios/ScenarioHtmlMixin.py +3 -4
  98. edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
  99. edsl/scenarios/ScenarioList.py +244 -415
  100. edsl/scenarios/ScenarioListExportMixin.py +7 -0
  101. edsl/scenarios/ScenarioListPdfMixin.py +37 -15
  102. edsl/scenarios/__init__.py +2 -1
  103. edsl/study/ObjectEntry.py +1 -1
  104. edsl/study/SnapShot.py +1 -1
  105. edsl/study/Study.py +12 -5
  106. edsl/surveys/Rule.py +4 -5
  107. edsl/surveys/RuleCollection.py +27 -25
  108. edsl/surveys/Survey.py +791 -270
  109. edsl/surveys/SurveyCSS.py +8 -20
  110. edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
  111. edsl/surveys/__init__.py +2 -4
  112. edsl/surveys/descriptors.py +2 -6
  113. edsl/surveys/instructions/ChangeInstruction.py +2 -1
  114. edsl/surveys/instructions/Instruction.py +13 -4
  115. edsl/surveys/instructions/InstructionCollection.py +6 -11
  116. edsl/templates/error_reporting/interview_details.html +1 -1
  117. edsl/templates/error_reporting/report.html +1 -1
  118. edsl/tools/plotting.py +1 -1
  119. edsl/utilities/utilities.py +23 -35
  120. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
  121. edsl-0.1.39.dev1.dist-info/RECORD +277 -0
  122. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
  123. edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
  124. edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
  125. edsl/agents/question_option_processor.py +0 -172
  126. edsl/coop/CoopFunctionsMixin.py +0 -15
  127. edsl/coop/ExpectedParrotKeyHandler.py +0 -125
  128. edsl/exceptions/inference_services.py +0 -5
  129. edsl/inference_services/AvailableModelCacheHandler.py +0 -184
  130. edsl/inference_services/AvailableModelFetcher.py +0 -215
  131. edsl/inference_services/ServiceAvailability.py +0 -135
  132. edsl/inference_services/data_structures.py +0 -134
  133. edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
  134. edsl/jobs/FetchInvigilator.py +0 -47
  135. edsl/jobs/InterviewTaskManager.py +0 -98
  136. edsl/jobs/InterviewsConstructor.py +0 -50
  137. edsl/jobs/JobsComponentConstructor.py +0 -189
  138. edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
  139. edsl/jobs/RequestTokenEstimator.py +0 -30
  140. edsl/jobs/async_interview_runner.py +0 -138
  141. edsl/jobs/buckets/TokenBucketAPI.py +0 -211
  142. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  143. edsl/jobs/check_survey_scenario_compatibility.py +0 -85
  144. edsl/jobs/data_structures.py +0 -120
  145. edsl/jobs/decorators.py +0 -35
  146. edsl/jobs/jobs_status_enums.py +0 -9
  147. edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
  148. edsl/jobs/results_exceptions_handler.py +0 -98
  149. edsl/language_models/ComputeCost.py +0 -63
  150. edsl/language_models/PriceManager.py +0 -127
  151. edsl/language_models/RawResponseHandler.py +0 -106
  152. edsl/language_models/ServiceDataSources.py +0 -0
  153. edsl/language_models/key_management/KeyLookup.py +0 -63
  154. edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
  155. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  156. edsl/language_models/key_management/__init__.py +0 -0
  157. edsl/language_models/key_management/models.py +0 -131
  158. edsl/language_models/model.py +0 -256
  159. edsl/notebooks/NotebookToLaTeX.py +0 -142
  160. edsl/questions/ExceptionExplainer.py +0 -77
  161. edsl/questions/HTMLQuestion.py +0 -103
  162. edsl/questions/QuestionMatrix.py +0 -265
  163. edsl/questions/data_structures.py +0 -20
  164. edsl/questions/loop_processor.py +0 -149
  165. edsl/questions/response_validator_factory.py +0 -34
  166. edsl/questions/templates/matrix/__init__.py +0 -1
  167. edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
  168. edsl/questions/templates/matrix/question_presentation.jinja +0 -20
  169. edsl/results/MarkdownToDocx.py +0 -122
  170. edsl/results/MarkdownToPDF.py +0 -111
  171. edsl/results/TextEditor.py +0 -50
  172. edsl/results/file_exports.py +0 -252
  173. edsl/results/smart_objects.py +0 -96
  174. edsl/results/table_data_class.py +0 -12
  175. edsl/results/table_renderers.py +0 -118
  176. edsl/scenarios/ConstructDownloadLink.py +0 -109
  177. edsl/scenarios/DocumentChunker.py +0 -102
  178. edsl/scenarios/DocxScenario.py +0 -16
  179. edsl/scenarios/PdfExtractor.py +0 -40
  180. edsl/scenarios/directory_scanner.py +0 -96
  181. edsl/scenarios/file_methods.py +0 -85
  182. edsl/scenarios/handlers/__init__.py +0 -13
  183. edsl/scenarios/handlers/csv.py +0 -49
  184. edsl/scenarios/handlers/docx.py +0 -76
  185. edsl/scenarios/handlers/html.py +0 -37
  186. edsl/scenarios/handlers/json.py +0 -111
  187. edsl/scenarios/handlers/latex.py +0 -5
  188. edsl/scenarios/handlers/md.py +0 -51
  189. edsl/scenarios/handlers/pdf.py +0 -68
  190. edsl/scenarios/handlers/png.py +0 -39
  191. edsl/scenarios/handlers/pptx.py +0 -105
  192. edsl/scenarios/handlers/py.py +0 -294
  193. edsl/scenarios/handlers/sql.py +0 -313
  194. edsl/scenarios/handlers/sqlite.py +0 -149
  195. edsl/scenarios/handlers/txt.py +0 -33
  196. edsl/scenarios/scenario_selector.py +0 -156
  197. edsl/surveys/ConstructDAG.py +0 -92
  198. edsl/surveys/EditSurvey.py +0 -221
  199. edsl/surveys/InstructionHandler.py +0 -100
  200. edsl/surveys/MemoryManagement.py +0 -72
  201. edsl/surveys/RuleManager.py +0 -172
  202. edsl/surveys/Simulator.py +0 -75
  203. edsl/surveys/SurveyToApp.py +0 -141
  204. edsl/utilities/PrettyList.py +0 -56
  205. edsl/utilities/is_notebook.py +0 -18
  206. edsl/utilities/is_valid_variable_name.py +0 -11
  207. edsl/utilities/remove_edsl_version.py +0 -24
  208. edsl-0.1.39.dist-info/RECORD +0 -358
  209. /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
  210. /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
  211. /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
  212. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
edsl/jobs/runners/JobsRunnerStatus.py
@@ -3,12 +3,21 @@ from __future__ import annotations
 import os
 import time
 import requests
+import warnings
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
+
+from typing import Any, List, DefaultDict, Optional, Dict
 from collections import defaultdict
-from typing import Any, Dict, Optional
 from uuid import UUID
 
+from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
+
+InterviewTokenUsageMapping = DefaultDict[str, InterviewTokenUsage]
+
+from edsl.jobs.interviews.InterviewStatistic import InterviewStatistic
+from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
+
 
 @dataclass
 class ModelInfo:
@@ -19,44 +28,11 @@ class ModelInfo:
     token_usage_info: dict
 
 
-class StatisticsTracker:
-    def __init__(self, total_interviews: int, distinct_models: list[str]):
-        self.start_time = time.time()
-        self.total_interviews = total_interviews
-        self.completed_count = 0
-        self.completed_by_model = defaultdict(int)
-        self.distinct_models = distinct_models
-        self.total_exceptions = 0
-        self.unfixed_exceptions = 0
-
-    def add_completed_interview(
-        self, model: str, num_exceptions: int = 0, num_unfixed: int = 0
-    ):
-        self.completed_count += 1
-        self.completed_by_model[model] += 1
-        self.total_exceptions += num_exceptions
-        self.unfixed_exceptions += num_unfixed
-
-    def get_elapsed_time(self) -> float:
-        return time.time() - self.start_time
-
-    def get_average_time_per_interview(self) -> float:
-        return (
-            self.get_elapsed_time() / self.completed_count
-            if self.completed_count > 0
-            else 0
-        )
-
-    def get_throughput(self) -> float:
-        elapsed = self.get_elapsed_time()
-        return self.completed_count / elapsed if elapsed > 0 else 0
-
-    def get_estimated_time_remaining(self) -> float:
-        if self.completed_count == 0:
-            return 0
-        avg_time = self.get_average_time_per_interview()
-        remaining = self.total_interviews - self.completed_count
-        return avg_time * remaining
+@dataclass
+class ModelTokenUsageStats:
+    token_usage_type: str
+    details: List[dict]
+    cost: str
 
 
 class JobsRunnerStatusBase(ABC):
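Note (not part of the diff): the StatisticsTracker removed above derives its progress figures with simple arithmetic over the running counts it keeps. A minimal standalone sketch of that arithmetic, with illustrative numbers:

    elapsed, completed, total = 30.0, 10, 40    # seconds elapsed, interviews completed, interviews requested
    avg_time = elapsed / completed              # 3.0 sec. per interview
    throughput = completed / elapsed            # ~0.33 interviews/sec.
    eta = avg_time * (total - completed)        # 90.0 sec. estimated time remaining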
@@ -70,39 +46,48 @@ class JobsRunnerStatusBase(ABC):
         api_key: str = None,
     ):
         self.jobs_runner = jobs_runner
+
+        # The uuid of the job on Coop
         self.job_uuid = job_uuid
+
         self.base_url = f"{endpoint_url}"
+
+        self.start_time = time.time()
+        self.completed_interviews = []
         self.refresh_rate = refresh_rate
         self.statistics = [
             "elapsed_time",
             "total_interviews_requested",
             "completed_interviews",
+            # "percent_complete",
             "average_time_per_interview",
+            # "task_remaining",
             "estimated_time_remaining",
             "exceptions",
             "unfixed_exceptions",
             "throughput",
         ]
-        self.num_total_interviews = n * len(self.jobs_runner)
+        self.num_total_interviews = n * len(self.jobs_runner.interviews)
 
         self.distinct_models = list(
-            set(model.model for model in self.jobs_runner.jobs.models)
+            set(i.model.model for i in self.jobs_runner.interviews)
         )
 
-        self.stats_tracker = StatisticsTracker(
-            total_interviews=self.num_total_interviews,
-            distinct_models=self.distinct_models,
-        )
+        self.completed_interview_by_model = defaultdict(list)
 
         self.api_key = api_key or os.getenv("EXPECTED_PARROT_API_KEY")
 
     @abstractmethod
     def has_ep_api_key(self):
-        """Checks if the user has an Expected Parrot API key."""
+        """
+        Checks if the user has an Expected Parrot API key.
+        """
        pass
 
     def get_status_dict(self) -> Dict[str, Any]:
-        """Converts current status into a JSON-serializable dictionary."""
+        """
+        Converts current status into a JSON-serializable dictionary.
+        """
         # Get all statistics
         stats = {}
         for stat_name in self.statistics:
@@ -110,46 +95,42 @@ class JobsRunnerStatusBase(ABC):
             name, value = list(stat.items())[0]
             stats[name] = value
 
+        # Calculate overall progress
+        total_interviews = len(self.jobs_runner.total_interviews)
+        completed = len(self.completed_interviews)
+
         # Get model-specific progress
         model_progress = {}
-        target_per_model = int(self.num_total_interviews / len(self.distinct_models))
-
         for model in self.distinct_models:
-            completed = self.stats_tracker.completed_by_model[model]
+            completed_for_model = len(self.completed_interview_by_model[model])
+            target_for_model = int(
+                self.num_total_interviews / len(self.distinct_models)
+            )
             model_progress[model] = {
-                "completed": completed,
-                "total": target_per_model,
+                "completed": completed_for_model,
+                "total": target_for_model,
                 "percent": (
-                    (completed / target_per_model * 100) if target_per_model > 0 else 0
+                    (completed_for_model / target_for_model * 100)
+                    if target_for_model > 0
+                    else 0
                 ),
             }
 
         status_dict = {
             "overall_progress": {
-                "completed": self.stats_tracker.completed_count,
-                "total": self.num_total_interviews,
+                "completed": completed,
+                "total": total_interviews,
                 "percent": (
-                    (
-                        self.stats_tracker.completed_count
-                        / self.num_total_interviews
-                        * 100
-                    )
-                    if self.num_total_interviews > 0
-                    else 0
+                    (completed / total_interviews * 100) if total_interviews > 0 else 0
                 ),
             },
             "language_model_progress": model_progress,
             "statistics": stats,
-            "status": (
-                "completed"
-                if self.stats_tracker.completed_count >= self.num_total_interviews
-                else "running"
-            ),
+            "status": "completed" if completed >= total_interviews else "running",
         }
 
         model_queues = {}
-        # for model, bucket in self.jobs_runner.bucket_collection.items():
-        for model, bucket in self.jobs_runner.environment.bucket_collection.items():
+        for model, bucket in self.jobs_runner.bucket_collection.items():
             model_name = model.model
             model_queues[model_name] = {
                 "language_model_name": model_name,
@@ -171,67 +152,98 @@ class JobsRunnerStatusBase(ABC):
         status_dict["language_model_queues"] = model_queues
         return status_dict
 
-    def add_completed_interview(self, result):
-        """Records a completed interview without storing the full interview data."""
-        self.stats_tracker.add_completed_interview(
-            model=result.model.model,
-            num_exceptions=(
-                len(result.exceptions) if hasattr(result, "exceptions") else 0
-            ),
-            num_unfixed=(
-                result.exceptions.num_unfixed() if hasattr(result, "exceptions") else 0
-            ),
-        )
-
-    def _compute_statistic(self, stat_name: str):
-        """Computes individual statistics based on the stats tracker."""
-        if stat_name == "elapsed_time":
-            value = self.stats_tracker.get_elapsed_time()
-            return {"elapsed_time": (value, 1, "sec.")}
-
-        elif stat_name == "total_interviews_requested":
-            return {"total_interviews_requested": (self.num_total_interviews, None, "")}
+    @abstractmethod
+    def setup(self):
+        """
+        Conducts any setup that needs to happen prior to sending status updates.
 
-        elif stat_name == "completed_interviews":
-            return {
-                "completed_interviews": (self.stats_tracker.completed_count, None, "")
-            }
+        Ex. For a local job, creates a job in the Coop database.
+        """
+        pass
 
-        elif stat_name == "average_time_per_interview":
-            value = self.stats_tracker.get_average_time_per_interview()
-            return {"average_time_per_interview": (value, 2, "sec.")}
+    @abstractmethod
+    def send_status_update(self):
+        """
+        Updates the current status of the job.
+        """
+        pass
 
-        elif stat_name == "estimated_time_remaining":
-            value = self.stats_tracker.get_estimated_time_remaining()
-            return {"estimated_time_remaining": (value, 1, "sec.")}
+    def add_completed_interview(self, result):
+        self.completed_interviews.append(result.interview_hash)
 
-        elif stat_name == "exceptions":
-            return {"exceptions": (self.stats_tracker.total_exceptions, None, "")}
+        relevant_model = result.model.model
+        self.completed_interview_by_model[relevant_model].append(result.interview_hash)
 
-        elif stat_name == "unfixed_exceptions":
-            return {
-                "unfixed_exceptions": (self.stats_tracker.unfixed_exceptions, None, "")
-            }
+    def _compute_statistic(self, stat_name: str):
+        completed_tasks = self.completed_interviews
+        elapsed_time = time.time() - self.start_time
+        interviews = self.jobs_runner.total_interviews
 
-        elif stat_name == "throughput":
-            value = self.stats_tracker.get_throughput()
-            return {"throughput": (value, 2, "interviews/sec.")}
+        stat_definitions = {
+            "elapsed_time": lambda: InterviewStatistic(
+                "elapsed_time", value=elapsed_time, digits=1, units="sec."
+            ),
+            "total_interviews_requested": lambda: InterviewStatistic(
+                "total_interviews_requested", value=len(interviews), units=""
+            ),
+            "completed_interviews": lambda: InterviewStatistic(
+                "completed_interviews", value=len(completed_tasks), units=""
+            ),
+            "percent_complete": lambda: InterviewStatistic(
+                "percent_complete",
+                value=(
+                    len(completed_tasks) / len(interviews) * 100
+                    if len(interviews) > 0
+                    else 0
+                ),
+                digits=1,
+                units="%",
+            ),
+            "average_time_per_interview": lambda: InterviewStatistic(
+                "average_time_per_interview",
+                value=elapsed_time / len(completed_tasks) if completed_tasks else 0,
+                digits=2,
+                units="sec.",
+            ),
+            "task_remaining": lambda: InterviewStatistic(
+                "task_remaining", value=len(interviews) - len(completed_tasks), units=""
+            ),
+            "estimated_time_remaining": lambda: InterviewStatistic(
+                "estimated_time_remaining",
+                value=(
+                    (len(interviews) - len(completed_tasks))
+                    * (elapsed_time / len(completed_tasks))
+                    if len(completed_tasks) > 0
+                    else 0
+                ),
+                digits=1,
+                units="sec.",
+            ),
+            "exceptions": lambda: InterviewStatistic(
+                "exceptions",
+                value=sum(len(i.exceptions) for i in interviews),
+                units="",
+            ),
+            "unfixed_exceptions": lambda: InterviewStatistic(
+                "unfixed_exceptions",
+                value=sum(i.exceptions.num_unfixed() for i in interviews),
+                units="",
+            ),
+            "throughput": lambda: InterviewStatistic(
+                "throughput",
+                value=len(completed_tasks) / elapsed_time if elapsed_time > 0 else 0,
+                digits=2,
+                units="interviews/sec.",
+            ),
+        }
+        return stat_definitions[stat_name]()
 
     def update_progress(self, stop_event):
         while not stop_event.is_set():
             self.send_status_update()
             time.sleep(self.refresh_rate)
-        self.send_status_update()
 
-    @abstractmethod
-    def setup(self):
-        """Conducts any setup needed prior to sending status updates."""
-        pass
-
-    @abstractmethod
-    def send_status_update(self):
-        """Updates the current status of the job."""
-        pass
+        self.send_status_update()
 
 
 class JobsRunnerStatus(JobsRunnerStatusBase):
@@ -248,35 +260,49 @@ class JobsRunnerStatus(JobsRunnerStatusBase):
         return f"{self.base_url}/api/v0/local-job/{str(self.job_uuid)}"
 
     def setup(self) -> None:
-        """Creates a local job on Coop if one does not already exist."""
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {self.api_key or 'None'}",
-        }
+        """
+        Creates a local job on Coop if one does not already exist.
+        """
+
+        headers = {"Content-Type": "application/json"}
+
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        else:
+            headers["Authorization"] = f"Bearer None"
 
         if self.job_uuid is None:
+            # Create a new local job
            response = requests.post(
                 self.create_url,
                 headers=headers,
                 timeout=1,
             )
-            response.raise_for_status()
-            data = response.json()
-            self.job_uuid = data.get("job_uuid")
+            response.raise_for_status()
+            data = response.json()
+            self.job_uuid = data.get("job_uuid")
 
         print(f"Running with progress bar. View progress at {self.viewing_url}")
 
     def send_status_update(self) -> None:
-        """Sends current status to the web endpoint using the instance's job_uuid."""
+        """
+        Sends current status to the web endpoint using the instance's job_uuid.
+        """
         try:
+            # Get the status dictionary and add the job_id
             status_dict = self.get_status_dict()
+
+            # Make the UUID JSON serializable
             status_dict["job_id"] = str(self.job_uuid)
 
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.api_key or 'None'}",
-            }
+            headers = {"Content-Type": "application/json"}
+
+            if self.api_key:
+                headers["Authorization"] = f"Bearer {self.api_key}"
+            else:
+                headers["Authorization"] = f"Bearer None"
 
+            # Send the update
             response = requests.patch(
                 self.update_url,
                 json=status_dict,
@@ -288,8 +314,14 @@ class JobsRunnerStatus(JobsRunnerStatusBase):
             print(f"Failed to send status update for job {self.job_uuid}: {e}")
 
     def has_ep_api_key(self) -> bool:
-        """Returns True if the user has an Expected Parrot API key."""
-        return self.api_key is not None
+        """
+        Returns True if the user has an Expected Parrot API key. Otherwise, returns False.
+        """
+
+        if self.api_key is not None:
+            return True
+        else:
+            return False
 
 
 if __name__ == "__main__":
edsl/jobs/tasks/QuestionTaskCreator.py
@@ -1,17 +1,17 @@
 import asyncio
-from typing import Callable, Union, List, TYPE_CHECKING
+from typing import Callable, Union, List
 from collections import UserList, UserDict
 
-from edsl.exceptions.jobs import InterviewErrorPriorTaskCanceled
+from edsl.jobs.buckets import ModelBuckets
+from edsl.exceptions import InterviewErrorPriorTaskCanceled
 
+from edsl.jobs.interviews.InterviewStatusDictionary import InterviewStatusDictionary
 from edsl.jobs.tasks.task_status_enum import TaskStatus, TaskStatusDescriptor
 from edsl.jobs.tasks.TaskStatusLog import TaskStatusLog
+from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
 from edsl.jobs.tokens.TokenUsage import TokenUsage
 from edsl.jobs.Answers import Answers
-
-if TYPE_CHECKING:
-    from edsl.questions.QuestionBase import QuestionBase
-    from edsl.jobs.buckets import ModelBuckets
+from edsl.questions.QuestionBase import QuestionBase
 
 
 class TokensUsed(UserDict):
@@ -24,6 +24,7 @@ class TokensUsed(UserDict):
 
 class QuestionTaskCreator(UserList):
     """Class to create and manage a single question and its dependencies.
+    The class is an instance of a UserList of tasks that must be completed before the focal task can be run.
 
     It is a UserList with all the tasks that must be completed before the focal task can be run.
     The focal task is the question that we are interested in answering.
@@ -34,9 +35,9 @@ class QuestionTaskCreator(UserList):
     def __init__(
         self,
         *,
-        question: "QuestionBase",
+        question: QuestionBase,
         answer_question_func: Callable,
-        model_buckets: "ModelBuckets",
+        model_buckets: ModelBuckets,
         token_estimator: Union[Callable, None] = None,
         iteration: int = 0,
     ):
@@ -50,15 +51,14 @@ class QuestionTaskCreator(UserList):
 
         """
         super().__init__([])
+        # answer_question_func is the 'interview.answer_question_and_record_task" method
         self.answer_question_func = answer_question_func
         self.question = question
         self.iteration = iteration
 
         self.model_buckets = model_buckets
-
         self.requests_bucket = self.model_buckets.requests_bucket
         self.tokens_bucket = self.model_buckets.tokens_bucket
-
         self.status_log = TaskStatusLog()
 
         def fake_token_estimator(question):
@@ -125,13 +125,11 @@ class QuestionTaskCreator(UserList):
 
         await self.tokens_bucket.get_tokens(requested_tokens)
 
-        if (estimated_wait_time := self.model_buckets.requests_bucket.wait_time(1)) > 0:
+        if (estimated_wait_time := self.requests_bucket.wait_time(1)) > 0:
            self.waiting = True  # do we need this?
             self.task_status = TaskStatus.WAITING_FOR_REQUEST_CAPACITY
 
-            await self.model_buckets.requests_bucket.get_tokens(
-                1, cheat_bucket_capacity=True
-            )
+            await self.requests_bucket.get_tokens(1, cheat_bucket_capacity=True)
 
         self.task_status = TaskStatus.API_CALL_IN_PROGRESS
         try:
@@ -144,22 +142,22 @@ class QuestionTaskCreator(UserList):
             raise e
 
         if results.cache_used:
-            self.model_buckets.tokens_bucket.add_tokens(requested_tokens)
-            self.model_buckets.requests_bucket.add_tokens(1)
+            self.tokens_bucket.add_tokens(requested_tokens)
+            self.requests_bucket.add_tokens(1)
             self.from_cache = True
             # Turbo mode means that we don't wait for tokens or requests.
-            self.model_buckets.tokens_bucket.turbo_mode_on()
-            self.model_buckets.requests_bucket.turbo_mode_on()
+            self.tokens_bucket.turbo_mode_on()
+            self.requests_bucket.turbo_mode_on()
         else:
-            self.model_buckets.tokens_bucket.turbo_mode_off()
-            self.model_buckets.requests_bucket.turbo_mode_off()
+            self.tokens_bucket.turbo_mode_off()
+            self.requests_bucket.turbo_mode_off()
 
         return results
 
     @classmethod
     def example(cls):
         """Return an example instance of the class."""
-        from edsl.questions.QuestionFreeText import QuestionFreeText
+        from edsl import QuestionFreeText
         from edsl.jobs.buckets.ModelBuckets import ModelBuckets
 
         m = ModelBuckets.infinity_bucket()
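Note (not part of the diff): both sides of the hunks above do the same bucket bookkeeping around a cached answer: the task first draws the estimated tokens and one request from its buckets, and if the answer turns out to have come from the cache it refunds both and switches the buckets into turbo mode. A minimal standalone sketch of that refund logic; DummyBucket and the numbers are illustrative, not edsl classes, while the get_tokens/add_tokens/turbo_mode_on names are those shown in the diff:

    class DummyBucket:
        def __init__(self, capacity):
            self.capacity = capacity
            self.turbo = False
        def get_tokens(self, n):
            self.capacity -= n
        def add_tokens(self, n):
            self.capacity += n
        def turbo_mode_on(self):
            self.turbo = True

    tokens_bucket, requests_bucket = DummyBucket(1000), DummyBucket(10)
    requested_tokens = 75
    tokens_bucket.get_tokens(requested_tokens)      # reserve the estimated tokens
    requests_bucket.get_tokens(1)                   # reserve one API request
    cache_used = True                               # the answer came back from the cache
    if cache_used:
        tokens_bucket.add_tokens(requested_tokens)  # refund: no tokens were actually spent
        requests_bucket.add_tokens(1)
        tokens_bucket.turbo_mode_on()               # cached runs don't need rate limiting
        requests_bucket.turbo_mode_on()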
edsl/jobs/tasks/TaskHistory.py
@@ -1,17 +1,18 @@
 from typing import List, Optional
 from io import BytesIO
+import webbrowser
+import os
 import base64
+from importlib import resources
 from edsl.jobs.tasks.task_status_enum import TaskStatus
-from edsl.Base import RepresentationMixin
 
 
-class TaskHistory(RepresentationMixin):
+class TaskHistory:
     def __init__(
         self,
-        interviews: List["Interview"] = None,
+        interviews: List["Interview"],
         include_traceback: bool = False,
         max_interviews: int = 10,
-        interviews_with_exceptions_only: bool = False,
     ):
         """
         The structure of a TaskHistory exception
@@ -21,33 +22,13 @@ class TaskHistory(RepresentationMixin):
         >>> _ = TaskHistory.example()
         ...
         """
-        self.interviews_with_exceptions_only = interviews_with_exceptions_only
-        self._interviews = {}
-        self.total_interviews = []
-        if interviews is not None:
-            for interview in interviews:
-                self.add_interview(interview)
 
-        self.include_traceback = include_traceback
-        self._interviews = {
-            index: interview for index, interview in enumerate(self.total_interviews)
-        }
-        self.max_interviews = max_interviews
-
-        # self.total_interviews = interviews
+        self.total_interviews = interviews
         self.include_traceback = include_traceback
 
-        # self._interviews = {index: i for index, i in enumerate(self.total_interviews)}
+        self._interviews = {index: i for index, i in enumerate(self.total_interviews)}
         self.max_interviews = max_interviews
 
-    def add_interview(self, interview: "Interview"):
-        """Add a single interview to the history"""
-        if self.interviews_with_exceptions_only and interview.exceptions == {}:
-            return
-
-        self.total_interviews.append(interview)
-        self._interviews[len(self._interviews)] = interview
-
     @classmethod
     def example(cls):
         """ """
@@ -140,6 +121,14 @@ class TaskHistory(RepresentationMixin):
         """Return True if there are any exceptions."""
         return len(self.unfixed_exceptions) > 0
 
+    def _repr_html_(self):
+        """Return an HTML representation of the TaskHistory."""
+        d = self.to_dict(add_edsl_version=False)
+        data = [[k, v] for k, v in d.items()]
+        from tabulate import tabulate
+
+        return tabulate(data, headers=["keys", "values"], tablefmt="html")
+
     def show_exceptions(self, tracebacks=False):
         """Print the exceptions."""
         for index in self.indices:
@@ -251,15 +240,11 @@ class TaskHistory(RepresentationMixin):
             plt.show()
 
     def css(self):
-        from importlib import resources
-
         env = resources.files("edsl").joinpath("templates/error_reporting")
         css = env.joinpath("report.css").read_text()
         return css
 
     def javascript(self):
-        from importlib import resources
-
         env = resources.files("edsl").joinpath("templates/error_reporting")
         js = env.joinpath("report.js").read_text()
         return js
@@ -296,7 +281,7 @@ class TaskHistory(RepresentationMixin):
         exceptions_by_question_name = {}
         for interview in self.total_interviews:
             for question_name, exceptions in interview.exceptions.items():
-                question_type = interview.survey._get_question_by_name(
+                question_type = interview.survey.get_question(
                     question_name
                 ).question_type
                 if (question_name, question_type) not in exceptions_by_question_name:
@@ -345,11 +330,8 @@ class TaskHistory(RepresentationMixin):
         }
         return sorted_exceptions_by_model
 
-    def generate_html_report(self, css: Optional[str], include_plot=False):
-        if include_plot:
-            performance_plot_html = self.plot(num_periods=100, get_embedded_html=True)
-        else:
-            performance_plot_html = ""
+    def generate_html_report(self, css: Optional[str]):
+        performance_plot_html = self.plot(num_periods=100, get_embedded_html=True)
 
         if css is None:
             css = self.css()
@@ -427,8 +409,6 @@ class TaskHistory(RepresentationMixin):
             print(f"Exception report saved to {filename}")
 
         if open_in_browser:
-            import webbrowser
-
             webbrowser.open(f"file://{os.path.abspath(filename)}")
 
         if return_link:
edsl/jobs/tasks/task_status_enum.py
@@ -3,6 +3,8 @@ from collections import UserDict
 import enum
 import time
 
+# from edsl.jobs.tasks.TaskStatusLogEntry import TaskStatusLogEntry
+
 
 class TaskStatus(enum.Enum):
     "These are the possible states a task can be in."