edsl 0.1.38.dev3__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (341) hide show
  1. edsl/Base.py +413 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +57 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +1071 -858
  7. edsl/agents/AgentList.py +551 -362
  8. edsl/agents/Invigilator.py +284 -222
  9. edsl/agents/InvigilatorBase.py +257 -284
  10. edsl/agents/PromptConstructor.py +272 -353
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/__init__.py +2 -3
  14. edsl/agents/descriptors.py +99 -99
  15. edsl/agents/prompt_helpers.py +129 -129
  16. edsl/agents/question_option_processor.py +172 -0
  17. edsl/auto/AutoStudy.py +130 -117
  18. edsl/auto/StageBase.py +243 -230
  19. edsl/auto/StageGenerateSurvey.py +178 -178
  20. edsl/auto/StageLabelQuestions.py +125 -125
  21. edsl/auto/StagePersona.py +61 -61
  22. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  23. edsl/auto/StagePersonaDimensionValues.py +74 -74
  24. edsl/auto/StagePersonaDimensions.py +69 -69
  25. edsl/auto/StageQuestions.py +74 -73
  26. edsl/auto/SurveyCreatorPipeline.py +21 -21
  27. edsl/auto/utilities.py +218 -224
  28. edsl/base/Base.py +279 -279
  29. edsl/config.py +177 -149
  30. edsl/conversation/Conversation.py +290 -290
  31. edsl/conversation/car_buying.py +59 -58
  32. edsl/conversation/chips.py +95 -95
  33. edsl/conversation/mug_negotiation.py +81 -81
  34. edsl/conversation/next_speaker_utilities.py +93 -93
  35. edsl/coop/CoopFunctionsMixin.py +15 -0
  36. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  37. edsl/coop/PriceFetcher.py +54 -54
  38. edsl/coop/__init__.py +2 -2
  39. edsl/coop/coop.py +1106 -961
  40. edsl/coop/utils.py +131 -131
  41. edsl/data/Cache.py +573 -530
  42. edsl/data/CacheEntry.py +230 -228
  43. edsl/data/CacheHandler.py +168 -149
  44. edsl/data/RemoteCacheSync.py +186 -97
  45. edsl/data/SQLiteDict.py +292 -292
  46. edsl/data/__init__.py +5 -4
  47. edsl/data/orm.py +10 -10
  48. edsl/data_transfer_models.py +74 -73
  49. edsl/enums.py +202 -173
  50. edsl/exceptions/BaseException.py +21 -21
  51. edsl/exceptions/__init__.py +54 -54
  52. edsl/exceptions/agents.py +54 -42
  53. edsl/exceptions/cache.py +5 -5
  54. edsl/exceptions/configuration.py +16 -16
  55. edsl/exceptions/coop.py +10 -10
  56. edsl/exceptions/data.py +14 -14
  57. edsl/exceptions/general.py +34 -34
  58. edsl/exceptions/inference_services.py +5 -0
  59. edsl/exceptions/jobs.py +33 -33
  60. edsl/exceptions/language_models.py +63 -63
  61. edsl/exceptions/prompts.py +15 -15
  62. edsl/exceptions/questions.py +109 -91
  63. edsl/exceptions/results.py +29 -29
  64. edsl/exceptions/scenarios.py +29 -22
  65. edsl/exceptions/surveys.py +37 -37
  66. edsl/inference_services/AnthropicService.py +106 -87
  67. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  68. edsl/inference_services/AvailableModelFetcher.py +215 -0
  69. edsl/inference_services/AwsBedrock.py +118 -120
  70. edsl/inference_services/AzureAI.py +215 -217
  71. edsl/inference_services/DeepInfraService.py +18 -18
  72. edsl/inference_services/GoogleService.py +143 -156
  73. edsl/inference_services/GroqService.py +20 -20
  74. edsl/inference_services/InferenceServiceABC.py +80 -147
  75. edsl/inference_services/InferenceServicesCollection.py +138 -97
  76. edsl/inference_services/MistralAIService.py +120 -123
  77. edsl/inference_services/OllamaService.py +18 -18
  78. edsl/inference_services/OpenAIService.py +236 -224
  79. edsl/inference_services/PerplexityService.py +160 -0
  80. edsl/inference_services/ServiceAvailability.py +135 -0
  81. edsl/inference_services/TestService.py +90 -89
  82. edsl/inference_services/TogetherAIService.py +172 -170
  83. edsl/inference_services/data_structures.py +134 -0
  84. edsl/inference_services/models_available_cache.py +118 -118
  85. edsl/inference_services/rate_limits_cache.py +25 -25
  86. edsl/inference_services/registry.py +41 -39
  87. edsl/inference_services/write_available.py +10 -10
  88. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  89. edsl/jobs/Answers.py +43 -56
  90. edsl/jobs/FetchInvigilator.py +47 -0
  91. edsl/jobs/InterviewTaskManager.py +98 -0
  92. edsl/jobs/InterviewsConstructor.py +50 -0
  93. edsl/jobs/Jobs.py +823 -1358
  94. edsl/jobs/JobsChecks.py +172 -0
  95. edsl/jobs/JobsComponentConstructor.py +189 -0
  96. edsl/jobs/JobsPrompts.py +270 -0
  97. edsl/jobs/JobsRemoteInferenceHandler.py +311 -0
  98. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  99. edsl/jobs/RequestTokenEstimator.py +30 -0
  100. edsl/jobs/__init__.py +1 -1
  101. edsl/jobs/async_interview_runner.py +138 -0
  102. edsl/jobs/buckets/BucketCollection.py +104 -63
  103. edsl/jobs/buckets/ModelBuckets.py +65 -65
  104. edsl/jobs/buckets/TokenBucket.py +283 -251
  105. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  106. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  107. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  108. edsl/jobs/data_structures.py +120 -0
  109. edsl/jobs/decorators.py +35 -0
  110. edsl/jobs/interviews/Interview.py +396 -661
  111. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  112. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  113. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  114. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  115. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  116. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  117. edsl/jobs/interviews/ReportErrors.py +66 -66
  118. edsl/jobs/interviews/interview_status_enum.py +9 -9
  119. edsl/jobs/jobs_status_enums.py +9 -0
  120. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  121. edsl/jobs/results_exceptions_handler.py +98 -0
  122. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -361
  123. edsl/jobs/runners/JobsRunnerStatus.py +298 -332
  124. edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
  125. edsl/jobs/tasks/TaskCreators.py +64 -64
  126. edsl/jobs/tasks/TaskHistory.py +470 -451
  127. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  128. edsl/jobs/tasks/task_status_enum.py +161 -163
  129. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  130. edsl/jobs/tokens/TokenUsage.py +34 -34
  131. edsl/language_models/ComputeCost.py +63 -0
  132. edsl/language_models/LanguageModel.py +626 -708
  133. edsl/language_models/ModelList.py +164 -109
  134. edsl/language_models/PriceManager.py +127 -0
  135. edsl/language_models/RawResponseHandler.py +106 -0
  136. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  137. edsl/language_models/ServiceDataSources.py +0 -0
  138. edsl/language_models/__init__.py +2 -3
  139. edsl/language_models/fake_openai_call.py +15 -15
  140. edsl/language_models/fake_openai_service.py +61 -61
  141. edsl/language_models/key_management/KeyLookup.py +63 -0
  142. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  143. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  144. edsl/language_models/key_management/__init__.py +0 -0
  145. edsl/language_models/key_management/models.py +131 -0
  146. edsl/language_models/model.py +256 -0
  147. edsl/language_models/repair.py +156 -156
  148. edsl/language_models/utilities.py +65 -64
  149. edsl/notebooks/Notebook.py +263 -258
  150. edsl/notebooks/NotebookToLaTeX.py +142 -0
  151. edsl/notebooks/__init__.py +1 -1
  152. edsl/prompts/Prompt.py +352 -357
  153. edsl/prompts/__init__.py +2 -2
  154. edsl/questions/ExceptionExplainer.py +77 -0
  155. edsl/questions/HTMLQuestion.py +103 -0
  156. edsl/questions/QuestionBase.py +518 -660
  157. edsl/questions/QuestionBasePromptsMixin.py +221 -217
  158. edsl/questions/QuestionBudget.py +227 -227
  159. edsl/questions/QuestionCheckBox.py +359 -359
  160. edsl/questions/QuestionExtract.py +180 -183
  161. edsl/questions/QuestionFreeText.py +113 -114
  162. edsl/questions/QuestionFunctional.py +166 -166
  163. edsl/questions/QuestionList.py +223 -231
  164. edsl/questions/QuestionMatrix.py +265 -0
  165. edsl/questions/QuestionMultipleChoice.py +330 -286
  166. edsl/questions/QuestionNumerical.py +151 -153
  167. edsl/questions/QuestionRank.py +314 -324
  168. edsl/questions/Quick.py +41 -41
  169. edsl/questions/SimpleAskMixin.py +74 -73
  170. edsl/questions/__init__.py +27 -26
  171. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
  172. edsl/questions/compose_questions.py +98 -98
  173. edsl/questions/data_structures.py +20 -0
  174. edsl/questions/decorators.py +21 -21
  175. edsl/questions/derived/QuestionLikertFive.py +76 -76
  176. edsl/questions/derived/QuestionLinearScale.py +90 -87
  177. edsl/questions/derived/QuestionTopK.py +93 -93
  178. edsl/questions/derived/QuestionYesNo.py +82 -82
  179. edsl/questions/descriptors.py +427 -413
  180. edsl/questions/loop_processor.py +149 -0
  181. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  182. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  183. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  184. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  185. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  186. edsl/questions/prompt_templates/question_list.jinja +17 -17
  187. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  188. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  189. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
  190. edsl/questions/question_registry.py +177 -147
  191. edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
  192. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
  193. edsl/questions/response_validator_factory.py +34 -0
  194. edsl/questions/settings.py +12 -12
  195. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  196. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  197. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  198. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  199. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  200. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  201. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  202. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  203. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  204. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  205. edsl/questions/templates/list/question_presentation.jinja +5 -5
  206. edsl/questions/templates/matrix/__init__.py +1 -0
  207. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  208. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  209. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  210. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  211. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  212. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  213. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  214. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  215. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  216. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  217. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  218. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  219. edsl/results/CSSParameterizer.py +108 -0
  220. edsl/results/Dataset.py +587 -293
  221. edsl/results/DatasetExportMixin.py +594 -717
  222. edsl/results/DatasetTree.py +295 -145
  223. edsl/results/MarkdownToDocx.py +122 -0
  224. edsl/results/MarkdownToPDF.py +111 -0
  225. edsl/results/Result.py +557 -456
  226. edsl/results/Results.py +1183 -1071
  227. edsl/results/ResultsExportMixin.py +45 -43
  228. edsl/results/ResultsGGMixin.py +121 -121
  229. edsl/results/TableDisplay.py +125 -0
  230. edsl/results/TextEditor.py +50 -0
  231. edsl/results/__init__.py +2 -2
  232. edsl/results/file_exports.py +252 -0
  233. edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
  234. edsl/results/{Selector.py → results_selector.py} +145 -135
  235. edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
  236. edsl/results/smart_objects.py +96 -0
  237. edsl/results/table_data_class.py +12 -0
  238. edsl/results/table_display.css +78 -0
  239. edsl/results/table_renderers.py +118 -0
  240. edsl/results/tree_explore.py +115 -115
  241. edsl/scenarios/ConstructDownloadLink.py +109 -0
  242. edsl/scenarios/DocumentChunker.py +102 -0
  243. edsl/scenarios/DocxScenario.py +16 -0
  244. edsl/scenarios/FileStore.py +543 -458
  245. edsl/scenarios/PdfExtractor.py +40 -0
  246. edsl/scenarios/Scenario.py +498 -544
  247. edsl/scenarios/ScenarioHtmlMixin.py +65 -64
  248. edsl/scenarios/ScenarioList.py +1458 -1112
  249. edsl/scenarios/ScenarioListExportMixin.py +45 -52
  250. edsl/scenarios/ScenarioListPdfMixin.py +239 -261
  251. edsl/scenarios/__init__.py +3 -4
  252. edsl/scenarios/directory_scanner.py +96 -0
  253. edsl/scenarios/file_methods.py +85 -0
  254. edsl/scenarios/handlers/__init__.py +13 -0
  255. edsl/scenarios/handlers/csv.py +49 -0
  256. edsl/scenarios/handlers/docx.py +76 -0
  257. edsl/scenarios/handlers/html.py +37 -0
  258. edsl/scenarios/handlers/json.py +111 -0
  259. edsl/scenarios/handlers/latex.py +5 -0
  260. edsl/scenarios/handlers/md.py +51 -0
  261. edsl/scenarios/handlers/pdf.py +68 -0
  262. edsl/scenarios/handlers/png.py +39 -0
  263. edsl/scenarios/handlers/pptx.py +105 -0
  264. edsl/scenarios/handlers/py.py +294 -0
  265. edsl/scenarios/handlers/sql.py +313 -0
  266. edsl/scenarios/handlers/sqlite.py +149 -0
  267. edsl/scenarios/handlers/txt.py +33 -0
  268. edsl/scenarios/scenario_join.py +131 -0
  269. edsl/scenarios/scenario_selector.py +156 -0
  270. edsl/shared.py +1 -1
  271. edsl/study/ObjectEntry.py +173 -173
  272. edsl/study/ProofOfWork.py +113 -113
  273. edsl/study/SnapShot.py +80 -80
  274. edsl/study/Study.py +521 -528
  275. edsl/study/__init__.py +4 -4
  276. edsl/surveys/ConstructDAG.py +92 -0
  277. edsl/surveys/DAG.py +148 -148
  278. edsl/surveys/EditSurvey.py +221 -0
  279. edsl/surveys/InstructionHandler.py +100 -0
  280. edsl/surveys/Memory.py +31 -31
  281. edsl/surveys/MemoryManagement.py +72 -0
  282. edsl/surveys/MemoryPlan.py +244 -244
  283. edsl/surveys/Rule.py +327 -326
  284. edsl/surveys/RuleCollection.py +385 -387
  285. edsl/surveys/RuleManager.py +172 -0
  286. edsl/surveys/Simulator.py +75 -0
  287. edsl/surveys/Survey.py +1280 -1787
  288. edsl/surveys/SurveyCSS.py +273 -261
  289. edsl/surveys/SurveyExportMixin.py +259 -259
  290. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -121
  291. edsl/surveys/SurveyQualtricsImport.py +284 -284
  292. edsl/surveys/SurveyToApp.py +141 -0
  293. edsl/surveys/__init__.py +5 -3
  294. edsl/surveys/base.py +53 -53
  295. edsl/surveys/descriptors.py +60 -56
  296. edsl/surveys/instructions/ChangeInstruction.py +48 -49
  297. edsl/surveys/instructions/Instruction.py +56 -53
  298. edsl/surveys/instructions/InstructionCollection.py +82 -77
  299. edsl/templates/error_reporting/base.html +23 -23
  300. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  301. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  302. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  303. edsl/templates/error_reporting/interview_details.html +115 -115
  304. edsl/templates/error_reporting/interviews.html +19 -10
  305. edsl/templates/error_reporting/overview.html +4 -4
  306. edsl/templates/error_reporting/performance_plot.html +1 -1
  307. edsl/templates/error_reporting/report.css +73 -73
  308. edsl/templates/error_reporting/report.html +117 -117
  309. edsl/templates/error_reporting/report.js +25 -25
  310. edsl/tools/__init__.py +1 -1
  311. edsl/tools/clusters.py +192 -192
  312. edsl/tools/embeddings.py +27 -27
  313. edsl/tools/embeddings_plotting.py +118 -118
  314. edsl/tools/plotting.py +112 -112
  315. edsl/tools/summarize.py +18 -18
  316. edsl/utilities/PrettyList.py +56 -0
  317. edsl/utilities/SystemInfo.py +28 -28
  318. edsl/utilities/__init__.py +22 -22
  319. edsl/utilities/ast_utilities.py +25 -25
  320. edsl/utilities/data/Registry.py +6 -6
  321. edsl/utilities/data/__init__.py +1 -1
  322. edsl/utilities/data/scooter_results.json +1 -1
  323. edsl/utilities/decorators.py +77 -77
  324. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  325. edsl/utilities/interface.py +627 -627
  326. edsl/utilities/is_notebook.py +18 -0
  327. edsl/utilities/is_valid_variable_name.py +11 -0
  328. edsl/utilities/naming_utilities.py +263 -263
  329. edsl/utilities/remove_edsl_version.py +24 -0
  330. edsl/utilities/repair_functions.py +28 -28
  331. edsl/utilities/restricted_python.py +70 -70
  332. edsl/utilities/utilities.py +436 -409
  333. {edsl-0.1.38.dev3.dist-info → edsl-0.1.39.dist-info}/LICENSE +21 -21
  334. {edsl-0.1.38.dev3.dist-info → edsl-0.1.39.dist-info}/METADATA +13 -10
  335. edsl-0.1.39.dist-info/RECORD +358 -0
  336. {edsl-0.1.38.dev3.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
  337. edsl/language_models/KeyLookup.py +0 -30
  338. edsl/language_models/registry.py +0 -137
  339. edsl/language_models/unused/ReplicateBase.py +0 -83
  340. edsl/results/ResultsDBMixin.py +0 -238
  341. edsl-0.1.38.dev3.dist-info/RECORD +0 -269
@@ -0,0 +1,138 @@
1
+ from collections.abc import AsyncGenerator
2
+ from typing import List, TypeVar, Generator, Tuple, TYPE_CHECKING
3
+ from dataclasses import dataclass
4
+ import asyncio
5
+ from contextlib import asynccontextmanager
6
+ from edsl.data_transfer_models import EDSLResultObjectInput
7
+
8
+ from edsl.results.Result import Result
9
+ from edsl.jobs.interviews.Interview import Interview
10
+
11
+ if TYPE_CHECKING:
12
+ from edsl.jobs.Jobs import Jobs
13
+
14
+
15
@dataclass
class InterviewResult:
    """Bundle returned for one successfully processed interview."""

    # Result object built from the interview's answers.
    result: Result
    # The interview that produced ``result``.
    interview: Interview
    # Index of the interview within the expanded interview list.
    order: int
20
+
21
+
22
+ from edsl.jobs.data_structures import RunConfig
23
+
24
+
25
class AsyncInterviewRunner:
    """Run the interviews of a ``Jobs`` object concurrently, in bounded chunks.

    The interviews are expanded (each one repeated ``n`` times), split into
    chunks of at most ``MAX_CONCURRENT`` interviews, and every chunk is awaited
    as a whole before its results are yielded.
    """

    # Upper bound on the number of interviews awaited at the same time.
    MAX_CONCURRENT = 5

    def __init__(self, jobs: "Jobs", run_config: RunConfig):
        """Store the jobs object and the run configuration.

        :param jobs: object whose ``generate_interviews()`` supplies the interviews
        :param run_config: provides ``parameters`` (``n``, ``stop_on_exception``)
            and ``environment`` (``cache``, ``jobs_runner_status``)
        """
        self.jobs = jobs
        self.run_config = run_config
        # Set by ``run`` once the list of interviews has been built.
        self._initialized = asyncio.Event()

    def _expand_interviews(self) -> Generator["Interview", None, None]:
        """Yield each interview ``run_config.parameters.n`` times.

        Iteration 0 yields the interview itself after attaching the configured
        cache to it; every later iteration yields
        ``interview.duplicate(iteration=..., cache=...)`` with the same cache.
        """
        for interview in self.jobs.generate_interviews():
            for iteration in range(self.run_config.parameters.n):
                if iteration > 0:
                    yield interview.duplicate(
                        iteration=iteration, cache=self.run_config.environment.cache
                    )
                else:
                    interview.cache = self.run_config.environment.cache
                    yield interview

    async def _conduct_interview(
        self, interview: "Interview"
    ) -> Tuple["Result", "Interview"]:
        """Conduct one interview and build its ``Result``.

        The interview is returned together with the result so the caller keeps
        access to it afterwards (presumably because it now carries any
        exceptions recorded during the run -- TODO confirm).

        :param interview: the interview to conduct
        :return: ``(result, interview)``

        ``extracted_answers`` is a dictionary of the answers to the questions in
        the interview, e.g.
        ``{'how_are_you': "Good", 'how_are_you_generated_tokens': "Good"}``.
        It is not the same as the generated tokens: it can include substantial
        cleaning, processing and validation.
        """
        extracted_answers: dict[str, str]
        model_response_objects: List[EDSLResultObjectInput]

        extracted_answers, model_response_objects = (
            await interview.async_conduct_interview(self.run_config)
        )
        result = Result.from_interview(
            interview=interview,
            extracted_answers=extracted_answers,
            model_response_objects=model_response_objects,
        )
        return result, interview

    async def run(
        self,
    ) -> AsyncGenerator[tuple[Result, Interview], None]:
        """Process all interviews chunk by chunk, yielding ``(result, interview)``.

        Each chunk of at most ``MAX_CONCURRENT`` interviews is started as a set
        of tasks and awaited with ``asyncio.gather``; results are yielded only
        after the whole chunk has finished, in the order of the chunk.

        When ``run_config.parameters.stop_on_exception`` is true, the first
        failure is re-raised. Otherwise failed interviews are skipped and
        processing continues with the remaining ones.
        """
        interviews = list(self._expand_interviews())
        self._initialized.set()

        async def _process_single_interview(
            interview: Interview, idx: int
        ) -> "InterviewResult | None":
            """Run one interview; return ``None`` if it failed and failures are tolerated."""
            try:
                result, interview = await self._conduct_interview(interview)
                self.run_config.environment.jobs_runner_status.add_completed_interview(
                    result
                )
                result.order = idx
                return InterviewResult(result, interview, idx)
            except Exception:
                if self.run_config.parameters.stop_on_exception:
                    raise
                return None

        # Process interviews in chunks
        for i in range(0, len(interviews), self.MAX_CONCURRENT):
            chunk = interviews[i : i + self.MAX_CONCURRENT]
            tasks = [
                asyncio.create_task(_process_single_interview(interview, idx))
                for idx, interview in enumerate(chunk, start=i)
            ]

            try:
                # Wait for all tasks in the chunk to complete
                results = await asyncio.gather(
                    *tasks,
                    return_exceptions=not self.run_config.parameters.stop_on_exception,
                )

                # With return_exceptions=True, gather may hand back exception
                # objects (e.g. a cancelled task) next to real results. Keep
                # only genuine results so one bad entry cannot abort the rest
                # of the chunk.
                for outcome in results:
                    if isinstance(outcome, InterviewResult):
                        yield outcome.result, outcome.interview

            except Exception:
                if self.run_config.parameters.stop_on_exception:
                    raise
                continue

            finally:
                # Clean up any remaining tasks
                for task in tasks:
                    if not task.done():
                        task.cancel()
@@ -1,63 +1,104 @@
1
- from collections import UserDict
2
- from edsl.jobs.buckets.TokenBucket import TokenBucket
3
- from edsl.jobs.buckets.ModelBuckets import ModelBuckets
4
-
5
-
6
- class BucketCollection(UserDict):
7
- """A Jobs object will have a whole collection of model buckets, as multiple models could be used.
8
-
9
- The keys here are the models, and the values are the ModelBuckets objects.
10
- Models themselves are hashable, so this works.
11
- """
12
-
13
- def __init__(self, infinity_buckets=False):
14
- super().__init__()
15
- self.infinity_buckets = infinity_buckets
16
- self.models_to_services = {}
17
- self.services_to_buckets = {}
18
-
19
- def __repr__(self):
20
- return f"BucketCollection({self.data})"
21
-
22
- def add_model(self, model: "LanguageModel") -> None:
23
- """Adds a model to the bucket collection.
24
-
25
- This will create the token and request buckets for the model."""
26
-
27
- # compute the TPS and RPS from the model
28
- if not self.infinity_buckets:
29
- TPS = model.TPM / 60.0
30
- RPS = model.RPM / 60.0
31
- else:
32
- TPS = float("inf")
33
- RPS = float("inf")
34
-
35
- if model.model not in self.models_to_services:
36
- service = model._inference_service_
37
- if service not in self.services_to_buckets:
38
- requests_bucket = TokenBucket(
39
- bucket_name=service,
40
- bucket_type="requests",
41
- capacity=RPS,
42
- refill_rate=RPS,
43
- )
44
- tokens_bucket = TokenBucket(
45
- bucket_name=service,
46
- bucket_type="tokens",
47
- capacity=TPS,
48
- refill_rate=TPS,
49
- )
50
- self.services_to_buckets[service] = ModelBuckets(
51
- requests_bucket, tokens_bucket
52
- )
53
- self.models_to_services[model.model] = service
54
- self[model] = self.services_to_buckets[service]
55
- else:
56
- self[model] = self.services_to_buckets[self.models_to_services[model.model]]
57
-
58
- def visualize(self) -> dict:
59
- """Visualize the token and request buckets for each model."""
60
- plots = {}
61
- for model in self:
62
- plots[model] = self[model].visualize()
63
- return plots
1
+ from typing import Optional
2
+ from collections import UserDict
3
+ from edsl.jobs.buckets.TokenBucket import TokenBucket
4
+ from edsl.jobs.buckets.ModelBuckets import ModelBuckets
5
+
6
+ # from functools import wraps
7
+ from threading import RLock
8
+
9
+ from edsl.jobs.decorators import synchronized_class
10
+
11
+
12
@synchronized_class
class BucketCollection(UserDict):
    """Mapping from language models to their ``ModelBuckets``.

    A Jobs object can use several models, so it holds a collection of buckets.
    Keys are the models (which are hashable); values are ``ModelBuckets``
    objects. Models that resolve to the same inference service share one
    ``ModelBuckets`` instance.
    """

    def __init__(self, infinity_buckets: bool = False):
        """Create an empty collection.

        :param infinity_buckets: when true, every bucket is created with
            infinite capacity and refill rate, i.e. it never runs out of
            tokens or requests.
        """
        super().__init__()
        self.infinity_buckets = infinity_buckets
        self.models_to_services = {}
        self.services_to_buckets = {}
        self._lock = RLock()

        # NOTE(review): CONFIG is not referenced below; presumably the import is
        # kept for its side effects before the environment is read -- confirm.
        from edsl.config import CONFIG
        import os

        url = os.environ.get("EDSL_REMOTE_TOKEN_BUCKET_URL", None)

        # Both a missing variable and the literal string "None" mean "no remote bucket".
        self.remote_url = None if (url is None or url == "None") else url

    @classmethod
    def from_models(
        cls, models_list: list, infinity_buckets: bool = False
    ) -> "BucketCollection":
        """Build a collection and register every model in ``models_list``."""
        collection = cls(infinity_buckets=infinity_buckets)
        for language_model in models_list:
            collection.add_model(language_model)
        return collection

    def get_tokens(
        self, model: "LanguageModel", bucket_type: str, num_tokens: int
    ) -> int:
        """Forward ``get_tokens(num_tokens)`` to one bucket of ``model``.

        :param model: key of the ``ModelBuckets`` entry to use
        :param bucket_type: attribute name of the bucket on that entry
            (``ModelBuckets`` exposes ``requests_bucket`` and ``tokens_bucket``)
        :param num_tokens: amount passed through to the bucket
        :return: whatever the bucket's ``get_tokens`` returns
        """
        return getattr(self[model], bucket_type).get_tokens(num_tokens)

    def __repr__(self):
        return f"BucketCollection({self.data})"

    def add_model(self, model: "LanguageModel") -> None:
        """Register ``model``, creating its service's buckets on first use.

        Per-second rates are derived from the model's per-minute limits
        (``tpm`` / ``rpm``), or are infinite when ``infinity_buckets`` is set.
        """
        # Per-second rates; the model's limits are read only for finite buckets.
        TPS = float("inf") if self.infinity_buckets else model.tpm / 60.0
        RPS = float("inf") if self.infinity_buckets else model.rpm / 60.0

        model_name = model.model
        if model_name in self.models_to_services:
            # Model name already mapped to a service: reuse that service's buckets.
            known_service = self.models_to_services[model_name]
            self[model] = self.services_to_buckets[known_service]
            return

        service = model._inference_service_
        if service not in self.services_to_buckets:
            requests_bucket = TokenBucket(
                bucket_name=service,
                bucket_type="requests",
                capacity=RPS,
                refill_rate=RPS,
                remote_url=self.remote_url,
            )
            tokens_bucket = TokenBucket(
                bucket_name=service,
                bucket_type="tokens",
                capacity=TPS,
                refill_rate=TPS,
                remote_url=self.remote_url,
            )
            self.services_to_buckets[service] = ModelBuckets(
                requests_bucket, tokens_bucket
            )
        self.models_to_services[model_name] = service
        self[model] = self.services_to_buckets[service]

    def visualize(self) -> dict:
        """Return ``{model: buckets.visualize()}`` for every registered model."""
        return {model: self[model].visualize() for model in self}
@@ -1,65 +1,65 @@
1
- # from edsl.jobs.buckets.TokenBucket import TokenBucket
2
-
3
-
4
- class ModelBuckets:
5
- """A class to represent the token and request buckets for a model.
6
-
7
- Most LLM model services have limits both on requests-per-minute (RPM) and tokens-per-minute (TPM).
8
- A request is one call to the service. The number of tokens required for a request depends on parameters.
9
- """
10
-
11
- def __init__(self, requests_bucket: "TokenBucket", tokens_bucket: "TokenBucket"):
12
- """Initialize the model buckets.
13
-
14
- The requests bucket captures requests per unit of time.
15
- The tokens bucket captures the number of language model tokens.
16
-
17
- """
18
- self.requests_bucket = requests_bucket
19
- self.tokens_bucket = tokens_bucket
20
-
21
- def __add__(self, other: "ModelBuckets"):
22
- """Combine two model buckets."""
23
- return ModelBuckets(
24
- requests_bucket=self.requests_bucket + other.requests_bucket,
25
- tokens_bucket=self.tokens_bucket + other.tokens_bucket,
26
- )
27
-
28
- def turbo_mode_on(self):
29
- """Set the refill rate to infinity for both buckets."""
30
- self.requests_bucket.turbo_mode_on()
31
- self.tokens_bucket.turbo_mode_on()
32
-
33
- def turbo_mode_off(self):
34
- """Restore the refill rate to its original value for both buckets."""
35
- self.requests_bucket.turbo_mode_off()
36
- self.tokens_bucket.turbo_mode_off()
37
-
38
- @classmethod
39
- def infinity_bucket(cls, model_name: str = "not_specified") -> "ModelBuckets":
40
- """Create a bucket with infinite capacity and refill rate."""
41
- from edsl.jobs.buckets.TokenBucket import TokenBucket
42
-
43
- return cls(
44
- requests_bucket=TokenBucket(
45
- bucket_name=model_name,
46
- bucket_type="requests",
47
- capacity=float("inf"),
48
- refill_rate=float("inf"),
49
- ),
50
- tokens_bucket=TokenBucket(
51
- bucket_name=model_name,
52
- bucket_type="tokens",
53
- capacity=float("inf"),
54
- refill_rate=float("inf"),
55
- ),
56
- )
57
-
58
- def visualize(self):
59
- """Visualize the token and request buckets."""
60
- plot1 = self.requests_bucket.visualize()
61
- plot2 = self.tokens_bucket.visualize()
62
- return plot1, plot2
63
-
64
- def __repr__(self):
65
- return f"ModelBuckets(requests_bucket={self.requests_bucket}, tokens_bucket={self.tokens_bucket})"
1
+ # from edsl.jobs.buckets.TokenBucket import TokenBucket
2
+
3
+
4
class ModelBuckets:
    """Pair of buckets (requests and tokens) that rate-limit one model.

    Most LLM model services limit both requests-per-minute (RPM) and
    tokens-per-minute (TPM). A request is one call to the service; the number
    of tokens a request needs depends on its parameters.
    """

    def __init__(self, requests_bucket: "TokenBucket", tokens_bucket: "TokenBucket"):
        """Keep references to the two buckets.

        :param requests_bucket: captures requests per unit of time
        :param tokens_bucket: captures the number of language model tokens
        """
        self.requests_bucket = requests_bucket
        self.tokens_bucket = tokens_bucket

    def __add__(self, other: "ModelBuckets"):
        """Return a new ``ModelBuckets`` whose buckets are the pairwise sums."""
        combined_requests = self.requests_bucket + other.requests_bucket
        combined_tokens = self.tokens_bucket + other.tokens_bucket
        return ModelBuckets(
            requests_bucket=combined_requests,
            tokens_bucket=combined_tokens,
        )

    def turbo_mode_on(self):
        """Call ``turbo_mode_on()`` on the requests bucket, then the tokens bucket."""
        for bucket in (self.requests_bucket, self.tokens_bucket):
            bucket.turbo_mode_on()

    def turbo_mode_off(self):
        """Call ``turbo_mode_off()`` on the requests bucket, then the tokens bucket."""
        for bucket in (self.requests_bucket, self.tokens_bucket):
            bucket.turbo_mode_off()

    @classmethod
    def infinity_bucket(cls, model_name: str = "not_specified") -> "ModelBuckets":
        """Create buckets with infinite capacity and infinite refill rate.

        :param model_name: used as the ``bucket_name`` of both buckets
        """
        # Imported here rather than at module level (the module-level import
        # is commented out in this file).
        from edsl.jobs.buckets.TokenBucket import TokenBucket

        unlimited_requests = TokenBucket(
            bucket_name=model_name,
            bucket_type="requests",
            capacity=float("inf"),
            refill_rate=float("inf"),
        )
        unlimited_tokens = TokenBucket(
            bucket_name=model_name,
            bucket_type="tokens",
            capacity=float("inf"),
            refill_rate=float("inf"),
        )
        return cls(requests_bucket=unlimited_requests, tokens_bucket=unlimited_tokens)

    def visualize(self):
        """Return ``(requests_plot, tokens_plot)`` from the buckets' ``visualize()``."""
        return self.requests_bucket.visualize(), self.tokens_bucket.visualize()

    def __repr__(self):
        return f"ModelBuckets(requests_bucket={self.requests_bucket}, tokens_bucket={self.tokens_bucket})"