edsl 0.1.47__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +303 -67
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +313 -167
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +4 -9
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +365 -220
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/{FileStore.py → file_store.py} +275 -189
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +18 -19
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/METADATA +1 -1
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -493
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/coop/PriceFetcher.py +0 -54
  236. edsl/data/Cache.py +0 -582
  237. edsl/data/CacheEntry.py +0 -238
  238. edsl/data/SQLiteDict.py +0 -292
  239. edsl/data/__init__.py +0 -5
  240. edsl/data/orm.py +0 -10
  241. edsl/exceptions/cache.py +0 -5
  242. edsl/exceptions/coop.py +0 -14
  243. edsl/exceptions/data.py +0 -14
  244. edsl/exceptions/scenarios.py +0 -29
  245. edsl/jobs/Answers.py +0 -43
  246. edsl/jobs/JobsPrompts.py +0 -354
  247. edsl/jobs/buckets/BucketCollection.py +0 -134
  248. edsl/jobs/buckets/ModelBuckets.py +0 -65
  249. edsl/jobs/buckets/TokenBucket.py +0 -283
  250. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  251. edsl/jobs/interviews/Interview.py +0 -395
  252. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  253. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  254. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  255. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  256. edsl/jobs/tasks/TaskCreators.py +0 -64
  257. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  258. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  259. edsl/language_models/LanguageModel.py +0 -635
  260. edsl/language_models/ServiceDataSources.py +0 -0
  261. edsl/language_models/key_management/KeyLookup.py +0 -63
  262. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  263. edsl/language_models/key_management/models.py +0 -137
  264. edsl/questions/QuestionBase.py +0 -544
  265. edsl/questions/QuestionFreeText.py +0 -130
  266. edsl/questions/derived/QuestionLikertFive.py +0 -76
  267. edsl/results/ResultsExportMixin.py +0 -45
  268. edsl/results/TextEditor.py +0 -50
  269. edsl/results/results_fetch_mixin.py +0 -33
  270. edsl/results/results_tools_mixin.py +0 -98
  271. edsl/scenarios/DocumentChunker.py +0 -104
  272. edsl/scenarios/Scenario.py +0 -548
  273. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  274. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  275. edsl/scenarios/handlers/latex.py +0 -5
  276. edsl/shared.py +0 -1
  277. edsl/surveys/Survey.py +0 -1301
  278. edsl/surveys/SurveyQualtricsImport.py +0 -284
  279. edsl/surveys/SurveyToApp.py +0 -141
  280. edsl/surveys/instructions/__init__.py +0 -0
  281. edsl/tools/__init__.py +0 -1
  282. edsl/tools/clusters.py +0 -192
  283. edsl/tools/embeddings.py +0 -27
  284. edsl/tools/embeddings_plotting.py +0 -118
  285. edsl/tools/plotting.py +0 -112
  286. edsl/tools/summarize.py +0 -18
  287. edsl/utilities/data/Registry.py +0 -6
  288. edsl/utilities/data/__init__.py +0 -1
  289. edsl/utilities/data/scooter_results.json +0 -1
  290. edsl-0.1.47.dist-info/RECORD +0 -354
  291. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  292. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  293. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  294. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  295. /edsl/{results → dataset/display}/table_display.css +0 -0
  296. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  297. /edsl/{results → dataset}/tree_explore.py +0 -0
  298. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  299. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  300. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  301. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  302. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  303. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  304. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  305. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  306. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  307. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  308. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  309. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  310. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  311. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  312. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  313. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  314. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
edsl/jobs/JobsPrompts.py DELETED
@@ -1,354 +0,0 @@
1
- import time
2
- import logging
3
- from typing import List, TYPE_CHECKING
4
-
5
- from edsl.results.Dataset import Dataset
6
-
7
- if TYPE_CHECKING:
8
- from edsl.jobs import Jobs
9
-
10
- # from edsl.jobs.interviews.Interview import Interview
11
- # from edsl.results.Dataset import Dataset
12
- # from edsl.agents.AgentList import AgentList
13
- # from edsl.scenarios.ScenarioList import ScenarioList
14
- # from edsl.surveys.Survey import Survey
15
-
16
- from edsl.jobs.FetchInvigilator import FetchInvigilator
17
- from edsl.data.CacheEntry import CacheEntry
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- class JobsPrompts:
23
- def __init__(self, jobs: "Jobs"):
24
- self.interviews = jobs.interviews()
25
- self.agents = jobs.agents
26
- self.scenarios = jobs.scenarios
27
- self.survey = jobs.survey
28
- self._price_lookup = None
29
- self._agent_lookup = {agent: idx for idx, agent in enumerate(self.agents)}
30
- self._scenario_lookup = {
31
- scenario: idx for idx, scenario in enumerate(self.scenarios)
32
- }
33
-
34
- @property
35
- def price_lookup(self):
36
- if self._price_lookup is None:
37
- from edsl.coop.coop import Coop
38
-
39
- c = Coop()
40
- self._price_lookup = c.fetch_prices()
41
- return self._price_lookup
42
-
43
- def prompts(self, iterations=1) -> "Dataset":
44
- """Return a Dataset of prompts that will be used.
45
-
46
- >>> from edsl.jobs import Jobs
47
- >>> Jobs.example().prompts()
48
- Dataset(...)
49
- """
50
- interviews = self.interviews
51
- interview_indices = []
52
- question_names = []
53
- user_prompts = []
54
- system_prompts = []
55
- scenario_indices = []
56
- agent_indices = []
57
- models = []
58
- costs = []
59
- cache_keys = []
60
-
61
- for interview_index, interview in enumerate(interviews):
62
- logger.info(f"Processing interview {interview_index} of {len(interviews)}")
63
- interview_start = time.time()
64
-
65
- # Fetch invigilators timing
66
- invig_start = time.time()
67
- invigilators = [
68
- FetchInvigilator(interview)(question)
69
- for question in interview.survey.questions
70
- ]
71
- invig_end = time.time()
72
- logger.debug(
73
- f"Time taken to fetch invigilators: {invig_end - invig_start:.4f}s"
74
- )
75
-
76
- # Process prompts timing
77
- prompts_start = time.time()
78
- for _, invigilator in enumerate(invigilators):
79
- # Get prompts timing
80
- get_prompts_start = time.time()
81
- prompts = invigilator.get_prompts()
82
- get_prompts_end = time.time()
83
- logger.debug(
84
- f"Time taken to get prompts: {get_prompts_end - get_prompts_start:.4f}s"
85
- )
86
-
87
- user_prompt = prompts["user_prompt"]
88
- system_prompt = prompts["system_prompt"]
89
- user_prompts.append(user_prompt)
90
- system_prompts.append(system_prompt)
91
-
92
- # Index lookups timing
93
- index_start = time.time()
94
- agent_index = self._agent_lookup[invigilator.agent]
95
- agent_indices.append(agent_index)
96
- interview_indices.append(interview_index)
97
- scenario_index = self._scenario_lookup[invigilator.scenario]
98
- scenario_indices.append(scenario_index)
99
- index_end = time.time()
100
- logger.debug(
101
- f"Time taken for index lookups: {index_end - index_start:.4f}s"
102
- )
103
-
104
- # Model and question name assignment timing
105
- assign_start = time.time()
106
- models.append(invigilator.model.model)
107
- question_names.append(invigilator.question.question_name)
108
- assign_end = time.time()
109
- logger.debug(
110
- f"Time taken for assignments: {assign_end - assign_start:.4f}s"
111
- )
112
-
113
- # Cost estimation timing
114
- cost_start = time.time()
115
- prompt_cost = self.estimate_prompt_cost(
116
- system_prompt=system_prompt,
117
- user_prompt=user_prompt,
118
- price_lookup=self.price_lookup,
119
- inference_service=invigilator.model._inference_service_,
120
- model=invigilator.model.model,
121
- )
122
- cost_end = time.time()
123
- logger.debug(
124
- f"Time taken to estimate prompt cost: {cost_end - cost_start:.4f}s"
125
- )
126
- costs.append(prompt_cost["cost_usd"])
127
-
128
- # Cache key generation timing
129
- cache_key_gen_start = time.time()
130
- for iteration in range(iterations):
131
- cache_key = CacheEntry.gen_key(
132
- model=invigilator.model.model,
133
- parameters=invigilator.model.parameters,
134
- system_prompt=system_prompt,
135
- user_prompt=user_prompt,
136
- iteration=iteration,
137
- )
138
- cache_keys.append(cache_key)
139
-
140
- cache_key_gen_end = time.time()
141
- logger.debug(
142
- f"Time taken to generate cache key: {cache_key_gen_end - cache_key_gen_start:.4f}s"
143
- )
144
- logger.debug("-" * 50) # Separator between iterations
145
-
146
- prompts_end = time.time()
147
- logger.info(
148
- f"Time taken to process prompts: {prompts_end - prompts_start:.4f}s"
149
- )
150
-
151
- interview_end = time.time()
152
- logger.info(
153
- f"Overall time taken for interview: {interview_end - interview_start:.4f}s"
154
- )
155
- logger.info("Time breakdown:")
156
- logger.info(f" Invigilators: {invig_end - invig_start:.4f}s")
157
- logger.info(f" Prompts processing: {prompts_end - prompts_start:.4f}s")
158
- logger.info(
159
- f" Other overhead: {(interview_end - interview_start) - ((invig_end - invig_start) + (prompts_end - prompts_start)):.4f}s"
160
- )
161
-
162
- d = Dataset(
163
- [
164
- {"user_prompt": user_prompts},
165
- {"system_prompt": system_prompts},
166
- {"interview_index": interview_indices},
167
- {"question_name": question_names},
168
- {"scenario_index": scenario_indices},
169
- {"agent_index": agent_indices},
170
- {"model": models},
171
- {"estimated_cost": costs},
172
- {"cache_key": cache_keys},
173
- ]
174
- )
175
- return d
176
-
177
- @staticmethod
178
- def estimate_prompt_cost(
179
- system_prompt: str,
180
- user_prompt: str,
181
- price_lookup: dict,
182
- inference_service: str,
183
- model: str,
184
- ) -> dict:
185
- """Estimates the cost of a prompt. Takes piping into account."""
186
- import math
187
-
188
- def get_piping_multiplier(prompt: str):
189
- """Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""
190
-
191
- if "{{" in prompt and "}}" in prompt:
192
- return 2
193
- return 1
194
-
195
- # Look up prices per token
196
- key = (inference_service, model)
197
-
198
- try:
199
- relevant_prices = price_lookup[key]
200
-
201
- service_input_token_price = float(
202
- relevant_prices["input"]["service_stated_token_price"]
203
- )
204
- service_input_token_qty = float(
205
- relevant_prices["input"]["service_stated_token_qty"]
206
- )
207
- input_price_per_token = service_input_token_price / service_input_token_qty
208
-
209
- service_output_token_price = float(
210
- relevant_prices["output"]["service_stated_token_price"]
211
- )
212
- service_output_token_qty = float(
213
- relevant_prices["output"]["service_stated_token_qty"]
214
- )
215
- output_price_per_token = (
216
- service_output_token_price / service_output_token_qty
217
- )
218
-
219
- except KeyError:
220
- # A KeyError is likely to occur if we cannot retrieve prices (the price_lookup dict is empty)
221
- # Use a sensible default
222
-
223
- import warnings
224
-
225
- warnings.warn(
226
- "Price data could not be retrieved. Using default estimates for input and output token prices. Input: $1.00 / 1M tokens; Output: $1.00 / 1M tokens"
227
- )
228
- input_price_per_token = 0.000001 # $1.00 / 1M tokens
229
- output_price_per_token = 0.000001 # $1.00 / 1M tokens
230
-
231
- # Compute the number of characters (double if the question involves piping)
232
- user_prompt_chars = len(str(user_prompt)) * get_piping_multiplier(
233
- str(user_prompt)
234
- )
235
- system_prompt_chars = len(str(system_prompt)) * get_piping_multiplier(
236
- str(system_prompt)
237
- )
238
-
239
- # Convert into tokens (1 token approx. equals 4 characters)
240
- input_tokens = (user_prompt_chars + system_prompt_chars) // 4
241
-
242
- output_tokens = math.ceil(0.75 * input_tokens)
243
-
244
- cost = (
245
- input_tokens * input_price_per_token
246
- + output_tokens * output_price_per_token
247
- )
248
-
249
- return {
250
- "input_tokens": input_tokens,
251
- "output_tokens": output_tokens,
252
- "cost_usd": cost,
253
- }
254
-
255
- def estimate_job_cost_from_external_prices(
256
- self, price_lookup: dict, iterations: int = 1
257
- ) -> dict:
258
- """
259
- Estimates the cost of a job according to the following assumptions:
260
-
261
- - 1 token = 4 characters.
262
- - For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
263
-
264
- price_lookup is an external pricing dictionary.
265
- """
266
-
267
- import pandas as pd
268
-
269
- interviews = self.interviews
270
- data = []
271
- for interview in interviews:
272
- invigilators = [
273
- FetchInvigilator(interview)(question)
274
- for question in self.survey.questions
275
- ]
276
- for invigilator in invigilators:
277
- prompts = invigilator.get_prompts()
278
-
279
- # By this point, agent and scenario data has already been added to the prompts
280
- user_prompt = prompts["user_prompt"]
281
- system_prompt = prompts["system_prompt"]
282
- inference_service = invigilator.model._inference_service_
283
- model = invigilator.model.model
284
-
285
- prompt_cost = self.estimate_prompt_cost(
286
- system_prompt=system_prompt,
287
- user_prompt=user_prompt,
288
- price_lookup=price_lookup,
289
- inference_service=inference_service,
290
- model=model,
291
- )
292
-
293
- data.append(
294
- {
295
- "user_prompt": user_prompt,
296
- "system_prompt": system_prompt,
297
- "estimated_input_tokens": prompt_cost["input_tokens"],
298
- "estimated_output_tokens": prompt_cost["output_tokens"],
299
- "estimated_cost_usd": prompt_cost["cost_usd"],
300
- "inference_service": inference_service,
301
- "model": model,
302
- }
303
- )
304
-
305
- df = pd.DataFrame.from_records(data)
306
-
307
- df = (
308
- df.groupby(["inference_service", "model"])
309
- .agg(
310
- {
311
- "estimated_cost_usd": "sum",
312
- "estimated_input_tokens": "sum",
313
- "estimated_output_tokens": "sum",
314
- }
315
- )
316
- .reset_index()
317
- )
318
- df["estimated_cost_usd"] = df["estimated_cost_usd"] * iterations
319
- df["estimated_input_tokens"] = df["estimated_input_tokens"] * iterations
320
- df["estimated_output_tokens"] = df["estimated_output_tokens"] * iterations
321
-
322
- estimated_costs_by_model = df.to_dict("records")
323
-
324
- estimated_total_cost = sum(
325
- model["estimated_cost_usd"] for model in estimated_costs_by_model
326
- )
327
- estimated_total_input_tokens = sum(
328
- model["estimated_input_tokens"] for model in estimated_costs_by_model
329
- )
330
- estimated_total_output_tokens = sum(
331
- model["estimated_output_tokens"] for model in estimated_costs_by_model
332
- )
333
-
334
- output = {
335
- "estimated_total_cost_usd": estimated_total_cost,
336
- "estimated_total_input_tokens": estimated_total_input_tokens,
337
- "estimated_total_output_tokens": estimated_total_output_tokens,
338
- "model_costs": estimated_costs_by_model,
339
- }
340
-
341
- return output
342
-
343
- def estimate_job_cost(self, iterations: int = 1) -> dict:
344
- """
345
- Estimates the cost of a job according to the following assumptions:
346
-
347
- - 1 token = 4 characters.
348
- - For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
349
-
350
- Fetches prices from Coop.
351
- """
352
- return self.estimate_job_cost_from_external_prices(
353
- price_lookup=self.price_lookup, iterations=iterations
354
- )
@@ -1,134 +0,0 @@
1
- from typing import Optional
2
- from collections import UserDict
3
- from edsl.jobs.buckets.TokenBucket import TokenBucket
4
- from edsl.jobs.buckets.ModelBuckets import ModelBuckets
5
-
6
- # from functools import wraps
7
- from threading import RLock
8
-
9
- from edsl.jobs.decorators import synchronized_class
10
-
11
-
12
@synchronized_class
class BucketCollection(UserDict):
    """Map each language model to the rate-limit buckets of its inference service.

    A Jobs object can use several models at once, so it keeps a whole
    collection of buckets. Keys are model objects (they must be hashable) and
    values are ``ModelBuckets`` instances. Models that resolve to the same
    inference service share a single ``ModelBuckets`` object, so replacing a
    bucket on it is visible through every model of that service.
    """

    def __init__(self, infinity_buckets: bool = False):
        """Create an empty BucketCollection.

        Args:
            infinity_buckets: When true, buckets are created with infinite
                capacity and refill rate, i.e. they never run out of tokens
                or requests.
        """
        super().__init__()
        self.infinity_buckets = infinity_buckets
        self.models_to_services = {}  # model name -> inference service
        self.services_to_buckets = {}  # inference service -> ModelBuckets
        # NOTE(review): presumably consumed by ``synchronized_class`` - confirm.
        self._lock = RLock()

        # NOTE(review): CONFIG is not referenced below. The import is kept in
        # case importing ``edsl.config`` populates the environment variable
        # read next - confirm before removing.
        from edsl.config import CONFIG  # noqa: F401
        import os

        url = os.environ.get("EDSL_REMOTE_TOKEN_BUCKET_URL", None)
        # The literal string "None" is treated the same as an unset variable.
        self.remote_url = None if url is None or url == "None" else url

    @classmethod
    def from_models(
        cls, models_list: list, infinity_buckets: bool = False
    ) -> "BucketCollection":
        """Create a BucketCollection and register every model in ``models_list``."""
        bucket_collection = cls(infinity_buckets=infinity_buckets)
        for model in models_list:
            bucket_collection.add_model(model)
        return bucket_collection

    def get_tokens(
        self, model: "LanguageModel", bucket_type: str, num_tokens: int
    ) -> int:
        """Forward a request for ``num_tokens`` to one of the model's buckets.

        Args:
            model: A model previously registered with ``add_model``.
            bucket_type: Name of the attribute to read from the model's
                ``ModelBuckets`` (e.g. ``"requests_bucket"`` or
                ``"tokens_bucket"``).
            num_tokens: Amount passed to the bucket's ``get_tokens``.

        Returns:
            Whatever the selected bucket's ``get_tokens`` returns.
            NOTE(review): earlier documentation described this as the number
            of tokens remaining - confirm against ``TokenBucket.get_tokens``.

        Raises:
            KeyError: If ``model`` has not been added to the collection.
        """
        relevant_bucket = getattr(self[model], bucket_type)
        return relevant_bucket.get_tokens(num_tokens)

    def __repr__(self):
        return f"BucketCollection({self.data})"

    def _new_bucket(self, service, bucket_type: str, rate: float) -> "TokenBucket":
        """Build a TokenBucket for ``service`` whose capacity equals its refill rate."""
        return TokenBucket(
            bucket_name=service,
            bucket_type=bucket_type,
            capacity=rate,
            refill_rate=rate,
            remote_url=self.remote_url,
        )

    def add_model(self, model: "LanguageModel") -> None:
        """Add a model to the bucket collection.

        The first model seen for an inference service creates that service's
        requests and tokens buckets; later models of the same service reuse
        them. A model name that is already known is mapped to the service
        recorded for it the first time.
        """
        # Per-second rates derived from the model's per-minute limits.
        if self.infinity_buckets:
            tps = float("inf")
            rps = float("inf")
        else:
            tps = model.tpm / 60.0
            rps = model.rpm / 60.0

        if model.model in self.models_to_services:
            known_service = self.models_to_services[model.model]
            self[model] = self.services_to_buckets[known_service]
            return

        service = model._inference_service_
        if service not in self.services_to_buckets:
            self.services_to_buckets[service] = ModelBuckets(
                self._new_bucket(service, "requests", rps),
                self._new_bucket(service, "tokens", tps),
            )
        self.models_to_services[model.model] = service
        self[model] = self.services_to_buckets[service]

    def update_from_key_lookup(self, key_lookup: "KeyLookup") -> None:
        """Update the bucket rates from the RPM/TPM values found in ``key_lookup``.

        For every registered service present in ``key_lookup``, a non-None
        ``rpm`` / ``tpm`` replaces the corresponding bucket with a fresh one
        at the new per-second rate. Nothing happens when the collection uses
        infinity buckets.
        """
        if self.infinity_buckets:
            return

        # Several models can share one service; visit each service once (in
        # first-seen order) instead of rebuilding its buckets per model.
        for service in dict.fromkeys(self.models_to_services.values()):
            if service not in key_lookup:
                continue

            limits = key_lookup[service]
            shared_buckets = self.services_to_buckets[service]

            if limits.rpm is not None:
                shared_buckets.requests_bucket = self._new_bucket(
                    service, "requests", limits.rpm / 60.0
                )

            if limits.tpm is not None:
                shared_buckets.tokens_bucket = self._new_bucket(
                    service, "tokens", limits.tpm / 60.0
                )

    def visualize(self) -> dict:
        """Return ``{model: ModelBuckets.visualize()}`` for every registered model."""
        return {model: self[model].visualize() for model in self}
@@ -1,65 +0,0 @@
1
- # from edsl.jobs.buckets.TokenBucket import TokenBucket
2
-
3
-
4
class ModelBuckets:
    """Pair of rate-limit buckets (requests and tokens) for a model.

    Most LLM model services have limits both on requests-per-minute (RPM) and
    tokens-per-minute (TPM). A request is one call to the service; the number
    of tokens a request needs depends on its parameters.
    """

    def __init__(self, requests_bucket: "TokenBucket", tokens_bucket: "TokenBucket"):
        """Store the two buckets.

        Args:
            requests_bucket: Bucket that captures requests per unit of time.
            tokens_bucket: Bucket that captures language-model tokens.
        """
        self.requests_bucket = requests_bucket
        self.tokens_bucket = tokens_bucket

    def __add__(self, other: "ModelBuckets"):
        """Return a new ModelBuckets whose buckets are the pairwise sums."""
        summed_requests = self.requests_bucket + other.requests_bucket
        summed_tokens = self.tokens_bucket + other.tokens_bucket
        return ModelBuckets(
            requests_bucket=summed_requests, tokens_bucket=summed_tokens
        )

    def turbo_mode_on(self):
        """Switch turbo mode on for the requests bucket, then the tokens bucket."""
        for bucket in (self.requests_bucket, self.tokens_bucket):
            bucket.turbo_mode_on()

    def turbo_mode_off(self):
        """Switch turbo mode off for the requests bucket, then the tokens bucket."""
        for bucket in (self.requests_bucket, self.tokens_bucket):
            bucket.turbo_mode_off()

    @classmethod
    def infinity_bucket(cls, model_name: str = "not_specified") -> "ModelBuckets":
        """Create buckets with infinite capacity and infinite refill rate."""
        from edsl.jobs.buckets.TokenBucket import TokenBucket

        unlimited = float("inf")
        # Built in the same order as the constructor arguments: requests, tokens.
        requests_bucket, tokens_bucket = (
            TokenBucket(
                bucket_name=model_name,
                bucket_type=kind,
                capacity=unlimited,
                refill_rate=unlimited,
            )
            for kind in ("requests", "tokens")
        )
        return cls(requests_bucket=requests_bucket, tokens_bucket=tokens_bucket)

    def visualize(self):
        """Return ``(requests_plot, tokens_plot)`` from the buckets' ``visualize``."""
        return self.requests_bucket.visualize(), self.tokens_bucket.visualize()

    def __repr__(self):
        return f"ModelBuckets(requests_bucket={self.requests_bucket}, tokens_bucket={self.tokens_bucket})"