edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +430 -113
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/dataset/dataset_operations_mixin.py +1492 -0
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +321 -155
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +10 -16
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +420 -216
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/file_store.py +755 -0
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +20 -21
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -426
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/auto/AutoStudy.py +0 -130
  236. edsl/auto/StageBase.py +0 -243
  237. edsl/auto/StageGenerateSurvey.py +0 -178
  238. edsl/auto/StageLabelQuestions.py +0 -125
  239. edsl/auto/StagePersona.py +0 -61
  240. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  241. edsl/auto/StagePersonaDimensionValues.py +0 -74
  242. edsl/auto/StagePersonaDimensions.py +0 -69
  243. edsl/auto/StageQuestions.py +0 -74
  244. edsl/auto/SurveyCreatorPipeline.py +0 -21
  245. edsl/auto/utilities.py +0 -218
  246. edsl/base/Base.py +0 -279
  247. edsl/coop/PriceFetcher.py +0 -54
  248. edsl/data/Cache.py +0 -580
  249. edsl/data/CacheEntry.py +0 -230
  250. edsl/data/SQLiteDict.py +0 -292
  251. edsl/data/__init__.py +0 -5
  252. edsl/data/orm.py +0 -10
  253. edsl/exceptions/cache.py +0 -5
  254. edsl/exceptions/coop.py +0 -14
  255. edsl/exceptions/data.py +0 -14
  256. edsl/exceptions/scenarios.py +0 -29
  257. edsl/jobs/Answers.py +0 -43
  258. edsl/jobs/JobsPrompts.py +0 -354
  259. edsl/jobs/buckets/BucketCollection.py +0 -134
  260. edsl/jobs/buckets/ModelBuckets.py +0 -65
  261. edsl/jobs/buckets/TokenBucket.py +0 -283
  262. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  263. edsl/jobs/interviews/Interview.py +0 -395
  264. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  265. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  266. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  267. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  268. edsl/jobs/tasks/TaskCreators.py +0 -64
  269. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  270. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  271. edsl/language_models/LanguageModel.py +0 -635
  272. edsl/language_models/ServiceDataSources.py +0 -0
  273. edsl/language_models/key_management/KeyLookup.py +0 -63
  274. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  275. edsl/language_models/key_management/models.py +0 -137
  276. edsl/questions/QuestionBase.py +0 -539
  277. edsl/questions/QuestionFreeText.py +0 -130
  278. edsl/questions/derived/QuestionLikertFive.py +0 -76
  279. edsl/results/DatasetExportMixin.py +0 -911
  280. edsl/results/ResultsExportMixin.py +0 -45
  281. edsl/results/TextEditor.py +0 -50
  282. edsl/results/results_fetch_mixin.py +0 -33
  283. edsl/results/results_tools_mixin.py +0 -98
  284. edsl/scenarios/DocumentChunker.py +0 -104
  285. edsl/scenarios/FileStore.py +0 -564
  286. edsl/scenarios/Scenario.py +0 -548
  287. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  288. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  289. edsl/scenarios/handlers/latex.py +0 -5
  290. edsl/shared.py +0 -1
  291. edsl/surveys/Survey.py +0 -1306
  292. edsl/surveys/SurveyQualtricsImport.py +0 -284
  293. edsl/surveys/SurveyToApp.py +0 -141
  294. edsl/surveys/instructions/__init__.py +0 -0
  295. edsl/tools/__init__.py +0 -1
  296. edsl/tools/clusters.py +0 -192
  297. edsl/tools/embeddings.py +0 -27
  298. edsl/tools/embeddings_plotting.py +0 -118
  299. edsl/tools/plotting.py +0 -112
  300. edsl/tools/summarize.py +0 -18
  301. edsl/utilities/data/Registry.py +0 -6
  302. edsl/utilities/data/__init__.py +0 -1
  303. edsl/utilities/data/scooter_results.json +0 -1
  304. edsl-0.1.46.dist-info/RECORD +0 -366
  305. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  306. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  307. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  308. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  309. /edsl/{results → dataset/display}/table_display.css +0 -0
  310. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  311. /edsl/{results → dataset}/tree_explore.py +0 -0
  312. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  313. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  314. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  315. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  316. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  317. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  318. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  319. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  320. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  321. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  322. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  323. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  324. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  325. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  326. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  327. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  328. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
edsl/buckets/token_bucket.py (new file)
@@ -0,0 +1,502 @@
+from typing import Union, List, Any, Optional
+import asyncio
+import time
+from threading import RLock
+
+from ..jobs.decorators import synchronized_class
+
+
+@synchronized_class
+class TokenBucket:
+    """Token bucket algorithm implementation for rate limiting.
+
+    The token bucket is a rate limiting algorithm that allows for controlled access to
+    resources by maintaining a bucket of tokens that are consumed when requests are made
+    and replenished at a constant rate over time.
+
+    Features:
+    - Supports both local and remote operation via factory method
+    - Thread-safe implementation
+    - Configurable capacity and refill rates
+    - Ability to track usage patterns
+    - Visualization of token usage over time
+    - Turbo mode for temporarily bypassing rate limits
+
+    Typical use cases:
+    - Respecting API rate limits (e.g., OpenAI, AWS, etc.)
+    - Controlling resource utilization
+    - Managing concurrent access to limited resources
+    - Testing systems under various rate limiting conditions
+
+    Example:
+        >>> bucket = TokenBucket(
+        ...     bucket_name="openai-gpt4",
+        ...     bucket_type="api",
+        ...     capacity=3500,  # 3500 tokens per minute capacity
+        ...     refill_rate=58.33  # 3500/60 tokens per second
+        ... )
+        >>> bucket.capacity
+        3500
+        >>> bucket.refill_rate
+        58.33
+    """
+
+    def __new__(
+        cls,
+        *,
+        bucket_name: str,
+        bucket_type: str,
+        capacity: Union[int, float],
+        refill_rate: Union[int, float],
+        remote_url: Optional[str] = None,
+    ):
+        """Factory method to create either a local or remote token bucket.
+
+        This method determines whether to create a local TokenBucket instance or
+        a remote TokenBucketClient instance based on the provided parameters.
+
+        Args:
+            bucket_name: Name of the bucket for identification
+            bucket_type: Type of the bucket (e.g., 'api', 'database', etc.)
+            capacity: Maximum number of tokens the bucket can hold
+            refill_rate: Rate at which tokens are refilled (tokens per second)
+            remote_url: If provided, creates a remote token bucket client
+
+        Returns:
+            Either a TokenBucket instance (local) or a TokenBucketClient instance (remote)
+
+        Example:
+            >>> # Local bucket
+            >>> local_bucket = TokenBucket(
+            ...     bucket_name="local-rate-limit",
+            ...     bucket_type="api",
+            ...     capacity=100,
+            ...     refill_rate=10
+            ... )
+            >>> isinstance(local_bucket, TokenBucket)
+            True
+            >>> local_bucket.bucket_name
+            'local-rate-limit'
+        """
+        if remote_url is not None:
+            # Import here to avoid circular imports
+            from ..buckets import TokenBucketClient
+
+            return TokenBucketClient(
+                bucket_name=bucket_name,
+                bucket_type=bucket_type,
+                capacity=capacity,
+                refill_rate=refill_rate,
+                api_base_url=remote_url,
+            )
+
+        # Create a local token bucket
+        instance = super(TokenBucket, cls).__new__(cls)
+        return instance
+
+    def __init__(
+        self,
+        *,
+        bucket_name: str,
+        bucket_type: str,
+        capacity: Union[int, float],
+        refill_rate: Union[int, float],
+        remote_url: Optional[str] = None,
+    ):
+        """Initialize a new token bucket instance.
+
+        Sets up the initial state of the token bucket with the specified parameters.
+
+        Args:
+            bucket_name: Name of the bucket for identification
+            bucket_type: Type of the bucket (e.g., 'api', 'database', etc.)
+            capacity: Maximum number of tokens the bucket can hold
+            refill_rate: Rate at which tokens are refilled (tokens per second)
+            remote_url: If provided, initialization is skipped (handled by __new__)
+
+        Note:
+            - The bucket starts full (tokens = capacity)
+            - The target_rate is calculated in tokens per minute
+            - A log of token levels over time is maintained for visualization
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="test-init", bucket_type="api", capacity=50, refill_rate=5)
+            >>> bucket.tokens == bucket.capacity
+            True
+            >>> bucket.target_rate == bucket.capacity * 60  # Target rate in tokens per minute
+            True
+        """
+        # Skip initialization if this is a remote bucket
+        if remote_url is not None:
+            return
+
+        self.bucket_name = bucket_name
+        self.bucket_type = bucket_type
+        self.capacity = capacity
+        self.added_tokens = 0
+        self._lock = RLock()
+
+        self.target_rate = (
+            capacity * 60
+        )  # set this here because it can change with turbo mode
+
+        self._old_capacity = capacity
+        self.tokens = capacity  # Current number of available tokens
+        self.refill_rate = refill_rate  # Rate at which tokens are refilled
+        self._old_refill_rate = refill_rate
+        self.last_refill = time.monotonic()  # Last refill time
+        self.log: List[Any] = []
+        self.turbo_mode = False
+
+        self.creation_time = time.monotonic()
+
+        self.num_requests = 0
+        self.num_released = 0
+        self.tokens_returned = 0
+
+    def turbo_mode_on(self) -> None:
+        """Enable turbo mode to bypass rate limiting.
+
+        Sets the capacity and refill rate to infinity, effectively disabling rate
+        limiting. This can be useful for testing or emergency situations where
+        rate limits need to be temporarily ignored.
+
+        Note:
+            The original capacity and refill rate values are preserved and can be
+            restored by calling turbo_mode_off()
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="api", bucket_type="test", capacity=10, refill_rate=1)
+            >>> bucket.turbo_mode_on()
+            >>> bucket.capacity
+            inf
+            >>> bucket.refill_rate
+            inf
+        """
+        if self.turbo_mode:
+            pass
+        else:
+            self.turbo_mode = True
+            self.capacity = float("inf")
+            self.refill_rate = float("inf")
+
+    def turbo_mode_off(self) -> None:
+        """Disable turbo mode and restore normal rate limiting.
+
+        Restores the original capacity and refill rate values that were in effect
+        before turbo_mode_on() was called.
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="api", bucket_type="test", capacity=10, refill_rate=1)
+            >>> original_capacity = bucket.capacity
+            >>> bucket.turbo_mode_on()
+            >>> bucket.capacity
+            inf
+            >>> bucket.turbo_mode_off()
+            >>> bucket.capacity == original_capacity
+            True
+        """
+        self.turbo_mode = False
+        self.capacity = self._old_capacity
+        self.refill_rate = self._old_refill_rate
+
+    def __add__(self, other) -> "TokenBucket":
+        """Combine two token buckets to create a more restrictive bucket.
+
+        The resulting bucket has the minimum capacity and refill rate of the two input buckets.
+        This operation is useful when multiple rate limits need to be respected simultaneously.
+
+        Args:
+            other: Another TokenBucket instance to combine with this one
+
+        Returns:
+            A new TokenBucket instance with the more restrictive parameters
+
+        Example:
+            >>> model_bucket = TokenBucket(bucket_name="gpt4", bucket_type="model", capacity=10000, refill_rate=100)
+            >>> global_bucket = TokenBucket(bucket_name="openai", bucket_type="global", capacity=5000, refill_rate=50)
+            >>> combined_bucket = model_bucket + global_bucket
+            >>> combined_bucket.capacity
+            5000
+            >>> combined_bucket.refill_rate
+            50
+        """
+        return TokenBucket(
+            bucket_name=self.bucket_name,
+            bucket_type=self.bucket_type,
+            capacity=min(self.capacity, other.capacity),
+            refill_rate=min(self.refill_rate, other.refill_rate),
+        )
+
+    def __repr__(self):
+        """Return a string representation of the TokenBucket instance.
+
+        Returns:
+            A string containing the essential parameters of the bucket
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="repr-test", bucket_type="api", capacity=100, refill_rate=10)
+            >>> repr(bucket)
+            "TokenBucket(bucket_name=repr-test, bucket_type='api', capacity=100, refill_rate=10)"
+        """
+        return f"TokenBucket(bucket_name={self.bucket_name}, bucket_type='{self.bucket_type}', capacity={self.capacity}, refill_rate={self.refill_rate})"
+
+    def add_tokens(self, tokens: Union[int, float]) -> None:
+        """Add tokens to the bucket, up to the maximum capacity.
+
+        This method is typically used when tokens are returned after a request
+        used fewer tokens than initially requested.
+
+        Args:
+            tokens: The number of tokens to add to the bucket
+
+        Note:
+            - The tokens will be capped at the bucket's capacity
+            - This operation is logged for visualization purposes
+            - The tokens_returned counter is incremented
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="test", bucket_type="test", capacity=10, refill_rate=1)
+            >>> bucket.tokens = 5  # Set current tokens to 5
+            >>> bucket.add_tokens(3)
+            >>> bucket.tokens
+            8
+            >>> bucket.add_tokens(10)  # Should be capped at capacity
+            >>> bucket.tokens
+            10
+        """
+        self.tokens_returned += tokens
+        self.tokens = min(self.capacity, self.tokens + tokens)
+        self.log.append((time.monotonic(), self.tokens))
+
+    def refill(self) -> None:
+        """Refill the bucket with new tokens based on elapsed time.
+
+        Calculates the number of tokens to add based on the time elapsed since the
+        last refill and the current refill rate. Updates the token count and records
+        the new level for logging purposes.
+
+        Note:
+            - This method is called internally by get_tokens() before checking token availability
+            - The refill amount is proportional to the time elapsed: amount = elapsed_time * refill_rate
+            - Tokens are capped at the bucket's capacity
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="test", bucket_type="test", capacity=10, refill_rate=2)
+            >>> bucket.tokens = 5
+            >>> bucket.last_refill = time.monotonic() - 1  # Simulate 1 second passing
+            >>> bucket.refill()
+            >>> 6.9 < bucket.tokens < 7.1  # Should be around 7 (5 + 2*1)
+            True
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        refill_amount = elapsed * self.refill_rate
+        self.tokens = min(self.capacity, self.tokens + refill_amount)
+        self.last_refill = now
+
+        if self.tokens < self.capacity:
+            pass
+
+        self.log.append((now, self.tokens))
+
+    def wait_time(self, requested_tokens: Union[float, int]) -> float:
+        """Calculate the time to wait for the requested number of tokens to become available.
+
+        Args:
+            requested_tokens: The number of tokens needed
+
+        Returns:
+            The time in seconds to wait before the requested tokens will be available
+
+        Note:
+            Returns 0 if the requested tokens are already available
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="test", bucket_type="test", capacity=10, refill_rate=2)
+            >>> bucket.tokens = 3
+            >>> wait_time = bucket.wait_time(5)
+            >>> 0.9 < wait_time < 1.1  # Should be around 1.0 (need 2 more tokens at 2 tokens/sec)
+            True
+            >>> bucket.tokens = 10
+            >>> bucket.wait_time(5)  # No wait needed when we have enough tokens
+            0
+        """
+        if self.tokens >= requested_tokens:
+            return 0
+        return (requested_tokens - self.tokens) / self.refill_rate
+
+    async def get_tokens(
+        self, amount: Union[int, float] = 1, cheat_bucket_capacity=True
+    ) -> None:
+        """Wait for the specified number of tokens to become available.
+
+        This is the primary method for consuming tokens from the bucket. It will block
+        asynchronously until the requested tokens are available, then deduct them
+        from the bucket.
+
+        Args:
+            amount: The number of tokens to consume
+            cheat_bucket_capacity: If True and the requested amount exceeds capacity,
+                automatically increase the bucket capacity to accommodate
+                the request. If False, raise a ValueError.
+
+        Raises:
+            ValueError: If amount exceeds capacity and cheat_bucket_capacity is False
+
+        Note:
+            - This method blocks asynchronously using asyncio.sleep() if tokens are not available
+            - The bucket is refilled based on elapsed time before checking token availability
+            - Usage statistics and token levels are logged for tracking purposes
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="api", bucket_type="test", capacity=100, refill_rate=10)
+            >>> bucket.tokens = 100
+            >>> import asyncio
+            >>> asyncio.run(bucket.get_tokens(30))
+            >>> bucket.tokens
+            70
+
+            >>> # Example with capacity cheating
+            >>> bucket = TokenBucket(bucket_name="api", bucket_type="test", capacity=10, refill_rate=1)
+            >>> asyncio.run(bucket.get_tokens(15, cheat_bucket_capacity=True))
+            >>> bucket.capacity > 15  # Capacity should have been increased
+            True
+
+            >>> # Example raising ValueError
+            >>> bucket = TokenBucket(bucket_name="api", bucket_type="test", capacity=10, refill_rate=1)
+            >>> try:
+            ...     asyncio.run(bucket.get_tokens(15, cheat_bucket_capacity=False))
+            ... except ValueError as e:
+            ...     print("ValueError raised")
+            ValueError raised
+        """
+        self.num_requests += amount
+        if amount >= self.capacity:
+            if not cheat_bucket_capacity:
+                msg = f"Requested amount exceeds bucket capacity. Bucket capacity: {self.capacity}, requested amount: {amount}. As the bucket never overflows, the requested amount will never be available."
+                raise ValueError(msg)
+            else:
+                self.capacity = amount * 1.10
+                self._old_capacity = self.capacity
+
+        start_time = time.monotonic()
+        while True:
+            self.refill()  # Refill based on elapsed time
+            if self.tokens >= amount:
+                self.tokens -= amount
+                break
+
+            wait_time = self.wait_time(amount)
+            if wait_time > 0:
+                await asyncio.sleep(wait_time)
+
+        self.num_released += amount
+        now = time.monotonic()
+        self.log.append((now, self.tokens))
+        return None
+
+    def get_log(self) -> list[tuple]:
+        """Return the token level log for analysis or visualization.
+
+        Returns:
+            A list of (timestamp, token_level) tuples representing the token history
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="test", bucket_type="test", capacity=10, refill_rate=1)
+            >>> import asyncio
+            >>> asyncio.run(bucket.get_tokens(5))
+            >>> log = bucket.get_log()
+            >>> len(log) > 0  # Should have at least one log entry
+            True
+            >>> isinstance(log[0], tuple) and len(log[0]) == 2  # Each entry should be a (timestamp, tokens) tuple
+            True
+        """
+        return self.log
+
+    def visualize(self):
+        """Visualize the token bucket usage over time as a line chart.
+
+        Creates and displays a matplotlib plot showing token levels over time.
+        This can be useful for analyzing rate limit behavior and usage patterns.
+
+        Note:
+            Requires matplotlib to be installed
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="api", bucket_type="test", capacity=100, refill_rate=10)
+            >>> # In practice, you would use the bucket and then visualize:
+            >>> # import asyncio
+            >>> # for i in range(5):
+            >>> #     asyncio.run(bucket.get_tokens(10))
+            >>> #     asyncio.sleep(0.2)
+            >>> # bucket.visualize()  # This would display a matplotlib chart
+        """
+        times, tokens = zip(*self.get_log())
+        start_time = times[0]
+        times = [t - start_time for t in times]  # Normalize time to start from 0
+        from matplotlib import pyplot as plt
+
+        plt.figure(figsize=(10, 6))
+        plt.plot(times, tokens, label="Tokens Available")
+        plt.xlabel("Time (seconds)", fontsize=12)
+        plt.ylabel("Number of Tokens", fontsize=12)
+        details = f"{self.bucket_name} ({self.bucket_type}) Bucket Usage Over Time\nCapacity: {self.capacity:.1f}, Refill Rate: {self.refill_rate:.1f}/second"
+        plt.title(details, fontsize=14)
+
+        plt.legend()
+        plt.grid(True)
+        plt.tight_layout()
+        plt.show()
+
+    def get_throughput(self, time_window: Optional[float] = None) -> float:
+        """Calculate the empirical bucket throughput in tokens per minute.
+
+        Determines the actual usage rate of the bucket over the specified time window,
+        which can be useful for monitoring and adjusting rate limits.
+
+        Args:
+            time_window: The time window in seconds to calculate the throughput for.
+                If None, uses the entire bucket lifetime.
+
+        Returns:
+            The throughput in tokens per minute
+
+        Note:
+            The throughput is based on tokens that were successfully released from
+            the bucket, not on tokens that were requested.
+
+        Example:
+            >>> bucket = TokenBucket(bucket_name="api", bucket_type="test", capacity=100, refill_rate=30)
+            >>> import asyncio
+            >>> # Consume some tokens
+            >>> bucket.num_released = 0  # Reset for testing
+            >>> asyncio.run(bucket.get_tokens(50))
+            >>> # Fast-forward the creation time to simulate passage of time
+            >>> bucket.creation_time = time.monotonic() - 60  # Simulate 1 minute passing
+            >>> throughput = bucket.get_throughput()
+            >>> throughput > 49  # Should a little less than 50 tokens per minute
+            True
+        """
+        now = time.monotonic()
+
+        if time_window is None:
+            start_time = self.creation_time
+        else:
+            start_time = now - time_window
+
+        if start_time < self.creation_time:
+            start_time = self.creation_time
+
+        elapsed_time = now - start_time
+
+        if elapsed_time == 0:
+            return self.num_released / 0.001
+
+        return (self.num_released / elapsed_time) * 60
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(optionflags=doctest.ELLIPSIS)
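For orientation, here is a minimal usage sketch of the TokenBucket class added above. It assumes edsl 0.1.48 is installed and imports from the new module path shown in the file list (edsl/buckets/token_bucket.py); the bucket name, type, and numbers are illustrative only, not part of the package.

import asyncio
from edsl.buckets.token_bucket import TokenBucket

async def main():
    # 100-token capacity, refilled at 10 tokens per second (illustrative values).
    bucket = TokenBucket(
        bucket_name="demo", bucket_type="api", capacity=100, refill_rate=10
    )
    await bucket.get_tokens(30)      # consume 30 tokens, sleeping if necessary
    bucket.add_tokens(5)             # return unused tokens, capped at capacity
    print(bucket.wait_time(90))      # seconds until 90 tokens would be available
    print(bucket.get_throughput())   # empirical tokens released per minute

asyncio.run(main())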
edsl/buckets/token_bucket_api.py (renamed from edsl/jobs/buckets/TokenBucketAPI.py)
@@ -3,8 +3,8 @@ from pydantic import BaseModel
 from typing import Union, Dict
 from typing import Union, List, Any, Optional
 from threading import RLock
-from edsl.jobs.buckets.TokenBucket import TokenBucket  # Original implementation
 
+from .token_bucket import TokenBucket  # Original implementation
 
 def safe_float_for_json(value: float) -> Union[float, str]:
     """Convert float('inf') to 'infinity' for JSON serialization.