edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +135 -219
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +138 -89
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +47 -56
  23. edsl/coop/PriceFetcher.py +58 -0
  24. edsl/coop/coop.py +50 -7
  25. edsl/data/Cache.py +35 -1
  26. edsl/data_transfer_models.py +73 -38
  27. edsl/enums.py +4 -0
  28. edsl/exceptions/language_models.py +25 -1
  29. edsl/exceptions/questions.py +62 -5
  30. edsl/exceptions/results.py +4 -0
  31. edsl/inference_services/AnthropicService.py +13 -11
  32. edsl/inference_services/AwsBedrock.py +19 -17
  33. edsl/inference_services/AzureAI.py +37 -20
  34. edsl/inference_services/GoogleService.py +16 -12
  35. edsl/inference_services/GroqService.py +2 -0
  36. edsl/inference_services/InferenceServiceABC.py +58 -3
  37. edsl/inference_services/MistralAIService.py +120 -0
  38. edsl/inference_services/OpenAIService.py +48 -54
  39. edsl/inference_services/TestService.py +80 -0
  40. edsl/inference_services/TogetherAIService.py +170 -0
  41. edsl/inference_services/models_available_cache.py +0 -6
  42. edsl/inference_services/registry.py +6 -0
  43. edsl/jobs/Answers.py +10 -12
  44. edsl/jobs/FailedQuestion.py +78 -0
  45. edsl/jobs/Jobs.py +37 -22
  46. edsl/jobs/buckets/BucketCollection.py +24 -15
  47. edsl/jobs/buckets/TokenBucket.py +93 -14
  48. edsl/jobs/interviews/Interview.py +366 -78
  49. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +14 -68
  50. edsl/jobs/interviews/InterviewExceptionEntry.py +85 -19
  51. edsl/jobs/runners/JobsRunnerAsyncio.py +146 -175
  52. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  53. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  54. edsl/jobs/tasks/TaskHistory.py +148 -213
  55. edsl/language_models/LanguageModel.py +261 -156
  56. edsl/language_models/ModelList.py +2 -2
  57. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  58. edsl/language_models/fake_openai_call.py +15 -0
  59. edsl/language_models/fake_openai_service.py +61 -0
  60. edsl/language_models/registry.py +23 -6
  61. edsl/language_models/repair.py +0 -19
  62. edsl/language_models/utilities.py +61 -0
  63. edsl/notebooks/Notebook.py +20 -2
  64. edsl/prompts/Prompt.py +52 -2
  65. edsl/questions/AnswerValidatorMixin.py +23 -26
  66. edsl/questions/QuestionBase.py +330 -249
  67. edsl/questions/QuestionBaseGenMixin.py +133 -0
  68. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  69. edsl/questions/QuestionBudget.py +99 -41
  70. edsl/questions/QuestionCheckBox.py +227 -35
  71. edsl/questions/QuestionExtract.py +98 -27
  72. edsl/questions/QuestionFreeText.py +52 -29
  73. edsl/questions/QuestionFunctional.py +7 -0
  74. edsl/questions/QuestionList.py +141 -22
  75. edsl/questions/QuestionMultipleChoice.py +159 -65
  76. edsl/questions/QuestionNumerical.py +88 -46
  77. edsl/questions/QuestionRank.py +182 -24
  78. edsl/questions/Quick.py +41 -0
  79. edsl/questions/RegisterQuestionsMeta.py +31 -12
  80. edsl/questions/ResponseValidatorABC.py +170 -0
  81. edsl/questions/__init__.py +3 -4
  82. edsl/questions/decorators.py +21 -0
  83. edsl/questions/derived/QuestionLikertFive.py +10 -5
  84. edsl/questions/derived/QuestionLinearScale.py +15 -2
  85. edsl/questions/derived/QuestionTopK.py +10 -1
  86. edsl/questions/derived/QuestionYesNo.py +24 -3
  87. edsl/questions/descriptors.py +43 -7
  88. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  89. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  90. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  91. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  92. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  93. edsl/questions/prompt_templates/question_list.jinja +17 -0
  94. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  95. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  96. edsl/questions/question_registry.py +6 -2
  97. edsl/questions/templates/__init__.py +0 -0
  98. edsl/questions/templates/budget/__init__.py +0 -0
  99. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  100. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  101. edsl/questions/templates/checkbox/__init__.py +0 -0
  102. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  103. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  104. edsl/questions/templates/extract/__init__.py +0 -0
  105. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  106. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  107. edsl/questions/templates/free_text/__init__.py +0 -0
  108. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  109. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  110. edsl/questions/templates/likert_five/__init__.py +0 -0
  111. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  112. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  113. edsl/questions/templates/linear_scale/__init__.py +0 -0
  114. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  115. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  116. edsl/questions/templates/list/__init__.py +0 -0
  117. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  118. edsl/questions/templates/list/question_presentation.jinja +5 -0
  119. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  120. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  121. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  122. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  123. edsl/questions/templates/numerical/__init__.py +0 -0
  124. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  125. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  126. edsl/questions/templates/rank/__init__.py +0 -0
  127. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  128. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  129. edsl/questions/templates/top_k/__init__.py +0 -0
  130. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  131. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  132. edsl/questions/templates/yes_no/__init__.py +0 -0
  133. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  134. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  135. edsl/results/Dataset.py +20 -0
  136. edsl/results/DatasetExportMixin.py +46 -48
  137. edsl/results/DatasetTree.py +145 -0
  138. edsl/results/Result.py +32 -5
  139. edsl/results/Results.py +135 -46
  140. edsl/results/ResultsDBMixin.py +3 -3
  141. edsl/results/Selector.py +118 -0
  142. edsl/results/tree_explore.py +115 -0
  143. edsl/scenarios/FileStore.py +71 -10
  144. edsl/scenarios/Scenario.py +96 -25
  145. edsl/scenarios/ScenarioImageMixin.py +2 -2
  146. edsl/scenarios/ScenarioList.py +361 -39
  147. edsl/scenarios/ScenarioListExportMixin.py +9 -0
  148. edsl/scenarios/ScenarioListPdfMixin.py +150 -4
  149. edsl/study/SnapShot.py +8 -1
  150. edsl/study/Study.py +32 -0
  151. edsl/surveys/Rule.py +10 -1
  152. edsl/surveys/RuleCollection.py +21 -5
  153. edsl/surveys/Survey.py +637 -311
  154. edsl/surveys/SurveyExportMixin.py +71 -9
  155. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  156. edsl/surveys/SurveyQualtricsImport.py +75 -4
  157. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  158. edsl/surveys/instructions/Instruction.py +34 -0
  159. edsl/surveys/instructions/InstructionCollection.py +77 -0
  160. edsl/surveys/instructions/__init__.py +0 -0
  161. edsl/templates/error_reporting/base.html +24 -0
  162. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  163. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  164. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  165. edsl/templates/error_reporting/interview_details.html +116 -0
  166. edsl/templates/error_reporting/interviews.html +10 -0
  167. edsl/templates/error_reporting/overview.html +5 -0
  168. edsl/templates/error_reporting/performance_plot.html +2 -0
  169. edsl/templates/error_reporting/report.css +74 -0
  170. edsl/templates/error_reporting/report.html +118 -0
  171. edsl/templates/error_reporting/report.js +25 -0
  172. edsl/utilities/utilities.py +9 -1
  173. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/METADATA +5 -2
  174. edsl-0.1.33.dist-info/RECORD +295 -0
  175. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
  176. edsl/jobs/interviews/retry_management.py +0 -37
  177. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
  178. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  179. edsl-0.1.32.dist-info/RECORD +0 -209
  180. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  181. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Union, List, Any
1
+ from typing import Union, List, Any, Optional
2
2
  import asyncio
3
3
  import time
4
4
 
@@ -17,6 +17,12 @@ class TokenBucket:
17
17
  self.bucket_name = bucket_name
18
18
  self.bucket_type = bucket_type
19
19
  self.capacity = capacity # Maximum number of tokens
20
+ self.added_tokens = 0
21
+
22
+ self.target_rate = (
23
+ capacity * 60
24
+ ) # set this here because it can change with turbo mode
25
+
20
26
  self._old_capacity = capacity
21
27
  self.tokens = capacity # Current number of available tokens
22
28
  self.refill_rate = refill_rate # Rate at which tokens are refilled
@@ -25,6 +31,12 @@ class TokenBucket:
25
31
  self.log: List[Any] = []
26
32
  self.turbo_mode = False
27
33
 
34
+ self.creation_time = time.monotonic()
35
+
36
+ self.num_requests = 0
37
+ self.num_released = 0
38
+ self.tokens_returned = 0
39
+
28
40
  def turbo_mode_on(self):
29
41
  """Set the refill rate to infinity."""
30
42
  if self.turbo_mode:
@@ -69,6 +81,7 @@ class TokenBucket:
69
81
  >>> bucket.tokens
70
82
  10
71
83
  """
84
+ self.tokens_returned += tokens
72
85
  self.tokens = min(self.capacity, self.tokens + tokens)
73
86
  self.log.append((time.monotonic(), self.tokens))
74
87
 
@@ -82,23 +95,30 @@ class TokenBucket:
82
95
  >>> bucket.refill()
83
96
  >>> bucket.tokens > 0
84
97
  True
85
-
86
98
  """
99
+ """Refill the bucket with new tokens based on elapsed time."""
87
100
  now = time.monotonic()
101
+ # print(f"Time is now: {now}; Last refill time: {self.last_refill}")
88
102
  elapsed = now - self.last_refill
103
+ # print("Elapsed time: ", elapsed)
89
104
  refill_amount = elapsed * self.refill_rate
90
105
  self.tokens = min(self.capacity, self.tokens + refill_amount)
91
106
  self.last_refill = now
92
107
 
108
+ if self.tokens < self.capacity:
109
+ pass
110
+ # print(f"Refilled. Current tokens: {self.tokens:.4f}")
111
+ # print(f"Elapsed time: {elapsed:.4f} seconds")
112
+ # print(f"Refill amount: {refill_amount:.4f}")
113
+
93
114
  self.log.append((now, self.tokens))
94
115
 
95
116
  def wait_time(self, requested_tokens: Union[float, int]) -> float:
96
117
  """Calculate the time to wait for the requested number of tokens."""
97
- now = time.monotonic()
98
- elapsed = now - self.last_refill
99
- refill_amount = elapsed * self.refill_rate
100
- available_tokens = min(self.capacity, self.tokens + refill_amount)
101
- return max(0, requested_tokens - available_tokens) / self.refill_rate
118
+ # self.refill() # Update the current token count
119
+ if self.tokens >= requested_tokens:
120
+ return 0
121
+ return (requested_tokens - self.tokens) / self.refill_rate
102
122
 
103
123
  async def get_tokens(
104
124
  self, amount: Union[int, float] = 1, cheat_bucket_capacity=True
@@ -123,22 +143,33 @@ class TokenBucket:
123
143
  ...
124
144
  ValueError: Requested amount exceeds bucket capacity. Bucket capacity: 10, requested amount: 11. As the bucket never overflows, the requested amount will never be available.
125
145
  >>> asyncio.run(bucket.get_tokens(11, cheat_bucket_capacity=True))
146
+ >>> bucket.capacity
147
+ 12.100000000000001
126
148
  """
127
- if amount > self.capacity:
149
+ self.num_requests += amount
150
+ if amount >= self.capacity:
128
151
  if not cheat_bucket_capacity:
129
152
  msg = f"Requested amount exceeds bucket capacity. Bucket capacity: {self.capacity}, requested amount: {amount}. As the bucket never overflows, the requested amount will never be available."
130
153
  raise ValueError(msg)
131
154
  else:
132
- self.tokens = 0 # clear the bucket but let it go through
133
- return
155
+ self.capacity = amount * 1.10
156
+ self._old_capacity = self.capacity
134
157
 
135
- while self.tokens < amount:
136
- self.refill()
137
- await asyncio.sleep(0.01) # Sleep briefly to prevent busy waiting
138
- self.tokens -= amount
158
+ start_time = time.monotonic()
159
+ while True:
160
+ self.refill() # Refill based on elapsed time
161
+ if self.tokens >= amount:
162
+ self.tokens -= amount
163
+ break
139
164
 
165
+ wait_time = self.wait_time(amount)
166
+ if wait_time > 0:
167
+ await asyncio.sleep(wait_time)
168
+
169
+ self.num_released += amount
140
170
  now = time.monotonic()
141
171
  self.log.append((now, self.tokens))
172
+ return None
142
173
 
143
174
  def get_log(self) -> list[tuple]:
144
175
  return self.log
@@ -162,6 +193,54 @@ class TokenBucket:
162
193
  plt.tight_layout()
163
194
  plt.show()
164
195
 
196
+ def get_throughput(self, time_window: Optional[float] = None) -> float:
197
+ """
198
+ Calculate the empirical bucket throughput in tokens per minute for the specified time window.
199
+
200
+ :param time_window: The time window in seconds to calculate the throughput for.
201
+ :return: The throughput in tokens per minute.
202
+
203
+ >>> bucket = TokenBucket(bucket_name="test", bucket_type="test", capacity=100, refill_rate=10)
204
+ >>> asyncio.run(bucket.get_tokens(50))
205
+ >>> time.sleep(1) # Wait for 1 second
206
+ >>> asyncio.run(bucket.get_tokens(30))
207
+ >>> throughput = bucket.get_throughput(1)
208
+ >>> 4750 < throughput < 4850
209
+ True
210
+ """
211
+ now = time.monotonic()
212
+
213
+ if time_window is None:
214
+ start_time = self.creation_time
215
+ else:
216
+ start_time = now - time_window
217
+
218
+ if start_time < self.creation_time:
219
+ start_time = self.creation_time
220
+
221
+ elapsed_time = now - start_time
222
+
223
+ return (self.num_released / elapsed_time) * 60
224
+
225
+ # # Filter log entries within the time window
226
+ # relevant_log = [(t, tokens) for t, tokens in self.log if t >= start_time]
227
+
228
+ # if len(relevant_log) < 2:
229
+ # return 0 # Not enough data points to calculate throughput
230
+
231
+ # # Calculate total tokens used
232
+ # initial_tokens = relevant_log[0][1]
233
+ # final_tokens = relevant_log[-1][1]
234
+ # tokens_used = self.num_released - (final_tokens - initial_tokens)
235
+
236
+ # # Calculate actual time elapsed
237
+ # actual_time_elapsed = relevant_log[-1][0] - relevant_log[0][0]
238
+
239
+ # # Calculate throughput in tokens per minute
240
+ # throughput = (tokens_used / actual_time_elapsed) * 60
241
+
242
+ # return throughput
243
+
165
244
 
166
245
  if __name__ == "__main__":
167
246
  import doctest