PyPI - edsl - Versions diffs - 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl - Mend

edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (181)

edsl/Base.py +9 -3
edsl/TemplateLoader.py +24 -0
edsl/__init__.py +8 -3
edsl/__version__.py +1 -1
edsl/agents/Agent.py +40 -8
edsl/agents/AgentList.py +43 -0
edsl/agents/Invigilator.py +135 -219
edsl/agents/InvigilatorBase.py +148 -59
edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +138 -89
edsl/agents/__init__.py +1 -0
edsl/auto/AutoStudy.py +117 -0
edsl/auto/StageBase.py +230 -0
edsl/auto/StageGenerateSurvey.py +178 -0
edsl/auto/StageLabelQuestions.py +125 -0
edsl/auto/StagePersona.py +61 -0
edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
edsl/auto/StagePersonaDimensionValues.py +74 -0
edsl/auto/StagePersonaDimensions.py +69 -0
edsl/auto/StageQuestions.py +73 -0
edsl/auto/SurveyCreatorPipeline.py +21 -0
edsl/auto/utilities.py +224 -0
edsl/config.py +47 -56
edsl/coop/PriceFetcher.py +58 -0
edsl/coop/coop.py +50 -7
edsl/data/Cache.py +35 -1
edsl/data_transfer_models.py +73 -38
edsl/enums.py +4 -0
edsl/exceptions/language_models.py +25 -1
edsl/exceptions/questions.py +62 -5
edsl/exceptions/results.py +4 -0
edsl/inference_services/AnthropicService.py +13 -11
edsl/inference_services/AwsBedrock.py +19 -17
edsl/inference_services/AzureAI.py +37 -20
edsl/inference_services/GoogleService.py +16 -12
edsl/inference_services/GroqService.py +2 -0
edsl/inference_services/InferenceServiceABC.py +58 -3
edsl/inference_services/MistralAIService.py +120 -0
edsl/inference_services/OpenAIService.py +48 -54
edsl/inference_services/TestService.py +80 -0
edsl/inference_services/TogetherAIService.py +170 -0
edsl/inference_services/models_available_cache.py +0 -6
edsl/inference_services/registry.py +6 -0
edsl/jobs/Answers.py +10 -12
edsl/jobs/FailedQuestion.py +78 -0
edsl/jobs/Jobs.py +37 -22
edsl/jobs/buckets/BucketCollection.py +24 -15
edsl/jobs/buckets/TokenBucket.py +93 -14
edsl/jobs/interviews/Interview.py +366 -78
edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +14 -68
edsl/jobs/interviews/InterviewExceptionEntry.py +85 -19
edsl/jobs/runners/JobsRunnerAsyncio.py +146 -175
edsl/jobs/runners/JobsRunnerStatus.py +331 -0
edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
edsl/jobs/tasks/TaskHistory.py +148 -213
edsl/language_models/LanguageModel.py +261 -156
edsl/language_models/ModelList.py +2 -2
edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
edsl/language_models/fake_openai_call.py +15 -0
edsl/language_models/fake_openai_service.py +61 -0
edsl/language_models/registry.py +23 -6
edsl/language_models/repair.py +0 -19
edsl/language_models/utilities.py +61 -0
edsl/notebooks/Notebook.py +20 -2
edsl/prompts/Prompt.py +52 -2
edsl/questions/AnswerValidatorMixin.py +23 -26
edsl/questions/QuestionBase.py +330 -249
edsl/questions/QuestionBaseGenMixin.py +133 -0
edsl/questions/QuestionBasePromptsMixin.py +266 -0
edsl/questions/QuestionBudget.py +99 -41
edsl/questions/QuestionCheckBox.py +227 -35
edsl/questions/QuestionExtract.py +98 -27
edsl/questions/QuestionFreeText.py +52 -29
edsl/questions/QuestionFunctional.py +7 -0
edsl/questions/QuestionList.py +141 -22
edsl/questions/QuestionMultipleChoice.py +159 -65
edsl/questions/QuestionNumerical.py +88 -46
edsl/questions/QuestionRank.py +182 -24
edsl/questions/Quick.py +41 -0
edsl/questions/RegisterQuestionsMeta.py +31 -12
edsl/questions/ResponseValidatorABC.py +170 -0
edsl/questions/__init__.py +3 -4
edsl/questions/decorators.py +21 -0
edsl/questions/derived/QuestionLikertFive.py +10 -5
edsl/questions/derived/QuestionLinearScale.py +15 -2
edsl/questions/derived/QuestionTopK.py +10 -1
edsl/questions/derived/QuestionYesNo.py +24 -3
edsl/questions/descriptors.py +43 -7
edsl/questions/prompt_templates/question_budget.jinja +13 -0
edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
edsl/questions/prompt_templates/question_extract.jinja +11 -0
edsl/questions/prompt_templates/question_free_text.jinja +3 -0
edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
edsl/questions/prompt_templates/question_list.jinja +17 -0
edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
edsl/questions/prompt_templates/question_numerical.jinja +37 -0
edsl/questions/question_registry.py +6 -2
edsl/questions/templates/__init__.py +0 -0
edsl/questions/templates/budget/__init__.py +0 -0
edsl/questions/templates/budget/answering_instructions.jinja +7 -0
edsl/questions/templates/budget/question_presentation.jinja +7 -0
edsl/questions/templates/checkbox/__init__.py +0 -0
edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
edsl/questions/templates/extract/__init__.py +0 -0
edsl/questions/templates/extract/answering_instructions.jinja +7 -0
edsl/questions/templates/extract/question_presentation.jinja +1 -0
edsl/questions/templates/free_text/__init__.py +0 -0
edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
edsl/questions/templates/free_text/question_presentation.jinja +1 -0
edsl/questions/templates/likert_five/__init__.py +0 -0
edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
edsl/questions/templates/linear_scale/__init__.py +0 -0
edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
edsl/questions/templates/list/__init__.py +0 -0
edsl/questions/templates/list/answering_instructions.jinja +4 -0
edsl/questions/templates/list/question_presentation.jinja +5 -0
edsl/questions/templates/multiple_choice/__init__.py +0 -0
edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
edsl/questions/templates/multiple_choice/html.jinja +0 -0
edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
edsl/questions/templates/numerical/__init__.py +0 -0
edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
edsl/questions/templates/numerical/question_presentation.jinja +7 -0
edsl/questions/templates/rank/__init__.py +0 -0
edsl/questions/templates/rank/answering_instructions.jinja +11 -0
edsl/questions/templates/rank/question_presentation.jinja +15 -0
edsl/questions/templates/top_k/__init__.py +0 -0
edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
edsl/questions/templates/top_k/question_presentation.jinja +22 -0
edsl/questions/templates/yes_no/__init__.py +0 -0
edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
edsl/results/Dataset.py +20 -0
edsl/results/DatasetExportMixin.py +46 -48
edsl/results/DatasetTree.py +145 -0
edsl/results/Result.py +32 -5
edsl/results/Results.py +135 -46
edsl/results/ResultsDBMixin.py +3 -3
edsl/results/Selector.py +118 -0
edsl/results/tree_explore.py +115 -0
edsl/scenarios/FileStore.py +71 -10
edsl/scenarios/Scenario.py +96 -25
edsl/scenarios/ScenarioImageMixin.py +2 -2
edsl/scenarios/ScenarioList.py +361 -39
edsl/scenarios/ScenarioListExportMixin.py +9 -0
edsl/scenarios/ScenarioListPdfMixin.py +150 -4
edsl/study/SnapShot.py +8 -1
edsl/study/Study.py +32 -0
edsl/surveys/Rule.py +10 -1
edsl/surveys/RuleCollection.py +21 -5
edsl/surveys/Survey.py +637 -311
edsl/surveys/SurveyExportMixin.py +71 -9
edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
edsl/surveys/SurveyQualtricsImport.py +75 -4
edsl/surveys/instructions/ChangeInstruction.py +47 -0
edsl/surveys/instructions/Instruction.py +34 -0
edsl/surveys/instructions/InstructionCollection.py +77 -0
edsl/surveys/instructions/__init__.py +0 -0
edsl/templates/error_reporting/base.html +24 -0
edsl/templates/error_reporting/exceptions_by_model.html +35 -0
edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
edsl/templates/error_reporting/exceptions_by_type.html +17 -0
edsl/templates/error_reporting/interview_details.html +116 -0
edsl/templates/error_reporting/interviews.html +10 -0
edsl/templates/error_reporting/overview.html +5 -0
edsl/templates/error_reporting/performance_plot.html +2 -0
edsl/templates/error_reporting/report.css +74 -0
edsl/templates/error_reporting/report.html +118 -0
edsl/templates/error_reporting/report.js +25 -0
edsl/utilities/utilities.py +9 -1
{edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/METADATA +5 -2
edsl-0.1.33.dist-info/RECORD +295 -0
edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
edsl/jobs/interviews/retry_management.py +0 -37
edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
edsl/utilities/gcp_bucket/simple_example.py +0 -9
edsl-0.1.32.dist-info/RECORD +0 -209
{edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
{edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0

edsl/jobs/buckets/TokenBucket.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Union, List, Any
+from typing import Union, List, Any, Optional
 import asyncio
 import time
@@ -17,6 +17,12 @@ class TokenBucket:
         self.bucket_name = bucket_name
         self.bucket_type = bucket_type
         self.capacity = capacity  # Maximum number of tokens
+        self.added_tokens = 0
+        self.target_rate = (
+            capacity * 60
+        )  # set this here because it can change with turbo mode
         self._old_capacity = capacity
         self.tokens = capacity  # Current number of available tokens
         self.refill_rate = refill_rate  # Rate at which tokens are refilled
@@ -25,6 +31,12 @@ class TokenBucket:
         self.log: List[Any] = []
         self.turbo_mode = False
+        self.creation_time = time.monotonic()
+        self.num_requests = 0
+        self.num_released = 0
+        self.tokens_returned = 0
     def turbo_mode_on(self):
         """Set the refill rate to infinity."""
         if self.turbo_mode:
@@ -69,6 +81,7 @@ class TokenBucket:
         >>> bucket.tokens
         10
         """
+        self.tokens_returned += tokens
         self.tokens = min(self.capacity, self.tokens + tokens)
         self.log.append((time.monotonic(), self.tokens))
@@ -82,23 +95,30 @@ class TokenBucket:
         >>> bucket.refill()
         >>> bucket.tokens > 0
         True
         """
+        """Refill the bucket with new tokens based on elapsed time."""
         now = time.monotonic()
+        # print(f"Time is now: {now}; Last refill time: {self.last_refill}")
         elapsed = now - self.last_refill
+        # print("Elapsed time: ", elapsed)
         refill_amount = elapsed * self.refill_rate
         self.tokens = min(self.capacity, self.tokens + refill_amount)
         self.last_refill = now
+        if self.tokens < self.capacity:
+            pass
+            # print(f"Refilled. Current tokens: {self.tokens:.4f}")
+            # print(f"Elapsed time: {elapsed:.4f} seconds")
+            # print(f"Refill amount: {refill_amount:.4f}")
         self.log.append((now, self.tokens))
     def wait_time(self, requested_tokens: Union[float, int]) -> float:
         """Calculate the time to wait for the requested number of tokens."""
-        now = time.monotonic()
-        elapsed = now - self.last_refill
-        refill_amount = elapsed * self.refill_rate
-        available_tokens = min(self.capacity, self.tokens + refill_amount)
-        return max(0, requested_tokens - available_tokens) / self.refill_rate
+        # self.refill()  # Update the current token count
+        if self.tokens >= requested_tokens:
+            return 0
+        return (requested_tokens - self.tokens) / self.refill_rate
     async def get_tokens(
         self, amount: Union[int, float] = 1, cheat_bucket_capacity=True
@@ -123,22 +143,33 @@ class TokenBucket:
         ...
         ValueError: Requested amount exceeds bucket capacity. Bucket capacity: 10, requested amount: 11. As the bucket never overflows, the requested amount will never be available.
         >>> asyncio.run(bucket.get_tokens(11, cheat_bucket_capacity=True))
+        >>> bucket.capacity
+        12.100000000000001
         """
-        if amount > self.capacity:
+        self.num_requests += amount
+        if amount >= self.capacity:
             if not cheat_bucket_capacity:
                 msg = f"Requested amount exceeds bucket capacity. Bucket capacity: {self.capacity}, requested amount: {amount}. As the bucket never overflows, the requested amount will never be available."
                 raise ValueError(msg)
             else:
-                self.tokens = 0  # clear the bucket but let it go through
-                return
+                self.capacity = amount * 1.10
+                self._old_capacity = self.capacity
-        while self.tokens < amount:
-            self.refill()
-            await asyncio.sleep(0.01)  # Sleep briefly to prevent busy waiting
-        self.tokens -= amount
+        start_time = time.monotonic()
+        while True:
+            self.refill()  # Refill based on elapsed time
+            if self.tokens >= amount:
+                self.tokens -= amount
+                break
+            wait_time = self.wait_time(amount)
+            if wait_time > 0:
+                await asyncio.sleep(wait_time)
+        self.num_released += amount
         now = time.monotonic()
         self.log.append((now, self.tokens))
+        return None
     def get_log(self) -> list[tuple]:
         return self.log
@@ -162,6 +193,54 @@ class TokenBucket:
         plt.tight_layout()
         plt.show()
+    def get_throughput(self, time_window: Optional[float] = None) -> float:
+        """
+        Calculate the empirical bucket throughput in tokens per minute for the specified time window.
+        :param time_window: The time window in seconds to calculate the throughput for.
+        :return: The throughput in tokens per minute.
+        >>> bucket = TokenBucket(bucket_name="test", bucket_type="test", capacity=100, refill_rate=10)
+        >>> asyncio.run(bucket.get_tokens(50))
+        >>> time.sleep(1)  # Wait for 1 second
+        >>> asyncio.run(bucket.get_tokens(30))
+        >>> throughput = bucket.get_throughput(1)
+        >>> 4750 < throughput < 4850
+        True
+        """
+        now = time.monotonic()
+        if time_window is None:
+            start_time = self.creation_time
+        else:
+            start_time = now - time_window
+        if start_time < self.creation_time:
+            start_time = self.creation_time
+        elapsed_time = now - start_time
+        return (self.num_released / elapsed_time) * 60
+        # # Filter log entries within the time window
+        # relevant_log = [(t, tokens) for t, tokens in self.log if t >= start_time]
+        # if len(relevant_log) < 2:
+        #     return 0  # Not enough data points to calculate throughput
+        # # Calculate total tokens used
+        # initial_tokens = relevant_log[0][1]
+        # final_tokens = relevant_log[-1][1]
+        # tokens_used = self.num_released - (final_tokens - initial_tokens)
+        # # Calculate actual time elapsed
+        # actual_time_elapsed = relevant_log[-1][0] - relevant_log[0][0]
+        # # Calculate throughput in tokens per minute
+        # throughput = (tokens_used / actual_time_elapsed) * 60
+        # return throughput
 if __name__ == "__main__":
     import doctest

edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl

edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl