PyPI - edsl - Versions diffs - 0.1.33__py3-none-any.whl → 0.1.33.dev1__py3-none-any.whl - Mend

edsl 0.1.33__py3-none-any.whl → 0.1.33.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

edsl/Base.py +3 -9
edsl/__init__.py +3 -8
edsl/__version__.py +1 -1
edsl/agents/Agent.py +8 -40
edsl/agents/AgentList.py +0 -43
edsl/agents/Invigilator.py +219 -135
edsl/agents/InvigilatorBase.py +59 -148
edsl/agents/{PromptConstructor.py → PromptConstructionMixin.py} +89 -138
edsl/agents/__init__.py +0 -1
edsl/config.py +56 -47
edsl/coop/coop.py +7 -50
edsl/data/Cache.py +1 -35
edsl/data_transfer_models.py +38 -73
edsl/enums.py +0 -4
edsl/exceptions/language_models.py +1 -25
edsl/exceptions/questions.py +5 -62
edsl/exceptions/results.py +0 -4
edsl/inference_services/AnthropicService.py +11 -13
edsl/inference_services/AwsBedrock.py +17 -19
edsl/inference_services/AzureAI.py +20 -37
edsl/inference_services/GoogleService.py +12 -16
edsl/inference_services/GroqService.py +0 -2
edsl/inference_services/InferenceServiceABC.py +3 -58
edsl/inference_services/OpenAIService.py +54 -48
edsl/inference_services/models_available_cache.py +6 -0
edsl/inference_services/registry.py +0 -6
edsl/jobs/Answers.py +12 -10
edsl/jobs/Jobs.py +21 -36
edsl/jobs/buckets/BucketCollection.py +15 -24
edsl/jobs/buckets/TokenBucket.py +14 -93
edsl/jobs/interviews/Interview.py +78 -366
edsl/jobs/interviews/InterviewExceptionEntry.py +19 -85
edsl/jobs/interviews/InterviewTaskBuildingMixin.py +286 -0
edsl/jobs/interviews/{InterviewExceptionCollection.py → interview_exception_tracking.py} +68 -14
edsl/jobs/interviews/retry_management.py +37 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +175 -146
edsl/jobs/runners/JobsRunnerStatusMixin.py +333 -0
edsl/jobs/tasks/QuestionTaskCreator.py +23 -30
edsl/jobs/tasks/TaskHistory.py +213 -148
edsl/language_models/LanguageModel.py +156 -261
edsl/language_models/ModelList.py +2 -2
edsl/language_models/RegisterLanguageModelsMeta.py +29 -14
edsl/language_models/registry.py +6 -23
edsl/language_models/repair.py +19 -0
edsl/prompts/Prompt.py +2 -52
edsl/questions/AnswerValidatorMixin.py +26 -23
edsl/questions/QuestionBase.py +249 -329
edsl/questions/QuestionBudget.py +41 -99
edsl/questions/QuestionCheckBox.py +35 -227
edsl/questions/QuestionExtract.py +27 -98
edsl/questions/QuestionFreeText.py +29 -52
edsl/questions/QuestionFunctional.py +0 -7
edsl/questions/QuestionList.py +22 -141
edsl/questions/QuestionMultipleChoice.py +65 -159
edsl/questions/QuestionNumerical.py +46 -88
edsl/questions/QuestionRank.py +24 -182
edsl/questions/RegisterQuestionsMeta.py +12 -31
edsl/questions/__init__.py +4 -3
edsl/questions/derived/QuestionLikertFive.py +5 -10
edsl/questions/derived/QuestionLinearScale.py +2 -15
edsl/questions/derived/QuestionTopK.py +1 -10
edsl/questions/derived/QuestionYesNo.py +3 -24
edsl/questions/descriptors.py +7 -43
edsl/questions/question_registry.py +2 -6
edsl/results/Dataset.py +0 -20
edsl/results/DatasetExportMixin.py +48 -46
edsl/results/Result.py +5 -32
edsl/results/Results.py +46 -135
edsl/results/ResultsDBMixin.py +3 -3
edsl/scenarios/FileStore.py +10 -71
edsl/scenarios/Scenario.py +25 -96
edsl/scenarios/ScenarioImageMixin.py +2 -2
edsl/scenarios/ScenarioList.py +39 -361
edsl/scenarios/ScenarioListExportMixin.py +0 -9
edsl/scenarios/ScenarioListPdfMixin.py +4 -150
edsl/study/SnapShot.py +1 -8
edsl/study/Study.py +0 -32
edsl/surveys/Rule.py +1 -10
edsl/surveys/RuleCollection.py +5 -21
edsl/surveys/Survey.py +310 -636
edsl/surveys/SurveyExportMixin.py +9 -71
edsl/surveys/SurveyFlowVisualizationMixin.py +1 -2
edsl/surveys/SurveyQualtricsImport.py +4 -75
edsl/utilities/gcp_bucket/simple_example.py +9 -0
edsl/utilities/utilities.py +1 -9
{edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/METADATA +2 -5
edsl-0.1.33.dev1.dist-info/RECORD +209 -0
edsl/TemplateLoader.py +0 -24
edsl/auto/AutoStudy.py +0 -117
edsl/auto/StageBase.py +0 -230
edsl/auto/StageGenerateSurvey.py +0 -178
edsl/auto/StageLabelQuestions.py +0 -125
edsl/auto/StagePersona.py +0 -61
edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
edsl/auto/StagePersonaDimensionValues.py +0 -74
edsl/auto/StagePersonaDimensions.py +0 -69
edsl/auto/StageQuestions.py +0 -73
edsl/auto/SurveyCreatorPipeline.py +0 -21
edsl/auto/utilities.py +0 -224
edsl/coop/PriceFetcher.py +0 -58
edsl/inference_services/MistralAIService.py +0 -120
edsl/inference_services/TestService.py +0 -80
edsl/inference_services/TogetherAIService.py +0 -170
edsl/jobs/FailedQuestion.py +0 -78
edsl/jobs/runners/JobsRunnerStatus.py +0 -331
edsl/language_models/fake_openai_call.py +0 -15
edsl/language_models/fake_openai_service.py +0 -61
edsl/language_models/utilities.py +0 -61
edsl/questions/QuestionBaseGenMixin.py +0 -133
edsl/questions/QuestionBasePromptsMixin.py +0 -266
edsl/questions/Quick.py +0 -41
edsl/questions/ResponseValidatorABC.py +0 -170
edsl/questions/decorators.py +0 -21
edsl/questions/prompt_templates/question_budget.jinja +0 -13
edsl/questions/prompt_templates/question_checkbox.jinja +0 -32
edsl/questions/prompt_templates/question_extract.jinja +0 -11
edsl/questions/prompt_templates/question_free_text.jinja +0 -3
edsl/questions/prompt_templates/question_linear_scale.jinja +0 -11
edsl/questions/prompt_templates/question_list.jinja +0 -17
edsl/questions/prompt_templates/question_multiple_choice.jinja +0 -33
edsl/questions/prompt_templates/question_numerical.jinja +0 -37
edsl/questions/templates/__init__.py +0 -0
edsl/questions/templates/budget/__init__.py +0 -0
edsl/questions/templates/budget/answering_instructions.jinja +0 -7
edsl/questions/templates/budget/question_presentation.jinja +0 -7
edsl/questions/templates/checkbox/__init__.py +0 -0
edsl/questions/templates/checkbox/answering_instructions.jinja +0 -10
edsl/questions/templates/checkbox/question_presentation.jinja +0 -22
edsl/questions/templates/extract/__init__.py +0 -0
edsl/questions/templates/extract/answering_instructions.jinja +0 -7
edsl/questions/templates/extract/question_presentation.jinja +0 -1
edsl/questions/templates/free_text/__init__.py +0 -0
edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
edsl/questions/templates/free_text/question_presentation.jinja +0 -1
edsl/questions/templates/likert_five/__init__.py +0 -0
edsl/questions/templates/likert_five/answering_instructions.jinja +0 -10
edsl/questions/templates/likert_five/question_presentation.jinja +0 -12
edsl/questions/templates/linear_scale/__init__.py +0 -0
edsl/questions/templates/linear_scale/answering_instructions.jinja +0 -5
edsl/questions/templates/linear_scale/question_presentation.jinja +0 -5
edsl/questions/templates/list/__init__.py +0 -0
edsl/questions/templates/list/answering_instructions.jinja +0 -4
edsl/questions/templates/list/question_presentation.jinja +0 -5
edsl/questions/templates/multiple_choice/__init__.py +0 -0
edsl/questions/templates/multiple_choice/answering_instructions.jinja +0 -9
edsl/questions/templates/multiple_choice/html.jinja +0 -0
edsl/questions/templates/multiple_choice/question_presentation.jinja +0 -12
edsl/questions/templates/numerical/__init__.py +0 -0
edsl/questions/templates/numerical/answering_instructions.jinja +0 -8
edsl/questions/templates/numerical/question_presentation.jinja +0 -7
edsl/questions/templates/rank/__init__.py +0 -0
edsl/questions/templates/rank/answering_instructions.jinja +0 -11
edsl/questions/templates/rank/question_presentation.jinja +0 -15
edsl/questions/templates/top_k/__init__.py +0 -0
edsl/questions/templates/top_k/answering_instructions.jinja +0 -8
edsl/questions/templates/top_k/question_presentation.jinja +0 -22
edsl/questions/templates/yes_no/__init__.py +0 -0
edsl/questions/templates/yes_no/answering_instructions.jinja +0 -6
edsl/questions/templates/yes_no/question_presentation.jinja +0 -12
edsl/results/DatasetTree.py +0 -145
edsl/results/Selector.py +0 -118
edsl/results/tree_explore.py +0 -115
edsl/surveys/instructions/ChangeInstruction.py +0 -47
edsl/surveys/instructions/Instruction.py +0 -34
edsl/surveys/instructions/InstructionCollection.py +0 -77
edsl/surveys/instructions/__init__.py +0 -0
edsl/templates/error_reporting/base.html +0 -24
edsl/templates/error_reporting/exceptions_by_model.html +0 -35
edsl/templates/error_reporting/exceptions_by_question_name.html +0 -17
edsl/templates/error_reporting/exceptions_by_type.html +0 -17
edsl/templates/error_reporting/interview_details.html +0 -116
edsl/templates/error_reporting/interviews.html +0 -10
edsl/templates/error_reporting/overview.html +0 -5
edsl/templates/error_reporting/performance_plot.html +0 -2
edsl/templates/error_reporting/report.css +0 -74
edsl/templates/error_reporting/report.html +0 -118
edsl/templates/error_reporting/report.js +0 -25
edsl-0.1.33.dist-info/RECORD +0 -295
{edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/LICENSE +0 -0
{edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/WHEEL +0 -0

edsl/jobs/Jobs.py CHANGED Viewed

@@ -156,11 +156,7 @@ class Jobs(Base):
         from edsl.results.Dataset import Dataset
         for interview_index, interview in enumerate(interviews):
-            invigilators = [
-                interview._get_invigilator(question)
-                for question in self.survey.questions
-            ]
-            # list(interview._build_invigilators(debug=False))
+            invigilators = list(interview._build_invigilators(debug=False))
             for _, invigilator in enumerate(invigilators):
                 prompts = invigilator.get_prompts()
                 user_prompts.append(prompts["user_prompt"])
@@ -348,7 +344,6 @@ class Jobs(Base):
                 scenario=scenario,
                 model=model,
                 skip_retry=self.skip_retry,
-                raise_validation_errors=self.raise_validation_errors,
             )
     def create_bucket_collection(self) -> BucketCollection:
@@ -460,44 +455,33 @@ class Jobs(Base):
                 if warn:
                     warnings.warn(message)
-        if self.scenarios.has_jinja_braces:
-            warnings.warn(
-                "The scenarios have Jinja braces ({{ and }}). Converting to '<<' and '>>'. If you want a different conversion, use the convert_jinja_braces method first to modify the scenario."
-            )
-            self.scenarios = self.scenarios.convert_jinja_braces()
     @property
     def skip_retry(self):
         if not hasattr(self, "_skip_retry"):
             return False
         return self._skip_retry
-    @property
-    def raise_validation_errors(self):
-        if not hasattr(self, "_raise_validation_errors"):
-            return False
-        return self._raise_validation_errors
     def run(
         self,
         n: int = 1,
+        debug: bool = False,
         progress_bar: bool = False,
         stop_on_exception: bool = False,
         cache: Union[Cache, bool] = None,
         check_api_keys: bool = False,
         sidecar_model: Optional[LanguageModel] = None,
+        batch_mode: Optional[bool] = None,
         verbose: bool = False,
         print_exceptions=True,
         remote_cache_description: Optional[str] = None,
         remote_inference_description: Optional[str] = None,
         skip_retry: bool = False,
-        raise_validation_errors: bool = False,
-        disable_remote_inference: bool = False,
     ) -> Results:
         """
         Runs the Job: conducts Interviews and returns their results.
         :param n: how many times to run each interview
+        :param debug: prints debug messages
         :param progress_bar: shows a progress bar
         :param stop_on_exception: stops the job if an exception is raised
         :param cache: a cache object to store results
@@ -511,21 +495,22 @@ class Jobs(Base):
         self._check_parameters()
         self._skip_retry = skip_retry
-        self._raise_validation_errors = raise_validation_errors
-        self.verbose = verbose
+        if batch_mode is not None:
+            raise NotImplementedError(
+                "Batch mode is deprecated. Please update your code to not include 'batch_mode' in the 'run' method."
+            )
-        remote_cache = False
-        remote_inference = False
+        self.verbose = verbose
-        if not disable_remote_inference:
-            try:
-                coop = Coop()
-                user_edsl_settings = Coop().edsl_settings
-                remote_cache = user_edsl_settings.get("remote_caching", False)
-                remote_inference = user_edsl_settings.get("remote_inference", False)
-            except Exception:
-                pass
+        try:
+            coop = Coop()
+            user_edsl_settings = coop.edsl_settings
+            remote_cache = user_edsl_settings["remote_caching"]
+            remote_inference = user_edsl_settings["remote_inference"]
+        except Exception:
+            remote_cache = False
+            remote_inference = False
         if remote_inference:
             import time
@@ -602,7 +587,7 @@ class Jobs(Base):
                         )
         # handle cache
-        if cache is None or cache is True:
+        if cache is None:
             from edsl.data.CacheHandler import CacheHandler
             cache = CacheHandler().get_cache()
@@ -614,12 +599,12 @@ class Jobs(Base):
         if not remote_cache:
             results = self._run_local(
                 n=n,
+                debug=debug,
                 progress_bar=progress_bar,
                 cache=cache,
                 stop_on_exception=stop_on_exception,
                 sidecar_model=sidecar_model,
                 print_exceptions=print_exceptions,
-                raise_validation_errors=raise_validation_errors,
             )
             results.cache = cache.new_entries_cache()
@@ -658,12 +643,12 @@ class Jobs(Base):
             self._output("Running job...")
             results = self._run_local(
                 n=n,
+                debug=debug,
                 progress_bar=progress_bar,
                 cache=cache,
                 stop_on_exception=stop_on_exception,
                 sidecar_model=sidecar_model,
                 print_exceptions=print_exceptions,
-                raise_validation_errors=raise_validation_errors,
             )
             self._output("Job completed!")
@@ -898,7 +883,7 @@ def main():
     job = Jobs.example()
     len(job) == 8
-    results = job.run(cache=Cache())
+    results = job.run(debug=True, cache=Cache())
     len(results) == 8
     results

edsl/jobs/buckets/BucketCollection.py CHANGED Viewed

@@ -13,8 +13,6 @@ class BucketCollection(UserDict):
     def __init__(self, infinity_buckets=False):
         super().__init__()
         self.infinity_buckets = infinity_buckets
-        self.models_to_services = {}
-        self.services_to_buckets = {}
     def __repr__(self):
         return f"BucketCollection({self.data})"
@@ -23,7 +21,6 @@ class BucketCollection(UserDict):
         """Adds a model to the bucket collection.
         This will create the token and request buckets for the model."""
         # compute the TPS and RPS from the model
         if not self.infinity_buckets:
             TPS = model.TPM / 60.0
@@ -32,28 +29,22 @@ class BucketCollection(UserDict):
             TPS = float("inf")
             RPS = float("inf")
-        if model.model not in self.models_to_services:
-            service = model._inference_service_
-            if service not in self.services_to_buckets:
-                requests_bucket = TokenBucket(
-                    bucket_name=service,
-                    bucket_type="requests",
-                    capacity=RPS,
-                    refill_rate=RPS,
-                )
-                tokens_bucket = TokenBucket(
-                    bucket_name=service,
-                    bucket_type="tokens",
-                    capacity=TPS,
-                    refill_rate=TPS,
-                )
-                self.services_to_buckets[service] = ModelBuckets(
-                    requests_bucket, tokens_bucket
-                )
-            self.models_to_services[model.model] = service
-            self[model] = self.services_to_buckets[service]
+        # create the buckets
+        requests_bucket = TokenBucket(
+            bucket_name=model.model,
+            bucket_type="requests",
+            capacity=RPS,
+            refill_rate=RPS,
+        )
+        tokens_bucket = TokenBucket(
+            bucket_name=model.model, bucket_type="tokens", capacity=TPS, refill_rate=TPS
+        )
+        model_buckets = ModelBuckets(requests_bucket, tokens_bucket)
+        if model in self:
+            # it if already exists, combine the buckets
+            self[model] += model_buckets
         else:
-            self[model] = self.services_to_buckets[self.models_to_services[model.model]]
+            self[model] = model_buckets
     def visualize(self) -> dict:
         """Visualize the token and request buckets for each model."""

edsl/jobs/buckets/TokenBucket.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Union, List, Any, Optional
+from typing import Union, List, Any
 import asyncio
 import time
@@ -17,12 +17,6 @@ class TokenBucket:
         self.bucket_name = bucket_name
         self.bucket_type = bucket_type
         self.capacity = capacity  # Maximum number of tokens
-        self.added_tokens = 0
-        self.target_rate = (
-            capacity * 60
-        )  # set this here because it can change with turbo mode
         self._old_capacity = capacity
         self.tokens = capacity  # Current number of available tokens
         self.refill_rate = refill_rate  # Rate at which tokens are refilled
@@ -31,12 +25,6 @@ class TokenBucket:
         self.log: List[Any] = []
         self.turbo_mode = False
-        self.creation_time = time.monotonic()
-        self.num_requests = 0
-        self.num_released = 0
-        self.tokens_returned = 0
     def turbo_mode_on(self):
         """Set the refill rate to infinity."""
         if self.turbo_mode:
@@ -81,7 +69,6 @@ class TokenBucket:
         >>> bucket.tokens
         10
         """
-        self.tokens_returned += tokens
         self.tokens = min(self.capacity, self.tokens + tokens)
         self.log.append((time.monotonic(), self.tokens))
@@ -95,30 +82,23 @@ class TokenBucket:
         >>> bucket.refill()
         >>> bucket.tokens > 0
         True
         """
-        """Refill the bucket with new tokens based on elapsed time."""
         now = time.monotonic()
-        # print(f"Time is now: {now}; Last refill time: {self.last_refill}")
         elapsed = now - self.last_refill
-        # print("Elapsed time: ", elapsed)
         refill_amount = elapsed * self.refill_rate
         self.tokens = min(self.capacity, self.tokens + refill_amount)
         self.last_refill = now
-        if self.tokens < self.capacity:
-            pass
-            # print(f"Refilled. Current tokens: {self.tokens:.4f}")
-            # print(f"Elapsed time: {elapsed:.4f} seconds")
-            # print(f"Refill amount: {refill_amount:.4f}")
         self.log.append((now, self.tokens))
     def wait_time(self, requested_tokens: Union[float, int]) -> float:
         """Calculate the time to wait for the requested number of tokens."""
-        # self.refill()  # Update the current token count
-        if self.tokens >= requested_tokens:
-            return 0
-        return (requested_tokens - self.tokens) / self.refill_rate
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        refill_amount = elapsed * self.refill_rate
+        available_tokens = min(self.capacity, self.tokens + refill_amount)
+        return max(0, requested_tokens - available_tokens) / self.refill_rate
     async def get_tokens(
         self, amount: Union[int, float] = 1, cheat_bucket_capacity=True
@@ -143,33 +123,22 @@ class TokenBucket:
         ...
         ValueError: Requested amount exceeds bucket capacity. Bucket capacity: 10, requested amount: 11. As the bucket never overflows, the requested amount will never be available.
         >>> asyncio.run(bucket.get_tokens(11, cheat_bucket_capacity=True))
-        >>> bucket.capacity
-        12.100000000000001
         """
-        self.num_requests += amount
-        if amount >= self.capacity:
+        if amount > self.capacity:
             if not cheat_bucket_capacity:
                 msg = f"Requested amount exceeds bucket capacity. Bucket capacity: {self.capacity}, requested amount: {amount}. As the bucket never overflows, the requested amount will never be available."
                 raise ValueError(msg)
             else:
-                self.capacity = amount * 1.10
-                self._old_capacity = self.capacity
+                self.tokens = 0  # clear the bucket but let it go through
+                return
-        start_time = time.monotonic()
-        while True:
-            self.refill()  # Refill based on elapsed time
-            if self.tokens >= amount:
-                self.tokens -= amount
-                break
+        while self.tokens < amount:
+            self.refill()
+            await asyncio.sleep(0.01)  # Sleep briefly to prevent busy waiting
+        self.tokens -= amount
-            wait_time = self.wait_time(amount)
-            if wait_time > 0:
-                await asyncio.sleep(wait_time)
-        self.num_released += amount
         now = time.monotonic()
         self.log.append((now, self.tokens))
-        return None
     def get_log(self) -> list[tuple]:
         return self.log
@@ -193,54 +162,6 @@ class TokenBucket:
         plt.tight_layout()
         plt.show()
-    def get_throughput(self, time_window: Optional[float] = None) -> float:
-        """
-        Calculate the empirical bucket throughput in tokens per minute for the specified time window.
-        :param time_window: The time window in seconds to calculate the throughput for.
-        :return: The throughput in tokens per minute.
-        >>> bucket = TokenBucket(bucket_name="test", bucket_type="test", capacity=100, refill_rate=10)
-        >>> asyncio.run(bucket.get_tokens(50))
-        >>> time.sleep(1)  # Wait for 1 second
-        >>> asyncio.run(bucket.get_tokens(30))
-        >>> throughput = bucket.get_throughput(1)
-        >>> 4750 < throughput < 4850
-        True
-        """
-        now = time.monotonic()
-        if time_window is None:
-            start_time = self.creation_time
-        else:
-            start_time = now - time_window
-        if start_time < self.creation_time:
-            start_time = self.creation_time
-        elapsed_time = now - start_time
-        return (self.num_released / elapsed_time) * 60
-        # # Filter log entries within the time window
-        # relevant_log = [(t, tokens) for t, tokens in self.log if t >= start_time]
-        # if len(relevant_log) < 2:
-        #     return 0  # Not enough data points to calculate throughput
-        # # Calculate total tokens used
-        # initial_tokens = relevant_log[0][1]
-        # final_tokens = relevant_log[-1][1]
-        # tokens_used = self.num_released - (final_tokens - initial_tokens)
-        # # Calculate actual time elapsed
-        # actual_time_elapsed = relevant_log[-1][0] - relevant_log[0][0]
-        # # Calculate throughput in tokens per minute
-        # throughput = (tokens_used / actual_time_elapsed) * 60
-        # return throughput
 if __name__ == "__main__":
     import doctest

edsl 0.1.33__py3-none-any.whl → 0.1.33.dev1__py3-none-any.whl

edsl 0.1.33__py3-none-any.whl → 0.1.33.dev1__py3-none-any.whl