PyPI - edsl - Versions diffs - 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl - Mend

edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (212) hide show

edsl/Base.py +197 -116
edsl/__init__.py +15 -7
edsl/__version__.py +1 -1
edsl/agents/Agent.py +351 -147
edsl/agents/AgentList.py +211 -73
edsl/agents/Invigilator.py +101 -50
edsl/agents/InvigilatorBase.py +62 -70
edsl/agents/PromptConstructor.py +143 -225
edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
edsl/agents/__init__.py +0 -1
edsl/agents/prompt_helpers.py +3 -3
edsl/agents/question_option_processor.py +172 -0
edsl/auto/AutoStudy.py +18 -5
edsl/auto/StageBase.py +53 -40
edsl/auto/StageQuestions.py +2 -1
edsl/auto/utilities.py +0 -6
edsl/config.py +22 -2
edsl/conversation/car_buying.py +2 -1
edsl/coop/CoopFunctionsMixin.py +15 -0
edsl/coop/ExpectedParrotKeyHandler.py +125 -0
edsl/coop/PriceFetcher.py +1 -1
edsl/coop/coop.py +125 -47
edsl/coop/utils.py +14 -14
edsl/data/Cache.py +45 -27
edsl/data/CacheEntry.py +12 -15
edsl/data/CacheHandler.py +31 -12
edsl/data/RemoteCacheSync.py +154 -46
edsl/data/__init__.py +4 -3
edsl/data_transfer_models.py +2 -1
edsl/enums.py +27 -0
edsl/exceptions/__init__.py +50 -50
edsl/exceptions/agents.py +12 -0
edsl/exceptions/inference_services.py +5 -0
edsl/exceptions/questions.py +24 -6
edsl/exceptions/scenarios.py +7 -0
edsl/inference_services/AnthropicService.py +38 -19
edsl/inference_services/AvailableModelCacheHandler.py +184 -0
edsl/inference_services/AvailableModelFetcher.py +215 -0
edsl/inference_services/AwsBedrock.py +0 -2
edsl/inference_services/AzureAI.py +0 -2
edsl/inference_services/GoogleService.py +7 -12
edsl/inference_services/InferenceServiceABC.py +18 -85
edsl/inference_services/InferenceServicesCollection.py +120 -79
edsl/inference_services/MistralAIService.py +0 -3
edsl/inference_services/OpenAIService.py +47 -35
edsl/inference_services/PerplexityService.py +0 -3
edsl/inference_services/ServiceAvailability.py +135 -0
edsl/inference_services/TestService.py +11 -10
edsl/inference_services/TogetherAIService.py +5 -3
edsl/inference_services/data_structures.py +134 -0
edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
edsl/jobs/Answers.py +1 -14
edsl/jobs/FetchInvigilator.py +47 -0
edsl/jobs/InterviewTaskManager.py +98 -0
edsl/jobs/InterviewsConstructor.py +50 -0
edsl/jobs/Jobs.py +356 -431
edsl/jobs/JobsChecks.py +35 -10
edsl/jobs/JobsComponentConstructor.py +189 -0
edsl/jobs/JobsPrompts.py +6 -4
edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
edsl/jobs/RequestTokenEstimator.py +30 -0
edsl/jobs/async_interview_runner.py +138 -0
edsl/jobs/buckets/BucketCollection.py +44 -3
edsl/jobs/buckets/TokenBucket.py +53 -21
edsl/jobs/buckets/TokenBucketAPI.py +211 -0
edsl/jobs/buckets/TokenBucketClient.py +191 -0
edsl/jobs/check_survey_scenario_compatibility.py +85 -0
edsl/jobs/data_structures.py +120 -0
edsl/jobs/decorators.py +35 -0
edsl/jobs/interviews/Interview.py +143 -408
edsl/jobs/jobs_status_enums.py +9 -0
edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
edsl/jobs/results_exceptions_handler.py +98 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
edsl/jobs/runners/JobsRunnerStatus.py +133 -165
edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
edsl/jobs/tasks/TaskHistory.py +38 -18
edsl/jobs/tasks/task_status_enum.py +0 -2
edsl/language_models/ComputeCost.py +63 -0
edsl/language_models/LanguageModel.py +194 -236
edsl/language_models/ModelList.py +28 -19
edsl/language_models/PriceManager.py +127 -0
edsl/language_models/RawResponseHandler.py +106 -0
edsl/language_models/ServiceDataSources.py +0 -0
edsl/language_models/__init__.py +1 -2
edsl/language_models/key_management/KeyLookup.py +63 -0
edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
edsl/language_models/key_management/KeyLookupCollection.py +38 -0
edsl/language_models/key_management/__init__.py +0 -0
edsl/language_models/key_management/models.py +131 -0
edsl/language_models/model.py +256 -0
edsl/language_models/repair.py +2 -2
edsl/language_models/utilities.py +5 -4
edsl/notebooks/Notebook.py +19 -14
edsl/notebooks/NotebookToLaTeX.py +142 -0
edsl/prompts/Prompt.py +29 -39
edsl/questions/ExceptionExplainer.py +77 -0
edsl/questions/HTMLQuestion.py +103 -0
edsl/questions/QuestionBase.py +68 -214
edsl/questions/QuestionBasePromptsMixin.py +7 -3
edsl/questions/QuestionBudget.py +1 -1
edsl/questions/QuestionCheckBox.py +3 -3
edsl/questions/QuestionExtract.py +5 -7
edsl/questions/QuestionFreeText.py +2 -3
edsl/questions/QuestionList.py +10 -18
edsl/questions/QuestionMatrix.py +265 -0
edsl/questions/QuestionMultipleChoice.py +67 -23
edsl/questions/QuestionNumerical.py +2 -4
edsl/questions/QuestionRank.py +7 -17
edsl/questions/SimpleAskMixin.py +4 -3
edsl/questions/__init__.py +2 -1
edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
edsl/questions/data_structures.py +20 -0
edsl/questions/derived/QuestionLinearScale.py +6 -3
edsl/questions/derived/QuestionTopK.py +1 -1
edsl/questions/descriptors.py +17 -3
edsl/questions/loop_processor.py +149 -0
edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
edsl/questions/question_registry.py +1 -1
edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
edsl/questions/response_validator_factory.py +34 -0
edsl/questions/templates/matrix/__init__.py +1 -0
edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
edsl/questions/templates/matrix/question_presentation.jinja +20 -0
edsl/results/CSSParameterizer.py +1 -1
edsl/results/Dataset.py +170 -7
edsl/results/DatasetExportMixin.py +168 -305
edsl/results/DatasetTree.py +28 -8
edsl/results/MarkdownToDocx.py +122 -0
edsl/results/MarkdownToPDF.py +111 -0
edsl/results/Result.py +298 -206
edsl/results/Results.py +149 -131
edsl/results/ResultsExportMixin.py +2 -0
edsl/results/TableDisplay.py +98 -171
edsl/results/TextEditor.py +50 -0
edsl/results/__init__.py +1 -1
edsl/results/file_exports.py +252 -0
edsl/results/{Selector.py → results_selector.py} +23 -13
edsl/results/smart_objects.py +96 -0
edsl/results/table_data_class.py +12 -0
edsl/results/table_renderers.py +118 -0
edsl/scenarios/ConstructDownloadLink.py +109 -0
edsl/scenarios/DocumentChunker.py +102 -0
edsl/scenarios/DocxScenario.py +16 -0
edsl/scenarios/FileStore.py +150 -239
edsl/scenarios/PdfExtractor.py +40 -0
edsl/scenarios/Scenario.py +90 -193
edsl/scenarios/ScenarioHtmlMixin.py +4 -3
edsl/scenarios/ScenarioList.py +415 -244
edsl/scenarios/ScenarioListExportMixin.py +0 -7
edsl/scenarios/ScenarioListPdfMixin.py +15 -37
edsl/scenarios/__init__.py +1 -2
edsl/scenarios/directory_scanner.py +96 -0
edsl/scenarios/file_methods.py +85 -0
edsl/scenarios/handlers/__init__.py +13 -0
edsl/scenarios/handlers/csv.py +49 -0
edsl/scenarios/handlers/docx.py +76 -0
edsl/scenarios/handlers/html.py +37 -0
edsl/scenarios/handlers/json.py +111 -0
edsl/scenarios/handlers/latex.py +5 -0
edsl/scenarios/handlers/md.py +51 -0
edsl/scenarios/handlers/pdf.py +68 -0
edsl/scenarios/handlers/png.py +39 -0
edsl/scenarios/handlers/pptx.py +105 -0
edsl/scenarios/handlers/py.py +294 -0
edsl/scenarios/handlers/sql.py +313 -0
edsl/scenarios/handlers/sqlite.py +149 -0
edsl/scenarios/handlers/txt.py +33 -0
edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
edsl/scenarios/scenario_selector.py +156 -0
edsl/study/ObjectEntry.py +1 -1
edsl/study/SnapShot.py +1 -1
edsl/study/Study.py +5 -12
edsl/surveys/ConstructDAG.py +92 -0
edsl/surveys/EditSurvey.py +221 -0
edsl/surveys/InstructionHandler.py +100 -0
edsl/surveys/MemoryManagement.py +72 -0
edsl/surveys/Rule.py +5 -4
edsl/surveys/RuleCollection.py +25 -27
edsl/surveys/RuleManager.py +172 -0
edsl/surveys/Simulator.py +75 -0
edsl/surveys/Survey.py +270 -791
edsl/surveys/SurveyCSS.py +20 -8
edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
edsl/surveys/SurveyToApp.py +141 -0
edsl/surveys/__init__.py +4 -2
edsl/surveys/descriptors.py +6 -2
edsl/surveys/instructions/ChangeInstruction.py +1 -2
edsl/surveys/instructions/Instruction.py +4 -13
edsl/surveys/instructions/InstructionCollection.py +11 -6
edsl/templates/error_reporting/interview_details.html +1 -1
edsl/templates/error_reporting/report.html +1 -1
edsl/tools/plotting.py +1 -1
edsl/utilities/PrettyList.py +56 -0
edsl/utilities/is_notebook.py +18 -0
edsl/utilities/is_valid_variable_name.py +11 -0
edsl/utilities/remove_edsl_version.py +24 -0
edsl/utilities/utilities.py +35 -23
{edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
edsl-0.1.39.dist-info/RECORD +358 -0
{edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
edsl/language_models/KeyLookup.py +0 -30
edsl/language_models/registry.py +0 -190
edsl/language_models/unused/ReplicateBase.py +0 -83
edsl/results/ResultsDBMixin.py +0 -238
edsl-0.1.38.dev4.dist-info/RECORD +0 -277
/edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
/edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
/edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
{edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0

edsl/results/Dataset.py CHANGED Viewed

@@ -1,19 +1,22 @@
 """A module to represent a dataset of observations."""
 from __future__ import annotations
-import random
+import sys
 import json
+import random
 from collections import UserList
 from typing import Any, Union, Optional
-import sys
-import numpy as np
 from edsl.results.ResultsExportMixin import ResultsExportMixin
 from edsl.results.DatasetTree import Tree
 from edsl.results.TableDisplay import TableDisplay
+from edsl.Base import PersistenceMixin, HashingMixin
-class Dataset(UserList, ResultsExportMixin):
+from edsl.results.smart_objects import FirstObject
+class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
     """A class to represent a dataset of observations."""
     def __init__(
@@ -36,6 +39,46 @@ class Dataset(UserList, ResultsExportMixin):
         _, values = list(self.data[0].items())[0]
         return len(values)
+    def tail(self, n: int = 5) -> Dataset:
+        """Return the last n observations in the dataset.
+        >>> d = Dataset([{'a.b':[1,2,3,4]}])
+        >>> d.tail(2)
+        Dataset([{'a.b': [3, 4]}])
+        """
+        new_data = []
+        for observation in self.data:
+            key, values = list(observation.items())[0]
+            new_data.append({key: values[-n:]})
+        return Dataset(new_data)
+    def head(self, n: int = 5) -> Dataset:
+        """Return the first n observations in the dataset.
+        >>> d = Dataset([{'a.b':[1,2,3,4]}])
+        >>> d.head(2)
+        Dataset([{'a.b': [1, 2]}])
+        """
+        new_data = []
+        for observation in self.data:
+            key, values = list(observation.items())[0]
+            new_data.append({key: values[:n]})
+        return Dataset(new_data)
+    def expand(self, field):
+        return self.to_scenario_list().expand(field)
+    def view(self):
+        from perspective.widget import PerspectiveWidget
+        w = PerspectiveWidget(
+            self.to_pandas(),
+            plugin="Datagrid",
+            aggregates={"datetime": "any"},
+            sort=[["date", "desc"]],
+        )
+        return w
     def keys(self) -> list[str]:
         """Return the keys of the first observation in the dataset.
@@ -48,6 +91,79 @@ class Dataset(UserList, ResultsExportMixin):
     def filter(self, expression):
         return self.to_scenario_list().filter(expression).to_dataset()
+    def long(self, exclude_fields: list[str] = None) -> Dataset:
+        headers, data = self._tabular()
+        exclude_fields = exclude_fields or []
+        # Initialize result dictionaries for each column
+        result_dict = {}
+        for index, row in enumerate(data):
+            row_values = dict(zip(headers, row))
+            excluded_values = {field: row_values[field] for field in exclude_fields}
+            # Transform non-excluded fields to long format
+            for header, value in row_values.items():
+                if header not in exclude_fields:
+                    # Initialize lists in result_dict if needed
+                    if not result_dict:
+                        result_dict = {
+                            "row": [],
+                            "key": [],
+                            "value": [],
+                            **{field: [] for field in exclude_fields},
+                        }
+                    # Add values to each column
+                    result_dict["row"].append(index)
+                    result_dict["key"].append(header)
+                    result_dict["value"].append(value)
+                    for field in exclude_fields:
+                        result_dict[field].append(excluded_values[field])
+        return Dataset([{k: v} for k, v in result_dict.items()])
+    def wide(self) -> "Dataset":
+        """
+        Convert a long-format dataset (with row, key, value columns) to wide format.
+        Expected input format:
+        - A dataset with three columns containing dictionaries:
+          - row: list of row indices
+          - key: list of column names
+          - value: list of values
+        Returns:
+        - Dataset: A new dataset with columns corresponding to unique keys
+        """
+        # Extract the component arrays
+        row_dict = next(col for col in self if "row" in col)
+        key_dict = next(col for col in self if "key" in col)
+        value_dict = next(col for col in self if "value" in col)
+        rows = row_dict["row"]
+        keys = key_dict["key"]
+        values = value_dict["value"]
+        if not (len(rows) == len(keys) == len(values)):
+            raise ValueError("All input arrays must have the same length")
+        # Get unique keys and row indices
+        unique_keys = sorted(set(keys))
+        unique_rows = sorted(set(rows))
+        # Create a dictionary to store the result
+        result = {key: [None] * len(unique_rows) for key in unique_keys}
+        # Populate the result dictionary
+        for row_idx, key, value in zip(rows, keys, values):
+            # Find the position in the output array for this row
+            output_row_idx = unique_rows.index(row_idx)
+            result[key][output_row_idx] = value
+        # Convert to list of column dictionaries format
+        return Dataset([{key: values} for key, values in result.items()])
     def __repr__(self) -> str:
         """Return a string representation of the dataset."""
         return f"Dataset({self.data})"
@@ -126,7 +242,21 @@ class Dataset(UserList, ResultsExportMixin):
             """Get the values of the first key in the dictionary."""
             return list(d.values())[0]
-        return get_values(self.data[0])[0]
+        return FirstObject(get_values(self.data[0])[0])
+    def latex(self, **kwargs):
+        return self.table().latex()
+    def remove_prefix(self) -> Dataset:
+        new_data = []
+        for observation in self.data:
+            key, values = list(observation.items())[0]
+            if "." in key:
+                new_key = key.split(".")[1]
+                new_data.append({new_key: values})
+            else:
+                new_data.append({key: values})
+        return Dataset(new_data)
     def print(self, pretty_labels=None, **kwargs):
         if "format" in kwargs:
@@ -146,6 +276,25 @@ class Dataset(UserList, ResultsExportMixin):
             new_data.append({new_key: values})
         return Dataset(new_data)
+    def merge(self, other: Dataset, by_x, by_y) -> Dataset:
+        """Merge the dataset with another dataset on the given keys.""
+        merged_df = df1.merge(df2, how="left", on=["key1", "key2"])
+        """
+        df1 = self.to_pandas()
+        df2 = other.to_pandas()
+        merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
+        return Dataset.from_pandas_dataframe(merged_df)
+    def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
+        from edsl.surveys.Survey import Survey
+        from edsl.questions.QuestionBase import QuestionBase
+        if isinstance(survey_or_question, Survey):
+            return survey_or_question.by(self.to_scenario_list())
+        elif isinstance(survey_or_question, QuestionBase):
+            return Survey([survey_or_question]).by(self.to_scenario_list())
     def select(self, *keys) -> Dataset:
         """Return a new dataset with only the selected keys.
@@ -281,6 +430,7 @@ class Dataset(UserList, ResultsExportMixin):
         """
+        import numpy as np
         def sort_indices(lst: list[Any]) -> list[int]:
             """
@@ -409,13 +559,26 @@ class Dataset(UserList, ResultsExportMixin):
         return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
     @classmethod
-    def example(self):
+    def example(self, n: int = None):
         """Return an example dataset.
         >>> Dataset.example()
         Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
         """
-        return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
+        if n is None:
+            return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
+        else:
+            return Dataset([{"a": [1] * n}, {"b": [2] * n}])
+    @classmethod
+    def from_edsl_object(cls, object):
+        d = object.to_dict(add_edsl_version=False)
+        return cls([{"key": list(d.keys())}, {"value": list(d.values())}])
+    @classmethod
+    def from_pandas_dataframe(cls, df):
+        result = cls([{col: df[col].tolist()} for col in df.columns])
+        return result
 if __name__ == "__main__":

edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl

edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl