PyPI - edsl - Versions diffs - 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl - Mend

edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (212) hide show

edsl/Base.py +116 -197
edsl/__init__.py +7 -15
edsl/__version__.py +1 -1
edsl/agents/Agent.py +147 -351
edsl/agents/AgentList.py +73 -211
edsl/agents/Invigilator.py +50 -101
edsl/agents/InvigilatorBase.py +70 -62
edsl/agents/PromptConstructor.py +225 -143
edsl/agents/__init__.py +1 -0
edsl/agents/prompt_helpers.py +3 -3
edsl/auto/AutoStudy.py +5 -18
edsl/auto/StageBase.py +40 -53
edsl/auto/StageQuestions.py +1 -2
edsl/auto/utilities.py +6 -0
edsl/config.py +2 -22
edsl/conversation/car_buying.py +1 -2
edsl/coop/PriceFetcher.py +1 -1
edsl/coop/coop.py +47 -125
edsl/coop/utils.py +14 -14
edsl/data/Cache.py +27 -45
edsl/data/CacheEntry.py +15 -12
edsl/data/CacheHandler.py +12 -31
edsl/data/RemoteCacheSync.py +46 -154
edsl/data/__init__.py +3 -4
edsl/data_transfer_models.py +1 -2
edsl/enums.py +0 -27
edsl/exceptions/__init__.py +50 -50
edsl/exceptions/agents.py +0 -12
edsl/exceptions/questions.py +6 -24
edsl/exceptions/scenarios.py +0 -7
edsl/inference_services/AnthropicService.py +19 -38
edsl/inference_services/AwsBedrock.py +2 -0
edsl/inference_services/AzureAI.py +2 -0
edsl/inference_services/GoogleService.py +12 -7
edsl/inference_services/InferenceServiceABC.py +85 -18
edsl/inference_services/InferenceServicesCollection.py +79 -120
edsl/inference_services/MistralAIService.py +3 -0
edsl/inference_services/OpenAIService.py +35 -47
edsl/inference_services/PerplexityService.py +3 -0
edsl/inference_services/TestService.py +10 -11
edsl/inference_services/TogetherAIService.py +3 -5
edsl/jobs/Answers.py +14 -1
edsl/jobs/Jobs.py +431 -356
edsl/jobs/JobsChecks.py +10 -35
edsl/jobs/JobsPrompts.py +4 -6
edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
edsl/jobs/buckets/BucketCollection.py +3 -44
edsl/jobs/buckets/TokenBucket.py +21 -53
edsl/jobs/interviews/Interview.py +408 -143
edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
edsl/jobs/runners/JobsRunnerStatus.py +165 -133
edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
edsl/jobs/tasks/TaskHistory.py +18 -38
edsl/jobs/tasks/task_status_enum.py +2 -0
edsl/language_models/KeyLookup.py +30 -0
edsl/language_models/LanguageModel.py +236 -194
edsl/language_models/ModelList.py +19 -28
edsl/language_models/__init__.py +2 -1
edsl/language_models/registry.py +190 -0
edsl/language_models/repair.py +2 -2
edsl/language_models/unused/ReplicateBase.py +83 -0
edsl/language_models/utilities.py +4 -5
edsl/notebooks/Notebook.py +14 -19
edsl/prompts/Prompt.py +39 -29
edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
edsl/questions/QuestionBase.py +214 -68
edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
edsl/questions/QuestionBasePromptsMixin.py +3 -7
edsl/questions/QuestionBudget.py +1 -1
edsl/questions/QuestionCheckBox.py +3 -3
edsl/questions/QuestionExtract.py +7 -5
edsl/questions/QuestionFreeText.py +3 -2
edsl/questions/QuestionList.py +18 -10
edsl/questions/QuestionMultipleChoice.py +23 -67
edsl/questions/QuestionNumerical.py +4 -2
edsl/questions/QuestionRank.py +17 -7
edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
edsl/questions/SimpleAskMixin.py +3 -4
edsl/questions/__init__.py +1 -2
edsl/questions/derived/QuestionLinearScale.py +3 -6
edsl/questions/derived/QuestionTopK.py +1 -1
edsl/questions/descriptors.py +3 -17
edsl/questions/question_registry.py +1 -1
edsl/results/CSSParameterizer.py +1 -1
edsl/results/Dataset.py +7 -170
edsl/results/DatasetExportMixin.py +305 -168
edsl/results/DatasetTree.py +8 -28
edsl/results/Result.py +206 -298
edsl/results/Results.py +131 -149
edsl/results/ResultsDBMixin.py +238 -0
edsl/results/ResultsExportMixin.py +0 -2
edsl/results/{results_selector.py → Selector.py} +13 -23
edsl/results/TableDisplay.py +171 -98
edsl/results/__init__.py +1 -1
edsl/scenarios/FileStore.py +239 -150
edsl/scenarios/Scenario.py +193 -90
edsl/scenarios/ScenarioHtmlMixin.py +3 -4
edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
edsl/scenarios/ScenarioList.py +244 -415
edsl/scenarios/ScenarioListExportMixin.py +7 -0
edsl/scenarios/ScenarioListPdfMixin.py +37 -15
edsl/scenarios/__init__.py +2 -1
edsl/study/ObjectEntry.py +1 -1
edsl/study/SnapShot.py +1 -1
edsl/study/Study.py +12 -5
edsl/surveys/Rule.py +4 -5
edsl/surveys/RuleCollection.py +27 -25
edsl/surveys/Survey.py +791 -270
edsl/surveys/SurveyCSS.py +8 -20
edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
edsl/surveys/__init__.py +2 -4
edsl/surveys/descriptors.py +2 -6
edsl/surveys/instructions/ChangeInstruction.py +2 -1
edsl/surveys/instructions/Instruction.py +13 -4
edsl/surveys/instructions/InstructionCollection.py +6 -11
edsl/templates/error_reporting/interview_details.html +1 -1
edsl/templates/error_reporting/report.html +1 -1
edsl/tools/plotting.py +1 -1
edsl/utilities/utilities.py +23 -35
{edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
edsl-0.1.39.dev1.dist-info/RECORD +277 -0
{edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
edsl/agents/question_option_processor.py +0 -172
edsl/coop/CoopFunctionsMixin.py +0 -15
edsl/coop/ExpectedParrotKeyHandler.py +0 -125
edsl/exceptions/inference_services.py +0 -5
edsl/inference_services/AvailableModelCacheHandler.py +0 -184
edsl/inference_services/AvailableModelFetcher.py +0 -215
edsl/inference_services/ServiceAvailability.py +0 -135
edsl/inference_services/data_structures.py +0 -134
edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
edsl/jobs/FetchInvigilator.py +0 -47
edsl/jobs/InterviewTaskManager.py +0 -98
edsl/jobs/InterviewsConstructor.py +0 -50
edsl/jobs/JobsComponentConstructor.py +0 -189
edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
edsl/jobs/RequestTokenEstimator.py +0 -30
edsl/jobs/async_interview_runner.py +0 -138
edsl/jobs/buckets/TokenBucketAPI.py +0 -211
edsl/jobs/buckets/TokenBucketClient.py +0 -191
edsl/jobs/check_survey_scenario_compatibility.py +0 -85
edsl/jobs/data_structures.py +0 -120
edsl/jobs/decorators.py +0 -35
edsl/jobs/jobs_status_enums.py +0 -9
edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
edsl/jobs/results_exceptions_handler.py +0 -98
edsl/language_models/ComputeCost.py +0 -63
edsl/language_models/PriceManager.py +0 -127
edsl/language_models/RawResponseHandler.py +0 -106
edsl/language_models/ServiceDataSources.py +0 -0
edsl/language_models/key_management/KeyLookup.py +0 -63
edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
edsl/language_models/key_management/KeyLookupCollection.py +0 -38
edsl/language_models/key_management/__init__.py +0 -0
edsl/language_models/key_management/models.py +0 -131
edsl/language_models/model.py +0 -256
edsl/notebooks/NotebookToLaTeX.py +0 -142
edsl/questions/ExceptionExplainer.py +0 -77
edsl/questions/HTMLQuestion.py +0 -103
edsl/questions/QuestionMatrix.py +0 -265
edsl/questions/data_structures.py +0 -20
edsl/questions/loop_processor.py +0 -149
edsl/questions/response_validator_factory.py +0 -34
edsl/questions/templates/matrix/__init__.py +0 -1
edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
edsl/questions/templates/matrix/question_presentation.jinja +0 -20
edsl/results/MarkdownToDocx.py +0 -122
edsl/results/MarkdownToPDF.py +0 -111
edsl/results/TextEditor.py +0 -50
edsl/results/file_exports.py +0 -252
edsl/results/smart_objects.py +0 -96
edsl/results/table_data_class.py +0 -12
edsl/results/table_renderers.py +0 -118
edsl/scenarios/ConstructDownloadLink.py +0 -109
edsl/scenarios/DocumentChunker.py +0 -102
edsl/scenarios/DocxScenario.py +0 -16
edsl/scenarios/PdfExtractor.py +0 -40
edsl/scenarios/directory_scanner.py +0 -96
edsl/scenarios/file_methods.py +0 -85
edsl/scenarios/handlers/__init__.py +0 -13
edsl/scenarios/handlers/csv.py +0 -49
edsl/scenarios/handlers/docx.py +0 -76
edsl/scenarios/handlers/html.py +0 -37
edsl/scenarios/handlers/json.py +0 -111
edsl/scenarios/handlers/latex.py +0 -5
edsl/scenarios/handlers/md.py +0 -51
edsl/scenarios/handlers/pdf.py +0 -68
edsl/scenarios/handlers/png.py +0 -39
edsl/scenarios/handlers/pptx.py +0 -105
edsl/scenarios/handlers/py.py +0 -294
edsl/scenarios/handlers/sql.py +0 -313
edsl/scenarios/handlers/sqlite.py +0 -149
edsl/scenarios/handlers/txt.py +0 -33
edsl/scenarios/scenario_selector.py +0 -156
edsl/surveys/ConstructDAG.py +0 -92
edsl/surveys/EditSurvey.py +0 -221
edsl/surveys/InstructionHandler.py +0 -100
edsl/surveys/MemoryManagement.py +0 -72
edsl/surveys/RuleManager.py +0 -172
edsl/surveys/Simulator.py +0 -75
edsl/surveys/SurveyToApp.py +0 -141
edsl/utilities/PrettyList.py +0 -56
edsl/utilities/is_notebook.py +0 -18
edsl/utilities/is_valid_variable_name.py +0 -11
edsl/utilities/remove_edsl_version.py +0 -24
edsl-0.1.39.dist-info/RECORD +0 -358
/edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
/edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
/edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
{edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0

edsl/results/Dataset.py CHANGED Viewed

@@ -1,22 +1,19 @@
 """A module to represent a dataset of observations."""
 from __future__ import annotations
-import sys
-import json
 import random
+import json
 from collections import UserList
 from typing import Any, Union, Optional
+import sys
+import numpy as np
 from edsl.results.ResultsExportMixin import ResultsExportMixin
 from edsl.results.DatasetTree import Tree
 from edsl.results.TableDisplay import TableDisplay
-from edsl.Base import PersistenceMixin, HashingMixin
-from edsl.results.smart_objects import FirstObject
-class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
+class Dataset(UserList, ResultsExportMixin):
     """A class to represent a dataset of observations."""
     def __init__(
@@ -39,46 +36,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         _, values = list(self.data[0].items())[0]
         return len(values)
-    def tail(self, n: int = 5) -> Dataset:
-        """Return the last n observations in the dataset.
-        >>> d = Dataset([{'a.b':[1,2,3,4]}])
-        >>> d.tail(2)
-        Dataset([{'a.b': [3, 4]}])
-        """
-        new_data = []
-        for observation in self.data:
-            key, values = list(observation.items())[0]
-            new_data.append({key: values[-n:]})
-        return Dataset(new_data)
-    def head(self, n: int = 5) -> Dataset:
-        """Return the first n observations in the dataset.
-        >>> d = Dataset([{'a.b':[1,2,3,4]}])
-        >>> d.head(2)
-        Dataset([{'a.b': [1, 2]}])
-        """
-        new_data = []
-        for observation in self.data:
-            key, values = list(observation.items())[0]
-            new_data.append({key: values[:n]})
-        return Dataset(new_data)
-    def expand(self, field):
-        return self.to_scenario_list().expand(field)
-    def view(self):
-        from perspective.widget import PerspectiveWidget
-        w = PerspectiveWidget(
-            self.to_pandas(),
-            plugin="Datagrid",
-            aggregates={"datetime": "any"},
-            sort=[["date", "desc"]],
-        )
-        return w
     def keys(self) -> list[str]:
         """Return the keys of the first observation in the dataset.
@@ -91,79 +48,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
     def filter(self, expression):
         return self.to_scenario_list().filter(expression).to_dataset()
-    def long(self, exclude_fields: list[str] = None) -> Dataset:
-        headers, data = self._tabular()
-        exclude_fields = exclude_fields or []
-        # Initialize result dictionaries for each column
-        result_dict = {}
-        for index, row in enumerate(data):
-            row_values = dict(zip(headers, row))
-            excluded_values = {field: row_values[field] for field in exclude_fields}
-            # Transform non-excluded fields to long format
-            for header, value in row_values.items():
-                if header not in exclude_fields:
-                    # Initialize lists in result_dict if needed
-                    if not result_dict:
-                        result_dict = {
-                            "row": [],
-                            "key": [],
-                            "value": [],
-                            **{field: [] for field in exclude_fields},
-                        }
-                    # Add values to each column
-                    result_dict["row"].append(index)
-                    result_dict["key"].append(header)
-                    result_dict["value"].append(value)
-                    for field in exclude_fields:
-                        result_dict[field].append(excluded_values[field])
-        return Dataset([{k: v} for k, v in result_dict.items()])
-    def wide(self) -> "Dataset":
-        """
-        Convert a long-format dataset (with row, key, value columns) to wide format.
-        Expected input format:
-        - A dataset with three columns containing dictionaries:
-          - row: list of row indices
-          - key: list of column names
-          - value: list of values
-        Returns:
-        - Dataset: A new dataset with columns corresponding to unique keys
-        """
-        # Extract the component arrays
-        row_dict = next(col for col in self if "row" in col)
-        key_dict = next(col for col in self if "key" in col)
-        value_dict = next(col for col in self if "value" in col)
-        rows = row_dict["row"]
-        keys = key_dict["key"]
-        values = value_dict["value"]
-        if not (len(rows) == len(keys) == len(values)):
-            raise ValueError("All input arrays must have the same length")
-        # Get unique keys and row indices
-        unique_keys = sorted(set(keys))
-        unique_rows = sorted(set(rows))
-        # Create a dictionary to store the result
-        result = {key: [None] * len(unique_rows) for key in unique_keys}
-        # Populate the result dictionary
-        for row_idx, key, value in zip(rows, keys, values):
-            # Find the position in the output array for this row
-            output_row_idx = unique_rows.index(row_idx)
-            result[key][output_row_idx] = value
-        # Convert to list of column dictionaries format
-        return Dataset([{key: values} for key, values in result.items()])
     def __repr__(self) -> str:
         """Return a string representation of the dataset."""
         return f"Dataset({self.data})"
@@ -242,21 +126,7 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
             """Get the values of the first key in the dictionary."""
             return list(d.values())[0]
-        return FirstObject(get_values(self.data[0])[0])
-    def latex(self, **kwargs):
-        return self.table().latex()
-    def remove_prefix(self) -> Dataset:
-        new_data = []
-        for observation in self.data:
-            key, values = list(observation.items())[0]
-            if "." in key:
-                new_key = key.split(".")[1]
-                new_data.append({new_key: values})
-            else:
-                new_data.append({key: values})
-        return Dataset(new_data)
+        return get_values(self.data[0])[0]
     def print(self, pretty_labels=None, **kwargs):
         if "format" in kwargs:
@@ -276,25 +146,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
             new_data.append({new_key: values})
         return Dataset(new_data)
-    def merge(self, other: Dataset, by_x, by_y) -> Dataset:
-        """Merge the dataset with another dataset on the given keys.""
-        merged_df = df1.merge(df2, how="left", on=["key1", "key2"])
-        """
-        df1 = self.to_pandas()
-        df2 = other.to_pandas()
-        merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
-        return Dataset.from_pandas_dataframe(merged_df)
-    def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
-        from edsl.surveys.Survey import Survey
-        from edsl.questions.QuestionBase import QuestionBase
-        if isinstance(survey_or_question, Survey):
-            return survey_or_question.by(self.to_scenario_list())
-        elif isinstance(survey_or_question, QuestionBase):
-            return Survey([survey_or_question]).by(self.to_scenario_list())
     def select(self, *keys) -> Dataset:
         """Return a new dataset with only the selected keys.
@@ -430,7 +281,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         """
-        import numpy as np
         def sort_indices(lst: list[Any]) -> list[int]:
             """
@@ -559,26 +409,13 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
     @classmethod
-    def example(self, n: int = None):
+    def example(self):
         """Return an example dataset.
         >>> Dataset.example()
         Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
         """
-        if n is None:
-            return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
-        else:
-            return Dataset([{"a": [1] * n}, {"b": [2] * n}])
-    @classmethod
-    def from_edsl_object(cls, object):
-        d = object.to_dict(add_edsl_version=False)
-        return cls([{"key": list(d.keys())}, {"value": list(d.values())}])
-    @classmethod
-    def from_pandas_dataframe(cls, df):
-        result = cls([{col: df[col].tolist()} for col in df.columns])
-        return result
+        return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
 if __name__ == "__main__":

edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl

edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl