PyPI - edsl - Versions diffs - 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl - Mend

edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188)

edsl/Base.py +9 -3
edsl/TemplateLoader.py +24 -0
edsl/__init__.py +8 -3
edsl/__version__.py +1 -1
edsl/agents/Agent.py +40 -8
edsl/agents/AgentList.py +43 -0
edsl/agents/Invigilator.py +136 -221
edsl/agents/InvigilatorBase.py +148 -59
edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +154 -85
edsl/agents/__init__.py +1 -0
edsl/auto/AutoStudy.py +117 -0
edsl/auto/StageBase.py +230 -0
edsl/auto/StageGenerateSurvey.py +178 -0
edsl/auto/StageLabelQuestions.py +125 -0
edsl/auto/StagePersona.py +61 -0
edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
edsl/auto/StagePersonaDimensionValues.py +74 -0
edsl/auto/StagePersonaDimensions.py +69 -0
edsl/auto/StageQuestions.py +73 -0
edsl/auto/SurveyCreatorPipeline.py +21 -0
edsl/auto/utilities.py +224 -0
edsl/config.py +48 -47
edsl/conjure/Conjure.py +6 -0
edsl/coop/PriceFetcher.py +58 -0
edsl/coop/coop.py +50 -7
edsl/data/Cache.py +35 -1
edsl/data/CacheHandler.py +3 -4
edsl/data_transfer_models.py +73 -38
edsl/enums.py +8 -0
edsl/exceptions/general.py +10 -8
edsl/exceptions/language_models.py +25 -1
edsl/exceptions/questions.py +62 -5
edsl/exceptions/results.py +4 -0
edsl/inference_services/AnthropicService.py +13 -11
edsl/inference_services/AwsBedrock.py +112 -0
edsl/inference_services/AzureAI.py +214 -0
edsl/inference_services/DeepInfraService.py +4 -3
edsl/inference_services/GoogleService.py +16 -12
edsl/inference_services/GroqService.py +5 -4
edsl/inference_services/InferenceServiceABC.py +58 -3
edsl/inference_services/InferenceServicesCollection.py +13 -8
edsl/inference_services/MistralAIService.py +120 -0
edsl/inference_services/OllamaService.py +18 -0
edsl/inference_services/OpenAIService.py +55 -56
edsl/inference_services/TestService.py +80 -0
edsl/inference_services/TogetherAIService.py +170 -0
edsl/inference_services/models_available_cache.py +25 -0
edsl/inference_services/registry.py +19 -1
edsl/jobs/Answers.py +10 -12
edsl/jobs/FailedQuestion.py +78 -0
edsl/jobs/Jobs.py +137 -41
edsl/jobs/buckets/BucketCollection.py +24 -15
edsl/jobs/buckets/TokenBucket.py +105 -18
edsl/jobs/interviews/Interview.py +393 -83
edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +22 -18
edsl/jobs/interviews/InterviewExceptionEntry.py +167 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +152 -160
edsl/jobs/runners/JobsRunnerStatus.py +331 -0
edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
edsl/jobs/tasks/TaskCreators.py +1 -1
edsl/jobs/tasks/TaskHistory.py +205 -126
edsl/language_models/LanguageModel.py +297 -177
edsl/language_models/ModelList.py +2 -2
edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
edsl/language_models/fake_openai_call.py +15 -0
edsl/language_models/fake_openai_service.py +61 -0
edsl/language_models/registry.py +25 -8
edsl/language_models/repair.py +0 -19
edsl/language_models/utilities.py +61 -0
edsl/notebooks/Notebook.py +20 -2
edsl/prompts/Prompt.py +52 -2
edsl/questions/AnswerValidatorMixin.py +23 -26
edsl/questions/QuestionBase.py +330 -249
edsl/questions/QuestionBaseGenMixin.py +133 -0
edsl/questions/QuestionBasePromptsMixin.py +266 -0
edsl/questions/QuestionBudget.py +99 -42
edsl/questions/QuestionCheckBox.py +227 -36
edsl/questions/QuestionExtract.py +98 -28
edsl/questions/QuestionFreeText.py +47 -31
edsl/questions/QuestionFunctional.py +7 -0
edsl/questions/QuestionList.py +141 -23
edsl/questions/QuestionMultipleChoice.py +159 -66
edsl/questions/QuestionNumerical.py +88 -47
edsl/questions/QuestionRank.py +182 -25
edsl/questions/Quick.py +41 -0
edsl/questions/RegisterQuestionsMeta.py +31 -12
edsl/questions/ResponseValidatorABC.py +170 -0
edsl/questions/__init__.py +3 -4
edsl/questions/decorators.py +21 -0
edsl/questions/derived/QuestionLikertFive.py +10 -5
edsl/questions/derived/QuestionLinearScale.py +15 -2
edsl/questions/derived/QuestionTopK.py +10 -1
edsl/questions/derived/QuestionYesNo.py +24 -3
edsl/questions/descriptors.py +43 -7
edsl/questions/prompt_templates/question_budget.jinja +13 -0
edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
edsl/questions/prompt_templates/question_extract.jinja +11 -0
edsl/questions/prompt_templates/question_free_text.jinja +3 -0
edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
edsl/questions/prompt_templates/question_list.jinja +17 -0
edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
edsl/questions/prompt_templates/question_numerical.jinja +37 -0
edsl/questions/question_registry.py +6 -2
edsl/questions/templates/__init__.py +0 -0
edsl/questions/templates/budget/__init__.py +0 -0
edsl/questions/templates/budget/answering_instructions.jinja +7 -0
edsl/questions/templates/budget/question_presentation.jinja +7 -0
edsl/questions/templates/checkbox/__init__.py +0 -0
edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
edsl/questions/templates/extract/__init__.py +0 -0
edsl/questions/templates/extract/answering_instructions.jinja +7 -0
edsl/questions/templates/extract/question_presentation.jinja +1 -0
edsl/questions/templates/free_text/__init__.py +0 -0
edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
edsl/questions/templates/free_text/question_presentation.jinja +1 -0
edsl/questions/templates/likert_five/__init__.py +0 -0
edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
edsl/questions/templates/linear_scale/__init__.py +0 -0
edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
edsl/questions/templates/list/__init__.py +0 -0
edsl/questions/templates/list/answering_instructions.jinja +4 -0
edsl/questions/templates/list/question_presentation.jinja +5 -0
edsl/questions/templates/multiple_choice/__init__.py +0 -0
edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
edsl/questions/templates/multiple_choice/html.jinja +0 -0
edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
edsl/questions/templates/numerical/__init__.py +0 -0
edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
edsl/questions/templates/numerical/question_presentation.jinja +7 -0
edsl/questions/templates/rank/__init__.py +0 -0
edsl/questions/templates/rank/answering_instructions.jinja +11 -0
edsl/questions/templates/rank/question_presentation.jinja +15 -0
edsl/questions/templates/top_k/__init__.py +0 -0
edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
edsl/questions/templates/top_k/question_presentation.jinja +22 -0
edsl/questions/templates/yes_no/__init__.py +0 -0
edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
edsl/results/Dataset.py +20 -0
edsl/results/DatasetExportMixin.py +58 -30
edsl/results/DatasetTree.py +145 -0
edsl/results/Result.py +32 -5
edsl/results/Results.py +135 -46
edsl/results/ResultsDBMixin.py +3 -3
edsl/results/Selector.py +118 -0
edsl/results/tree_explore.py +115 -0
edsl/scenarios/FileStore.py +71 -10
edsl/scenarios/Scenario.py +109 -24
edsl/scenarios/ScenarioImageMixin.py +2 -2
edsl/scenarios/ScenarioList.py +546 -21
edsl/scenarios/ScenarioListExportMixin.py +24 -4
edsl/scenarios/ScenarioListPdfMixin.py +153 -4
edsl/study/SnapShot.py +8 -1
edsl/study/Study.py +32 -0
edsl/surveys/Rule.py +15 -3
edsl/surveys/RuleCollection.py +21 -5
edsl/surveys/Survey.py +707 -298
edsl/surveys/SurveyExportMixin.py +71 -9
edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
edsl/surveys/SurveyQualtricsImport.py +284 -0
edsl/surveys/instructions/ChangeInstruction.py +47 -0
edsl/surveys/instructions/Instruction.py +34 -0
edsl/surveys/instructions/InstructionCollection.py +77 -0
edsl/surveys/instructions/__init__.py +0 -0
edsl/templates/error_reporting/base.html +24 -0
edsl/templates/error_reporting/exceptions_by_model.html +35 -0
edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
edsl/templates/error_reporting/exceptions_by_type.html +17 -0
edsl/templates/error_reporting/interview_details.html +116 -0
edsl/templates/error_reporting/interviews.html +10 -0
edsl/templates/error_reporting/overview.html +5 -0
edsl/templates/error_reporting/performance_plot.html +2 -0
edsl/templates/error_reporting/report.css +74 -0
edsl/templates/error_reporting/report.html +118 -0
edsl/templates/error_reporting/report.js +25 -0
edsl/utilities/utilities.py +40 -1
{edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/METADATA +8 -2
edsl-0.1.33.dist-info/RECORD +295 -0
edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -271
edsl/jobs/interviews/retry_management.py +0 -37
edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -303
edsl/utilities/gcp_bucket/simple_example.py +0 -9
edsl-0.1.31.dev4.dist-info/RECORD +0 -204
{edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
{edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0

edsl/scenarios/ScenarioList.py CHANGED Viewed

@@ -6,6 +6,11 @@ import csv
 import random
 from collections import UserList, Counter
 from collections.abc import Iterable
+import urllib.parse
+import urllib.request
+from io import StringIO
+from collections import defaultdict
+import inspect
 from simpleeval import EvalWithCompoundTypes
@@ -15,6 +20,9 @@ from edsl.scenarios.Scenario import Scenario
 from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
 from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
+from edsl.conjure.naming_utilities import sanitize_string
+from edsl.utilities.utilities import is_valid_variable_name
 class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
     pass
@@ -23,12 +31,180 @@ class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
 class ScenarioList(Base, UserList, ScenarioListMixin):
     """Class for creating a list of scenarios to be used in a survey."""
-    def __init__(self, data: Optional[list] = None):
+    def __init__(self, data: Optional[list] = None, codebook: Optional[dict] = None):
         """Initialize the ScenarioList class."""
         if data is not None:
             super().__init__(data)
         else:
             super().__init__([])
+        self.codebook = codebook or {}
+    @property
+    def has_jinja_braces(self) -> bool:
+        """Check if the ScenarioList has Jinja braces."""
+        return any([scenario.has_jinja_braces for scenario in self])
+    def convert_jinja_braces(self) -> ScenarioList:
+        """Convert Jinja braces to Python braces."""
+        return ScenarioList([scenario.convert_jinja_braces() for scenario in self])
+    def give_valid_names(self) -> ScenarioList:
+        """Give valid names to the scenario keys.
+        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
+        >>> s.give_valid_names()
+        ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
+        >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
+        >>> s.give_valid_names()
+        ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
+        """
+        codebook = {}
+        new_scenaerios = []
+        for scenario in self:
+            new_scenario = {}
+            for key in scenario:
+                if not is_valid_variable_name(key):
+                    if key in codebook:
+                        new_key = codebook[key]
+                    else:
+                        new_key = sanitize_string(key)
+                        if not is_valid_variable_name(new_key):
+                            new_key = f"var_{len(codebook)}"
+                        codebook[key] = new_key
+                    new_scenario[new_key] = scenario[key]
+                else:
+                    new_scenario[key] = scenario[key]
+            new_scenaerios.append(Scenario(new_scenario))
+        return ScenarioList(new_scenaerios, codebook)
+    def unpivot(self, id_vars=None, value_vars=None):
+        """
+        Unpivot the ScenarioList, allowing for id variables to be specified.
+        Parameters:
+        id_vars (list): Fields to use as identifier variables (kept in each entry)
+        value_vars (list): Fields to unpivot. If None, all fields not in id_vars will be used.
+        Example:
+        >>> s = ScenarioList([
+        ...     Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
+        ...     Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
+        ... ])
+        >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
+        ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
+        """
+        if id_vars is None:
+            id_vars = []
+        if value_vars is None:
+            value_vars = [field for field in self[0].keys() if field not in id_vars]
+        new_scenarios = []
+        for scenario in self:
+            for var in value_vars:
+                new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
+                new_scenario["variable"] = var
+                new_scenario["value"] = scenario[var]
+                new_scenarios.append(Scenario(new_scenario))
+        return ScenarioList(new_scenarios)
+    def pivot(self, id_vars, var_name="variable", value_name="value"):
+        """
+        Pivot the ScenarioList from long to wide format.
+        Parameters:
+        id_vars (list): Fields to use as identifier variables
+        var_name (str): Name of the variable column (default: 'variable')
+        value_name (str): Name of the value column (default: 'value')
+        Example:
+        >>> s = ScenarioList([
+        ...     Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
+        ...     Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
+        ...     Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
+        ...     Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
+        ... ])
+        >>> s.pivot(id_vars=['id', 'year'])
+        ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
+        """
+        pivoted_dict = {}
+        for scenario in self:
+            # Create a tuple of id values to use as a key
+            id_key = tuple(scenario[id_var] for id_var in id_vars)
+            # If this combination of id values hasn't been seen before, initialize it
+            if id_key not in pivoted_dict:
+                pivoted_dict[id_key] = {id_var: scenario[id_var] for id_var in id_vars}
+            # Add the variable-value pair to the dict
+            variable = scenario[var_name]
+            value = scenario[value_name]
+            pivoted_dict[id_key][variable] = value
+        # Convert the dict of dicts to a list of Scenarios
+        pivoted_scenarios = [
+            Scenario(dict(zip(id_vars, id_key), **values))
+            for id_key, values in pivoted_dict.items()
+        ]
+        return ScenarioList(pivoted_scenarios)
+    def group_by(self, id_vars, variables, func):
+        """
+        Group the ScenarioList by id_vars and apply a function to the specified variables.
+        Parameters:
+        id_vars (list): Fields to use as identifier variables for grouping
+        variables (list): Fields to pass to the aggregation function
+        func (callable): Function to apply to the grouped variables.
+                        Should accept lists of values for each variable.
+        Returns:
+        ScenarioList: A new ScenarioList with the grouped and aggregated results
+        Example:
+        >>> def avg_sum(a, b):
+        ...     return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
+        >>> s = ScenarioList([
+        ...     Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
+        ...     Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
+        ...     Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
+        ...     Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
+        ... ])
+        >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
+        ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
+        """
+        # Check if the function is compatible with the specified variables
+        func_params = inspect.signature(func).parameters
+        if len(func_params) != len(variables):
+            raise ValueError(
+                f"Function {func.__name__} expects {len(func_params)} arguments, but {len(variables)} variables were provided"
+            )
+        # Group the scenarios
+        grouped = defaultdict(lambda: defaultdict(list))
+        for scenario in self:
+            key = tuple(scenario[id_var] for id_var in id_vars)
+            for var in variables:
+                grouped[key][var].append(scenario[var])
+        # Apply the function to each group
+        result = []
+        for key, group in grouped.items():
+            try:
+                aggregated = func(*[group[var] for var in variables])
+            except Exception as e:
+                raise ValueError(f"Error applying function to group {key}: {str(e)}")
+            if not isinstance(aggregated, dict):
+                raise ValueError(f"Function {func.__name__} must return a dictionary")
+            new_scenario = dict(zip(id_vars, key))
+            new_scenario.update(aggregated)
+            result.append(Scenario(new_scenario))
+        return ScenarioList(result)
     @property
     def parameters(self) -> set:
@@ -106,6 +282,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         for s in data["scenarios"]:
             _ = s.pop("edsl_version")
             _ = s.pop("edsl_class_name")
+        for scenario in data["scenarios"]:
+            for key, value in scenario.items():
+                if hasattr(value, "to_dict"):
+                    data[key] = value.to_dict()
         return data_to_html(data)
     def tally(self, field) -> dict:
@@ -153,6 +333,71 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
                 new_scenarios.append(new_scenario)
         return ScenarioList(new_scenarios)
+    def concatenate(self, fields: List[str], separator: str = ";") -> "ScenarioList":
+        """Concatenate specified fields into a single field.
+        Args:
+            fields (List[str]): List of field names to concatenate.
+            separator (str, optional): Separator to use between field values. Defaults to ";".
+        Returns:
+            ScenarioList: A new ScenarioList with concatenated fields.
+        Example:
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
+            >>> s.concatenate(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
+        """
+        new_scenarios = []
+        for scenario in self:
+            new_scenario = scenario.copy()
+            concat_values = []
+            for field in fields:
+                if field in new_scenario:
+                    concat_values.append(str(new_scenario[field]))
+                    del new_scenario[field]
+            new_field_name = f"concat_{'_'.join(fields)}"
+            new_scenario[new_field_name] = separator.join(concat_values)
+            new_scenarios.append(new_scenario)
+        return ScenarioList(new_scenarios)
+    def unpack_dict(
+        self, field: str, prefix: Optional[str] = None, drop_field: bool = False
+    ) -> ScenarioList:
+        """Unpack a dictionary field into separate fields.
+        Example:
+        >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
+        >>> s.unpack_dict('b')
+        ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
+        """
+        new_scenarios = []
+        for scenario in self:
+            new_scenario = scenario.copy()
+            for key, value in scenario[field].items():
+                if prefix:
+                    new_scenario[prefix + key] = value
+                else:
+                    new_scenario[key] = value
+            if drop_field:
+                new_scenario.pop(field)
+            new_scenarios.append(new_scenario)
+        return ScenarioList(new_scenarios)
+    def transform(
+        self, field: str, func: Callable, new_name: Optional[str] = None
+    ) -> ScenarioList:
+        """Transform a field using a function."""
+        new_scenarios = []
+        for scenario in self:
+            new_scenario = scenario.copy()
+            new_scenario[new_name or field] = func(scenario[field])
+            new_scenarios.append(new_scenario)
+        return ScenarioList(new_scenarios)
     def mutate(
         self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
     ) -> ScenarioList:
@@ -242,6 +487,18 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         return ScenarioList(new_data)
+    def from_urls(
+        self, urls: list[str], field_name: Optional[str] = "text"
+    ) -> ScenarioList:
+        """Create a ScenarioList from a list of URLs.
+        :param urls: A list of URLs.
+        :param field_name: The name of the field to store the text from the URLs.
+        """
+        return ScenarioList([Scenario.from_url(url, field_name) for url in urls])
     def select(self, *fields) -> ScenarioList:
         """
         Selects scenarios with only the references fields.
@@ -288,12 +545,28 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         >>> s = ScenarioList.from_list("a", [1,2,3])
         >>> s.to_dataset()
         Dataset([{'a': [1, 2, 3]}])
+        >>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
+        >>> s.to_dataset()
+        Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
         """
         from edsl.results.Dataset import Dataset
         keys = self[0].keys()
-        data = {key: [scenario[key] for scenario in self.data] for key in keys}
-        return Dataset([data])
+        data = [{key: [scenario[key] for scenario in self.data]} for key in keys]
+        return Dataset(data)
+    def split(
+        self, field: str, split_on: str, index: int, new_name: Optional[str] = None
+    ) -> ScenarioList:
+        """Split a scenario fiel in multiple fields."""
+        if new_name is None:
+            new_name = field + "_split_" + str(index)
+        new_scenarios = []
+        for scenario in self:
+            new_scenario = scenario.copy()
+            new_scenario[new_name] = scenario[field].split(split_on)[index]
+            new_scenarios.append(new_scenario)
+        return ScenarioList(new_scenarios)
     def add_list(self, name, values) -> ScenarioList:
         """Add a list of values to a ScenarioList.
@@ -352,6 +625,68 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             data = cursor.fetchall()
         return cls([Scenario(dict(zip(columns, row))) for row in data])
+    @classmethod
+    def from_latex(cls, tex_file_path: str):
+        with open(tex_file_path, "r") as file:
+            lines = file.readlines()
+        processed_lines = []
+        non_blank_lines = [
+            (i, line.strip()) for i, line in enumerate(lines) if line.strip()
+        ]
+        for index, (line_no, text) in enumerate(non_blank_lines):
+            entry = {
+                "line_no": line_no + 1,  # Using 1-based index for line numbers
+                "text": text,
+                "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
+                "line_after": (
+                    non_blank_lines[index + 1][1]
+                    if index < len(non_blank_lines) - 1
+                    else None
+                ),
+            }
+            processed_lines.append(entry)
+        return ScenarioList([Scenario(entry) for entry in processed_lines])
+    @classmethod
+    def from_google_doc(cls, url: str) -> ScenarioList:
+        """Create a ScenarioList from a Google Doc.
+        This method downloads the Google Doc as a Word file (.docx), saves it to a temporary file,
+        and then reads it using the from_docx class method.
+        Args:
+            url (str): The URL to the Google Doc.
+        Returns:
+            ScenarioList: An instance of the ScenarioList class.
+        """
+        import tempfile
+        import requests
+        from docx import Document
+        if "/edit" in url:
+            doc_id = url.split("/d/")[1].split("/edit")[0]
+        else:
+            raise ValueError("Invalid Google Doc URL format.")
+        export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"
+        # Download the Google Doc as a Word file (.docx)
+        response = requests.get(export_url)
+        response.raise_for_status()  # Ensure the request was successful
+        # Save the Word file to a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
+            temp_file.write(response.content)
+            temp_filename = temp_file.name
+        # Call the from_docx class method with the temporary file
+        return cls.from_docx(temp_filename)
     @classmethod
     def from_pandas(cls, df) -> ScenarioList:
         """Create a ScenarioList from a pandas DataFrame.
@@ -365,6 +700,62 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         """
         return cls([Scenario(row) for row in df.to_dict(orient="records")])
+    @classmethod
+    def from_wikipedia(cls, url: str, table_index: int = 0):
+        """
+        Extracts a table from a Wikipedia page.
+        Parameters:
+            url (str): The URL of the Wikipedia page.
+            table_index (int): The index of the table to extract (default is 0).
+        Returns:
+            pd.DataFrame: A DataFrame containing the extracted table.
+        # # Example usage
+        # url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
+        # df = from_wikipedia(url, 0)
+        # if not df.empty:
+        #     print(df.head())
+        # else:
+        #     print("Failed to extract table.")
+        """
+        import pandas as pd
+        import requests
+        from requests.exceptions import RequestException
+        try:
+            # Check if the URL is reachable
+            response = requests.get(url)
+            response.raise_for_status()  # Raises HTTPError for bad responses
+            # Extract tables from the Wikipedia page
+            tables = pd.read_html(url)
+            # Ensure the requested table index is within the range of available tables
+            if table_index >= len(tables) or table_index < 0:
+                raise IndexError(
+                    f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
+                )
+            # Return the requested table as a DataFrame
+            # return tables[table_index]
+            return cls.from_pandas(tables[table_index])
+        except RequestException as e:
+            print(f"Error fetching the URL: {e}")
+        except ValueError as e:
+            print(f"Error parsing tables: {e}")
+        except IndexError as e:
+            print(e)
+        except Exception as e:
+            print(f"An unexpected error occurred: {e}")
+        # Return an empty DataFrame in case of an error
+        # return cls.from_pandas(pd.DataFrame())
     def to_key_value(self, field: str, value=None) -> Union[dict, set]:
         """Return the set of values in the field.
@@ -380,8 +771,121 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             return {scenario[field]: scenario[value] for scenario in self}
     @classmethod
-    def from_csv(cls, filename: str) -> ScenarioList:
-        """Create a ScenarioList from a CSV file.
+    def from_excel(
+        cls, filename: str, sheet_name: Optional[str] = None
+    ) -> ScenarioList:
+        """Create a ScenarioList from an Excel file.
+        If the Excel file contains multiple sheets and no sheet_name is provided,
+        the method will print the available sheets and require the user to specify one.
+        Example:
+        >>> import tempfile
+        >>> import os
+        >>> import pandas as pd
+        >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
+        ...     df1 = pd.DataFrame({
+        ...         'name': ['Alice', 'Bob'],
+        ...         'age': [30, 25],
+        ...         'location': ['New York', 'Los Angeles']
+        ...     })
+        ...     df2 = pd.DataFrame({
+        ...         'name': ['Charlie', 'David'],
+        ...         'age': [35, 40],
+        ...         'location': ['Chicago', 'Boston']
+        ...     })
+        ...     with pd.ExcelWriter(f.name) as writer:
+        ...         df1.to_excel(writer, sheet_name='Sheet1', index=False)
+        ...         df2.to_excel(writer, sheet_name='Sheet2', index=False)
+        ...     temp_filename = f.name
+        >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
+        >>> len(scenario_list)
+        2
+        >>> scenario_list[0]['name']
+        'Alice'
+        >>> scenario_list = ScenarioList.from_excel(temp_filename)  # Should raise an error and list sheets
+        Traceback (most recent call last):
+        ...
+        ValueError: Please provide a sheet name to load data from.
+        """
+        from edsl.scenarios.Scenario import Scenario
+        import pandas as pd
+        # Get all sheets
+        all_sheets = pd.read_excel(filename, sheet_name=None)
+        # If no sheet_name is provided and there is more than one sheet, print available sheets
+        if sheet_name is None:
+            if len(all_sheets) > 1:
+                print("The Excel file contains multiple sheets:")
+                for name in all_sheets.keys():
+                    print(f"- {name}")
+                raise ValueError("Please provide a sheet name to load data from.")
+            else:
+                # If there is only one sheet, use it
+                sheet_name = list(all_sheets.keys())[0]
+        # Load the specified or determined sheet
+        df = pd.read_excel(filename, sheet_name=sheet_name)
+        observations = []
+        for _, row in df.iterrows():
+            observations.append(Scenario(row.to_dict()))
+        return cls(observations)
+    @classmethod
+    def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
+        """Create a ScenarioList from a Google Sheet.
+        This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
+        and then reads it using the from_excel class method.
+        Args:
+            url (str): The URL to the Google Sheet.
+            sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
+                                        the same as from_excel regarding multiple sheets.
+        Returns:
+            ScenarioList: An instance of the ScenarioList class.
+        """
+        import pandas as pd
+        import tempfile
+        import requests
+        if "/edit" in url:
+            sheet_id = url.split("/d/")[1].split("/edit")[0]
+        else:
+            raise ValueError("Invalid Google Sheet URL format.")
+        export_url = (
+            f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
+        )
+        # Download the Google Sheet as an Excel file
+        response = requests.get(export_url)
+        response.raise_for_status()  # Ensure the request was successful
+        # Save the Excel file to a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
+            temp_file.write(response.content)
+            temp_filename = temp_file.name
+        # Call the from_excel class method with the temporary file
+        return cls.from_excel(temp_filename, sheet_name=sheet_name)
+    @classmethod
+    def from_csv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
+        """Create a ScenarioList from a CSV file or URL.
+        The first row is used as the header; each following row becomes one Scenario
+        mapping header names to that row's values.
+        Args:
+            source: A string representing either a local file path or a URL to a CSV file,
+                    or a urllib.parse.ParseResult object for a URL.
+        Returns:
+            ScenarioList: A ScenarioList object containing the data from the CSV.
         Example:
@@ -397,15 +901,37 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         'Alice'
         >>> scenario_list[1]['age']
         '25'
+        >>> url = "https://example.com/data.csv"
+        >>> ## scenario_list_from_url = ScenarioList.from_csv(url)
         """
         from edsl.scenarios.Scenario import Scenario
-        observations = []
-        with open(filename, "r") as f:
-            reader = csv.reader(f)
+        def is_url(source):
+            # A string counts as a URL only when it has both a scheme and a host,
+            # so plain paths fall through to the local-file branch.
+            try:
+                result = urllib.parse.urlparse(source)
+                return all([result.scheme, result.netloc])
+            except ValueError:
+                return False
+        if isinstance(source, str) and is_url(source):
+            with urllib.request.urlopen(source) as response:
+                csv_content = response.read().decode("utf-8")
+            csv_file = StringIO(csv_content)
+        elif isinstance(source, urllib.parse.ParseResult):
+            with urllib.request.urlopen(source.geturl()) as response:
+                csv_content = response.read().decode("utf-8")
+            csv_file = StringIO(csv_content)
+        else:
+            # newline="" leaves line-ending handling to the csv module, which is
+            # required for quoted fields containing embedded newlines.
+            csv_file = open(source, "r", newline="")
+        try:
+            reader = csv.reader(csv_file)
             header = next(reader)
-            for row in reader:
-                observations.append(Scenario(dict(zip(header, row))))
+            observations = [Scenario(dict(zip(header, row))) for row in reader]
+        finally:
+            # Closes the local file handle or the in-memory buffer alike.
+            csv_file.close()
         return cls(observations)
     def _to_dict(self, sort=False) -> dict:
@@ -449,6 +975,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         return cls([Scenario.from_dict(s) for s in data["scenarios"]])
+    @classmethod
+    def from_nested_dict(cls, data: dict) -> ScenarioList:
+        """Create a `ScenarioList` from a nested dictionary.
+        Starting from an empty list, each item of `data` is forwarded as
+        `add_list(key, value)`.
+        Args:
+            data: Mapping whose keys and values are passed to `add_list` in order.
+        Returns:
+            ScenarioList: The instance the `add_list` calls were made on.
+        """
+        # Build through `cls`, like the other alternate constructors here, so a
+        # subclass gets an instance of its own type.
+        s = cls()
+        for key, value in data.items():
+            # NOTE(review): the return value is ignored, so this relies on
+            # add_list mutating `s` in place - confirm.
+            s.add_list(key, value)
+        return s
     def code(self) -> str:
         ## TODO: Refactor to only use the questions actually in the survey
         """Create the Python code representation of a survey."""
@@ -484,17 +1020,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             table.add_row(str(i), s.rich_print())
         return table
-    # def print(
-    #     self,
-    #     format: Optional[str] = None,
-    #     max_rows: Optional[int] = None,
-    #     pretty_labels: Optional[dict] = None,
-    #     filename: str = None,
-    # ):
-    #     from edsl.utilities.interface import print_scenario_list
-    #     print_scenario_list(self[:max_rows])
     def __getitem__(self, key: Union[int, slice]) -> Any:
         """Return the item at the given index.

edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl

edsl 0.1.31.dev4py3-none-any.whl → 0.1.33py3-none-any.whl