PyPI - edsl - Versions diffs - 0.1.54__py3-none-any.whl → 0.1.55__py3-none-any.whl - Mend

edsl 0.1.54py3-none-any.whl → 0.1.55py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

edsl/__init__.py +8 -1
edsl/__init__original.py +134 -0
edsl/__version__.py +1 -1
edsl/agents/agent.py +29 -0
edsl/agents/agent_list.py +36 -1
edsl/base/base_class.py +281 -151
edsl/buckets/__init__.py +8 -3
edsl/buckets/bucket_collection.py +9 -3
edsl/buckets/model_buckets.py +4 -2
edsl/buckets/token_bucket.py +2 -2
edsl/buckets/token_bucket_client.py +5 -3
edsl/caching/cache.py +131 -62
edsl/caching/cache_entry.py +70 -58
edsl/caching/sql_dict.py +17 -0
edsl/cli.py +99 -0
edsl/config/config_class.py +16 -0
edsl/conversation/__init__.py +31 -0
edsl/coop/coop.py +276 -242
edsl/coop/coop_jobs_objects.py +59 -0
edsl/coop/coop_objects.py +29 -0
edsl/coop/coop_regular_objects.py +26 -0
edsl/coop/utils.py +24 -19
edsl/dataset/dataset.py +338 -101
edsl/db_list/sqlite_list.py +349 -0
edsl/inference_services/__init__.py +40 -5
edsl/inference_services/exceptions.py +11 -0
edsl/inference_services/services/anthropic_service.py +5 -2
edsl/inference_services/services/aws_bedrock.py +6 -2
edsl/inference_services/services/azure_ai.py +6 -2
edsl/inference_services/services/google_service.py +3 -2
edsl/inference_services/services/mistral_ai_service.py +6 -2
edsl/inference_services/services/open_ai_service.py +6 -2
edsl/inference_services/services/perplexity_service.py +6 -2
edsl/inference_services/services/test_service.py +94 -5
edsl/interviews/answering_function.py +167 -59
edsl/interviews/interview.py +124 -72
edsl/interviews/interview_task_manager.py +10 -0
edsl/invigilators/invigilators.py +9 -0
edsl/jobs/async_interview_runner.py +146 -104
edsl/jobs/data_structures.py +6 -4
edsl/jobs/decorators.py +61 -0
edsl/jobs/fetch_invigilator.py +61 -18
edsl/jobs/html_table_job_logger.py +14 -2
edsl/jobs/jobs.py +180 -104
edsl/jobs/jobs_component_constructor.py +2 -2
edsl/jobs/jobs_interview_constructor.py +2 -0
edsl/jobs/jobs_remote_inference_logger.py +4 -0
edsl/jobs/jobs_runner_status.py +30 -25
edsl/jobs/progress_bar_manager.py +79 -0
edsl/jobs/remote_inference.py +35 -1
edsl/key_management/key_lookup_builder.py +6 -1
edsl/language_models/language_model.py +86 -6
edsl/language_models/model.py +10 -3
edsl/language_models/price_manager.py +45 -75
edsl/language_models/registry.py +5 -0
edsl/notebooks/notebook.py +77 -10
edsl/questions/VALIDATION_README.md +134 -0
edsl/questions/__init__.py +24 -1
edsl/questions/exceptions.py +21 -0
edsl/questions/question_dict.py +201 -16
edsl/questions/question_multiple_choice_with_other.py +624 -0
edsl/questions/question_registry.py +2 -1
edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
edsl/questions/validation_analysis.py +185 -0
edsl/questions/validation_cli.py +131 -0
edsl/questions/validation_html_report.py +404 -0
edsl/questions/validation_logger.py +136 -0
edsl/results/result.py +63 -16
edsl/results/results.py +702 -171
edsl/scenarios/construct_download_link.py +16 -3
edsl/scenarios/directory_scanner.py +226 -226
edsl/scenarios/file_methods.py +5 -0
edsl/scenarios/file_store.py +117 -6
edsl/scenarios/handlers/__init__.py +5 -1
edsl/scenarios/handlers/mp4_file_store.py +104 -0
edsl/scenarios/handlers/webm_file_store.py +104 -0
edsl/scenarios/scenario.py +120 -101
edsl/scenarios/scenario_list.py +800 -727
edsl/scenarios/scenario_list_gc_test.py +146 -0
edsl/scenarios/scenario_list_memory_test.py +214 -0
edsl/scenarios/scenario_list_source_refactor.md +35 -0
edsl/scenarios/scenario_selector.py +5 -4
edsl/scenarios/scenario_source.py +1990 -0
edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
edsl/surveys/survey.py +22 -0
edsl/tasks/__init__.py +4 -2
edsl/tasks/task_history.py +198 -36
edsl/tests/scenarios/test_ScenarioSource.py +51 -0
edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
edsl/utilities/__init__.py +2 -1
edsl/utilities/decorators.py +121 -0
edsl/utilities/memory_debugger.py +1010 -0
{edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/METADATA +51 -76
{edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/RECORD +99 -75
edsl/jobs/jobs_runner_asyncio.py +0 -281
edsl/language_models/unused/fake_openai_service.py +0 -60
{edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
{edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
{edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0

edsl/scenarios/scenario_list.py CHANGED Viewed

@@ -34,7 +34,13 @@ import os
 from io import StringIO
 import inspect
 from collections import UserList, defaultdict
-from collections.abc import Iterable
+from collections.abc import Iterable, MutableSequence
+import json
+import pickle
+# Import for refactoring to Source classes
+from edsl.scenarios.scenario_source import deprecated_classmethod, TuplesSource
 from simpleeval import EvalWithCompoundTypes, NameNotDefined  # type: ignore
 from tabulate import tabulate_formats
@@ -53,9 +59,17 @@ if TYPE_CHECKING:
 from ..base import Base
-from ..utilities import remove_edsl_version, sanitize_string, is_valid_variable_name, dict_hash
+from ..utilities import (
+    remove_edsl_version,
+    sanitize_string,
+    is_valid_variable_name,
+    dict_hash,
+    memory_profile,
+)
 from ..dataset import ScenarioListOperationsMixin
+from ..db_list.sqlite_list import SQLiteList
 from .exceptions import ScenarioError
 from .scenario import Scenario
 from .scenario_list_pdf_tools import PdfTools
@@ -83,41 +97,39 @@ TableFormat: TypeAlias = Literal[
     "tsv",
 ]
-class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
+class ScenarioSQLiteList(SQLiteList):
+    """SQLite-backed list specifically for storing Scenario objects."""
+    def serialize(self, obj):
+        """Serialize a Scenario object or other data to bytes using pickle."""
+        return pickle.dumps(obj)
+    def deserialize(self, data):
+        """Deserialize pickled bytes back to a Scenario object or other data."""
+        if isinstance(data, str):
+            return pickle.loads(data.encode())
+        return pickle.loads(data)
+if use_sqlite := True:
+    data_class = ScenarioSQLiteList
+else:
+    data_class = list
+class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
     """
     A collection of Scenario objects with advanced operations for manipulation and analysis.
-    ScenarioList extends Python's UserList to provide specialized functionality for
-    working with collections of Scenario objects. It inherits from Base to integrate
-    with EDSL's object model and from ScenarioListOperationsMixin to provide
-    powerful data manipulation capabilities.
-    The class provides methods for filtering, sorting, joining, transforming, and
-    analyzing collections of Scenarios. It's designed to work seamlessly with other
-    EDSL components like Surveys, Jobs, and Questions.
+    ScenarioList provides specialized functionality for working with collections of
+    Scenario objects. It inherits from MutableSequence to provide standard list operations,
+    from Base to integrate with EDSL's object model, and from ScenarioListOperationsMixin
+    to provide powerful data manipulation capabilities.
     Attributes:
-        data (list): The underlying list of Scenario objects.
+        data (list): The underlying list containing Scenario objects.
         codebook (dict): Optional metadata describing the fields in the scenarios.
-    Examples:
-        Create a ScenarioList from Scenario objects:
-        >>> from edsl.scenarios import Scenario, ScenarioList
-        >>> s1 = Scenario({"product": "apple", "price": 1.99})
-        >>> s2 = Scenario({"product": "banana", "price": 0.99})
-        >>> sl = ScenarioList([s1, s2])
-        Filter scenarios based on a condition:
-        >>> cheap_fruits = sl.filter("price < 1.50")
-        >>> len(cheap_fruits)
-        1
-        >>> cheap_fruits[0]["product"]
-        'banana'
-        Add a new column based on existing data:
-        >>> sl_with_tax = sl.mutate("tax = price * 0.08")
-        >>> sl_with_tax[0]["tax"]
-        0.1592
     """
     __documentation__ = (
@@ -125,43 +137,64 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
     )
     def __init__(
-        self, data: Optional[list] = None, codebook: Optional[dict[str, str]] = None
+        self,
+        data: Optional[list] = None,
+        codebook: Optional[dict[str, str]] = None,
+        data_class: Optional[type] = data_class,
     ):
-        """
-        Initialize a new ScenarioList with optional data and codebook.
-        Args:
-            data: A list of Scenario objects. If None, an empty list is used.
-            codebook: A dictionary mapping field names to descriptions or metadata.
-                     Used for documentation and to provide context for fields.
-        Examples:
-            >>> sl = ScenarioList()  # Empty list
-            >>> s1 = Scenario({"product": "apple"})
-            >>> s2 = Scenario({"product": "banana"})
-            >>> sl = ScenarioList([s1, s2])  # With data
-            >>> # With a codebook
-            >>> codebook = {"product": "Fruit name", "price": "Price in USD"}
-            >>> sl = ScenarioList([s1, s2], codebook=codebook)
-        """
-        if data is not None:
-            super().__init__(data)
-        else:
-            super().__init__([])
+        """Initialize a new ScenarioList with optional data and codebook."""
+        self._data_class = data_class
+        self.data = self._data_class([])
+        warned = False
+        for item in data or []:
+            try:
+                _ = json.dumps(item.to_dict())
+            except:
+                import warnings
+                if not warned:
+                    warnings.warn(
+                        f"One or more items in the data list are not JSON serializable. "
+                        "This would prevent running a job that uses this ScenarioList."
+                        "One solution is to use 'str(item)' to convert the item to a string before adding."
+                    )
+                    warned = True
+            self.data.append(item)
         self.codebook = codebook or {}
+    # Required MutableSequence abstract methods
+    def __getitem__(self, index):
+        """Get item at index."""
+        if isinstance(index, slice):
+            return self.__class__(list(self.data[index]), self.codebook.copy())
+        return self.data[index]
+    def __setitem__(self, index, value):
+        """Set item at index."""
+        self.data[index] = value
+    def __delitem__(self, index):
+        """Delete item at index."""
+        del self.data[index]
+    def __len__(self):
+        """Return number of items."""
+        return len(self.data)
+    def insert(self, index, value):
+        """Insert value at index."""
+        self.data.insert(index, value)
     def unique(self) -> ScenarioList:
         """
         Return a new ScenarioList containing only unique Scenario objects.
         This method removes duplicate Scenario objects based on their hash values,
         which are determined by their content. Two Scenarios with identical key-value
         pairs will have the same hash and be considered duplicates.
         Returns:
             A new ScenarioList containing only unique Scenario objects.
         Examples:
             >>> from edsl.scenarios import Scenario, ScenarioList
             >>> s1 = Scenario({"a": 1})
@@ -173,26 +206,36 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             2
             >>> unique_sl
             ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
         Notes:
             - The order of scenarios in the result is not guaranteed due to the use of sets
             - Uniqueness is determined by the Scenario's __hash__ method
             - The original ScenarioList is not modified
+            - This implementation is memory efficient as it processes scenarios one at a time
         """
-        return ScenarioList(list(set(self)))
+        seen_hashes = set()
+        result = ScenarioList()
+        for scenario in self.data:
+            scenario_hash = hash(scenario)
+            if scenario_hash not in seen_hashes:
+                seen_hashes.add(scenario_hash)
+                result.append(scenario)
+        return result
     @property
     def has_jinja_braces(self) -> bool:
         """
         Check if any Scenario in the list contains values with Jinja template braces.
         This property checks all Scenarios in the list to determine if any contain
         string values with Jinja template syntax ({{ and }}). This is important for
         rendering templates and avoiding conflicts with other templating systems.
         Returns:
             True if any Scenario contains values with Jinja braces, False otherwise.
         Examples:
             >>> from edsl.scenarios import Scenario, ScenarioList
             >>> s1 = Scenario({"text": "Plain text"})
@@ -204,20 +247,23 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             >>> sl2.has_jinja_braces
             True
         """
-        return any([scenario.has_jinja_braces for scenario in self])
+        for scenario in self:
+            if scenario.has_jinja_braces:
+                return True
+        return False
     def _convert_jinja_braces(self) -> ScenarioList:
         """
         Convert Jinja braces to alternative symbols in all Scenarios in the list.
         This method creates a new ScenarioList where all Jinja template braces
         ({{ and }}) in string values are converted to alternative symbols (<< and >>).
         This is useful when you need to prevent template processing or avoid conflicts
         with other templating systems.
         Returns:
             A new ScenarioList with converted braces in all Scenarios.
         Examples:
             >>> from edsl.scenarios import Scenario, ScenarioList
             >>> s = Scenario({"text": "Template with {{variable}}"})
@@ -225,13 +271,16 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             >>> converted = sl._convert_jinja_braces()
             >>> converted[0]["text"]
             'Template with <<variable>>'
         Notes:
             - The original ScenarioList is not modified
             - This is primarily intended for internal use
             - The default replacement symbols are << and >>
         """
-        return ScenarioList([scenario._convert_jinja_braces() for scenario in self])
+        converted_sl = ScenarioList()
+        for scenario in self:
+            converted_sl.append(scenario._convert_jinja_braces())
+        return converted_sl
     def give_valid_names(self, existing_codebook: dict = None) -> ScenarioList:
         """Give valid names to the scenario keys, using an existing codebook if provided.
@@ -253,7 +302,8 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         ScenarioList([Scenario({'custom_name': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
         """
         codebook = existing_codebook.copy() if existing_codebook else {}
-        new_scenarios = []
+        new_scenarios = ScenarioList(data = [], codebook = codebook)
         for scenario in self:
             new_scenario = {}
@@ -274,7 +324,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             new_scenarios.append(Scenario(new_scenario))
-        return ScenarioList(new_scenarios, codebook)
+        return new_scenarios
     def unpivot(
         self,
@@ -301,7 +351,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         if value_vars is None:
             value_vars = [field for field in self[0].keys() if field not in id_vars]
-        new_scenarios = []
+        new_scenarios = ScenarioList(data = [], codebook = {})
         for scenario in self:
             for var in value_vars:
                 new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
@@ -309,35 +359,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                 new_scenario["value"] = scenario[var]
                 new_scenarios.append(Scenario(new_scenario))
-        return ScenarioList(new_scenarios)
-    def sem_filter(self, language_predicate: str) -> ScenarioList:
-        """Filter the ScenarioList based on a language predicate.
-        :param language_predicate: The language predicate to use.
-        Inspired by:
-        @misc{patel2024semanticoperators,
-            title={Semantic Operators: A Declarative Model for Rich, AI-based Analytics Over Text Data},
-            author={Liana Patel and Siddharth Jha and Parth Asawa and Melissa Pan and Carlos Guestrin and Matei Zaharia},
-            year={2024},
-            eprint={2407.11418},
-            archivePrefix={arXiv},
-            primaryClass={cs.DB},
-            url={https://arxiv.org/abs/2407.11418},
-            }
-        """
-        from ..questions import QuestionYesNo
-        new_scenario_list = self.duplicate()
-        q = QuestionYesNo(
-            question_text=language_predicate, question_name="binary_outcome"
-        )
-        results = q.by(new_scenario_list).run(verbose=False)
-        new_scenario_list = new_scenario_list.add_list(
-            "criteria", results.select("binary_outcome").to_list()
-        )
-        return new_scenario_list.filter("criteria == 'Yes'").drop("criteria")
+        return new_scenarios
     def pivot(
         self,
@@ -378,14 +400,11 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             value = scenario[value_name]
             pivoted_dict[id_key][variable] = value
-        # Convert the dict of dicts to a list of Scenarios
-        pivoted_scenarios = [
-            Scenario(dict(zip(id_vars, id_key), **values))
-            for id_key, values in pivoted_dict.items()
-        ]
-        return ScenarioList(pivoted_scenarios)
+        new_sl = ScenarioList(data = [], codebook = self.codebook)
+        for id_key, values in pivoted_dict.items():
+            new_sl.append(Scenario(dict(zip(id_vars, id_key), **values)))
+        return new_sl
     def group_by(
         self, id_vars: List[str], variables: List[str], func: Callable
     ) -> ScenarioList:
@@ -426,7 +445,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                 grouped[key][var].append(scenario[var])
         # Apply the function to each group
-        result = []
+        new_sl= ScenarioList(data = [], codebook = self.codebook)
         for key, group in grouped.items():
             try:
                 aggregated = func(*[group[var] for var in variables])
@@ -440,9 +459,9 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             new_scenario = dict(zip(id_vars, key))
             new_scenario.update(aggregated)
-            result.append(Scenario(new_scenario))
+            new_sl.append(Scenario(new_scenario))
-        return ScenarioList(result)
+        return new_sl
     @property
     def parameters(self) -> set:
@@ -457,22 +476,51 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         if len(self) == 0:
             return set()
-        return set.union(*[set(s.keys()) for s in self])
+        params = set()
+        for scenario in self:
+            params.update(scenario.keys())
+        return params
-    def __hash__(self) -> int:
-        """Return the hash of the ScenarioList.
+    def __original_hash__(self) -> int:
+        """Return the original hash of the ScenarioList using the dictionary-based approach.
         >>> s = ScenarioList.example()
-        >>> hash(s)
+        >>> s.__original_hash__()
         1262252885757976162
         """
         return dict_hash(self.to_dict(sort=True, add_edsl_version=False))
+    def __hash__(self) -> int:
+        """Return the hash of the ScenarioList using a memory-efficient streaming approach.
+        >>> s = ScenarioList.example()
+        >>> hash(s)
+        1219708685929871252
+        """
+        # Start with a seed value
+        running_hash = 0
+        # Use a heap to maintain sorted order as we go
+        import heapq
+        heap = []
+        # Process each scenario's hash and add to heap
+        for scenario in self:
+            heapq.heappush(heap, hash(scenario))
+        # Combine hashes in sorted order
+        while heap:
+            h = heapq.heappop(heap)
+            # Use a large prime number to mix the bits
+            running_hash = (running_hash * 31) ^ h
+        return running_hash
     def __eq__(self, other: Any) -> bool:
         return hash(self) == hash(other)
     def __repr__(self):
-        return f"ScenarioList({self.data})"
+        return f"ScenarioList({list(self.data)})"
     def __mul__(self, other: ScenarioList) -> ScenarioList:
         """Takes the cross product of two ScenarioLists.
@@ -484,16 +532,18 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """
         from itertools import product
         from .scenario import Scenario
         if isinstance(other, Scenario):
             other = ScenarioList([other])
         elif not isinstance(other, ScenarioList):
             from .exceptions import TypeScenarioError
             raise TypeScenarioError(f"Cannot multiply ScenarioList with {type(other)}")
-        new_sl = []
-        for s1, s2 in list(product(self, other)):
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
+        for s1, s2 in product(self, other):
             new_sl.append(s1 + s2)
-        return ScenarioList(new_sl)
+        return new_sl
     def times(self, other: ScenarioList) -> ScenarioList:
         """Takes the cross product of two ScenarioLists.
@@ -505,6 +555,8 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> s1.times(s2)
         ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
         """
+        import warnings
+        warnings.warn("times is deprecated, use * instead", DeprecationWarning)
         return self.__mul__(other)
     def shuffle(self, seed: Optional[str] = None) -> ScenarioList:
@@ -524,14 +576,16 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """Return a random sample from the ScenarioList
         >>> s = ScenarioList.from_list("a", [1,2,3,4,5,6])
-        >>> s.sample(3, seed = "edsl")
+        >>> s.sample(3, seed = "edsl")  # doctest: +SKIP
         ScenarioList([Scenario({'a': 2}), Scenario({'a': 1}), Scenario({'a': 3})])
         """
         if seed:
             random.seed(seed)
         sl = self.duplicate()
-        return ScenarioList(random.sample(sl.data, n))
+        # Convert to list if necessary for random.sample
+        data_list = list(sl.data)
+        return ScenarioList(random.sample(data_list, n))
     def expand(self, expand_field: str, number_field: bool = False) -> ScenarioList:
         """Expand the ScenarioList by a field.
@@ -560,15 +614,21 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                 new_scenarios.append(new_scenario)
         return ScenarioList(new_scenarios)
-    def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
+    def _concatenate(
+        self,
+        fields: List[str],
+        output_type: str = "string",
+        separator: str = ";",
+        new_field_name: Optional[str] = None,
+    ) -> ScenarioList:
         """Private method to handle concatenation logic for different output types.
         :param fields: The fields to concatenate.
         :param output_type: The type of output ("string", "list", or "set").
         :param separator: The separator to use for string concatenation.
         :param new_field_name: Optional custom name for the concatenated field.
                              If None, defaults to "concat_field1_field2_..."
         Returns:
             ScenarioList: A new ScenarioList with concatenated fields.
         """
@@ -577,7 +637,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             raise ScenarioError(
                 f"The 'fields' parameter must be a list of field names, not a string. Got '{fields}'."
             )
         new_scenarios = []
         for scenario in self:
             new_scenario = scenario.copy()
@@ -587,8 +647,12 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                     values.append(new_scenario[field])
                     del new_scenario[field]
-            field_name = new_field_name if new_field_name is not None else f"concat_{'_'.join(fields)}"
+            field_name = (
+                new_field_name
+                if new_field_name is not None
+                else f"concat_{'_'.join(fields)}"
+            )
             if output_type == "string":
                 # Convert all values to strings and join with separator
                 new_scenario[field_name] = separator.join(str(v) for v in values)
@@ -600,13 +664,21 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                 new_scenario[field_name] = set(values)
             else:
                 from .exceptions import ValueScenarioError
-                raise ValueScenarioError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")
+                raise ValueScenarioError(
+                    f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'."
+                )
             new_scenarios.append(new_scenario)
         return ScenarioList(new_scenarios)
-    def concatenate(self, fields: List[str], separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
+    def concatenate(
+        self,
+        fields: List[str],
+        separator: str = ";",
+        new_field_name: Optional[str] = None,
+    ) -> ScenarioList:
         """Concatenate specified fields into a single string field.
         :param fields: The fields to concatenate.
@@ -623,9 +695,16 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             >>> s.concatenate(['a', 'b', 'c'], new_field_name='combined')
             ScenarioList([Scenario({'combined': '1;2;3'}), Scenario({'combined': '4;5;6'})])
         """
-        return self._concatenate(fields, output_type="string", separator=separator, new_field_name=new_field_name)
+        return self._concatenate(
+            fields,
+            output_type="string",
+            separator=separator,
+            new_field_name=new_field_name,
+        )
-    def concatenate_to_list(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
+    def concatenate_to_list(
+        self, fields: List[str], new_field_name: Optional[str] = None
+    ) -> ScenarioList:
         """Concatenate specified fields into a single list field.
         :param fields: The fields to concatenate.
@@ -641,9 +720,13 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             >>> s.concatenate_to_list(['a', 'b', 'c'], new_field_name='values')
             ScenarioList([Scenario({'values': [1, 2, 3]}), Scenario({'values': [4, 5, 6]})])
         """
-        return self._concatenate(fields, output_type="list", new_field_name=new_field_name)
+        return self._concatenate(
+            fields, output_type="list", new_field_name=new_field_name
+        )
-    def concatenate_to_set(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
+    def concatenate_to_set(
+        self, fields: List[str], new_field_name: Optional[str] = None
+    ) -> ScenarioList:
         """Concatenate specified fields into a single set field.
         :param fields: The fields to concatenate.
@@ -659,7 +742,9 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             >>> s.concatenate_to_set(['a', 'b', 'c'], new_field_name='unique_values')
             ScenarioList([Scenario({'unique_values': {1, 2, 3}}), Scenario({'unique_values': {4, 5, 6}})])
         """
-        return self._concatenate(fields, output_type="set", new_field_name=new_field_name)
+        return self._concatenate(
+            fields, output_type="set", new_field_name=new_field_name
+        )
     def unpack_dict(
         self, field: str, prefix: Optional[str] = None, drop_field: bool = False
@@ -773,10 +858,10 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         def get_sort_key(scenario: Any) -> tuple:
             return tuple(scenario[field] for field in fields)
-        return ScenarioList(sorted(self, key=get_sort_key, reverse=reverse))
+        return ScenarioList(sorted(self.data, key=get_sort_key, reverse=reverse))
     def duplicate(self) -> ScenarioList:
-        """Return a copy of the ScenarioList.
+        """Return a copy of the ScenarioList using streaming to avoid loading everything into memory.
         >>> sl = ScenarioList.example()
         >>> sl_copy = sl.duplicate()
@@ -785,8 +870,30 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> sl is sl_copy
         False
         """
-        return ScenarioList([scenario.copy() for scenario in self])
+        new_list = ScenarioList()
+        for scenario in self.data:
+            new_list.append(scenario.copy())
+        return new_list
+    def __iter__(self):
+        """Iterate over scenarios using streaming."""
+        return iter(self.data)
+    def equals(self, other: Any) -> bool:
+        """Memory-efficient comparison of two ScenarioLists."""
+        if not isinstance(other, ScenarioList):
+            return False
+        if len(self) != len(other):
+            return False
+        if self.codebook != other.codebook:
+            return False
+        return self.data == other.data
+    def __eq__(self, other: Any) -> bool:
+        """Use memory-efficient comparison by default."""
+        return self.equals(other)
+    @memory_profile
     def filter(self, expression: str) -> ScenarioList:
         """
         Filter a list of scenarios based on an expression.
@@ -799,36 +906,62 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> s.filter("b == 2")
         ScenarioList([Scenario({'a': 1, 'b': 2})])
         """
-        sl = self.duplicate()
-        base_keys = set(self[0].keys())
-        keys = set()
-        for scenario in sl:
-            keys.update(scenario.keys())
-        if keys != base_keys:
-            import warnings
-            warnings.warn(
-                "Ragged ScenarioList detected (different keys for different scenario entries). This may cause unexpected behavior."
-            )
+        # Get first item to check keys if available
+        try:
+            first_item = self[0] if len(self) > 0 else None
+            if first_item:
+                # Check for ragged keys by examining a sample of scenarios
+                # rather than iterating through all of them
+                sample_size = min(len(self), 100)  # Check at most 100 scenarios
+                base_keys = set(first_item.keys())
+                keys = set()
+                # Use a counter to check only the sample_size
+                count = 0
+                for scenario in self:
+                    keys.update(scenario.keys())
+                    count += 1
+                    if count >= sample_size:
+                        break
+                if keys != base_keys:
+                    import warnings
+                    warnings.warn(
+                        "Ragged ScenarioList detected (different keys for different scenario entries). This may cause unexpected behavior."
+                    )
+        except IndexError:
+            pass
+        # Create new ScenarioList with filtered data
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
         def create_evaluator(scenario: Scenario):
-            """Create an evaluator for the given result.
-            The 'combined_dict' is a mapping of all values for that Result object.
-            """
+            """Create an evaluator for the given scenario."""
             return EvalWithCompoundTypes(names=scenario)
         try:
-            # iterates through all the results and evaluates the expression
-            new_data = []
-            for scenario in sl:
+            # Process one scenario at a time to minimize memory usage
+            for scenario in self:
+                # Check if scenario matches the filter expression
                 if create_evaluator(scenario).eval(expression):
-                    new_data.append(scenario)
+                    # Create a copy and immediately append to the new list
+                    scenario_copy = scenario.copy()
+                    new_sl.append(scenario_copy)
+                    # Remove reference to allow for garbage collection
+                    del scenario_copy
         except NameNotDefined as e:
-            available_fields = ", ".join(self.data[0].keys() if self.data else [])
+            # Get available fields for error message
+            try:
+                first_item = self[0] if len(self) > 0 else None
+                available_fields = ", ".join(first_item.keys() if first_item else [])
+            except:
+                available_fields = "unknown"
             raise ScenarioError(
                 f"Error in filter: '{e}'\n"
                 f"The expression '{expression}' refers to a field that does not exist.\n"
-                f"Scenario: {scenario}\n"
                 f"Available fields: {available_fields}\n"
                 "Check your filter expression or consult the documentation: "
                 "https://docs.expectedparrot.com/en/latest/scenarios.html#module-edsl.scenarios.Scenario"
@@ -836,18 +969,24 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         except Exception as e:
             raise ScenarioError(f"Error in filter. Exception:{e}")
-        return ScenarioList(new_data)
+        return new_sl
-    def from_urls(
-        self, urls: list[str], field_name: Optional[str] = "text"
-    ) -> ScenarioList:
-        """Create a ScenarioList from a list of URLs.
-        :param urls: A list of URLs.
-        :param field_name: The name of the field to store the text from the URLs.
+    @classmethod
+    def from_urls(cls, urls: list[str], field_name: Optional[str] = "text") -> ScenarioList:
+        from .scenario_source import URLSource
+        return URLSource(urls, field_name).to_scenario_list()
+    @classmethod
+    def from_list(cls, field_name: str, values: list, use_indexes: bool = False) -> ScenarioList:
+        """Create a ScenarioList from a list of values with a specified field name.
+        >>> ScenarioList.from_list('text', ['a', 'b', 'c'])
+        ScenarioList([Scenario({'text': 'a'}), Scenario({'text': 'b'}), Scenario({'text': 'c'})])
         """
-        return ScenarioList([Scenario.from_url(url, field_name) for url in urls])
+        from .scenario_source import ListSource
+        return ListSource(field_name, values, use_indexes).to_scenario_list()
     def select(self, *fields: str) -> ScenarioList:
         """
@@ -874,8 +1013,10 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> s.drop('a')
         ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
         """
-        sl = self.duplicate()
-        return ScenarioList([scenario.drop(fields) for scenario in sl])
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
+        for scenario in self:
+            new_sl.append(scenario.drop(fields))
+        return new_sl
     def keep(self, *fields: str) -> ScenarioList:
         """Keep only the specified fields in the scenarios.
@@ -888,8 +1029,10 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> s.keep('a')
         ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
         """
-        sl = self.duplicate()
-        return ScenarioList([scenario.keep(fields) for scenario in sl])
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
+        for scenario in self:
+            new_sl.append(scenario.keep(fields))
+        return new_sl
     @classmethod
     def from_directory(
@@ -899,12 +1042,12 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         key_name: str = "content",
     ) -> "ScenarioList":
         """Create a ScenarioList of Scenario objects from files in a directory.
         This method scans a directory and creates a Scenario object for each file found,
         where each Scenario contains a FileStore object under the specified key.
         Optionally filters files based on a wildcard pattern. If no path is provided,
         the current working directory is used.
         Args:
             path: The directory path to scan, optionally including a wildcard pattern.
                  If None, uses the current working directory.
@@ -914,124 +1057,84 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                  - "*.txt" - scans only text files in the current working directory
             recursive: Whether to scan subdirectories recursively. Defaults to False.
             key_name: The key to use for the FileStore object in each Scenario. Defaults to "content".
         Returns:
             A ScenarioList containing Scenario objects for all matching files, where each Scenario
             has a FileStore object under the specified key.
         Raises:
             FileNotFoundError: If the specified directory does not exist.
         Examples:
             # Get all files in the current directory with default key "content"
             sl = ScenarioList.from_directory()
             # Get all Python files in a specific directory with custom key "python_file"
             sl = ScenarioList.from_directory('*.py', key_name="python_file")
             # Get all image files in the current directory
             sl = ScenarioList.from_directory('*.png', key_name="image")
             # Get all files recursively including subdirectories
             sl = ScenarioList.from_directory(recursive=True, key_name="document")
         """
-        # Handle default case - use current directory
-        if path is None:
-            directory_path = os.getcwd()
-            pattern = None
-        else:
-            # Special handling for "**" pattern which indicates recursive scanning
-            has_recursive_pattern = '**' in path if path else False
-            # Check if path contains any wildcard
-            if path and ('*' in path):
-                # Handle "**/*.ext" pattern - find the directory part before the **
-                if has_recursive_pattern:
-                    # Extract the base directory by finding the part before **
-                    parts = path.split('**')
-                    if parts and parts[0]:
-                        # Remove trailing slash if any
-                        directory_path = parts[0].rstrip('/')
-                        if not directory_path:
-                            directory_path = os.getcwd()
-                        # Get the pattern after **
-                        pattern = parts[1] if len(parts) > 1 else None
-                        if pattern and pattern.startswith('/'):
-                            pattern = pattern[1:]  # Remove leading slash
-                    else:
-                        directory_path = os.getcwd()
-                        pattern = None
-                # Handle case where path is just a pattern (e.g., "*.py")
-                elif os.path.dirname(path) == '':
-                    directory_path = os.getcwd()
-                    pattern = os.path.basename(path)
-                else:
-                    # Split into directory and pattern
-                    directory_path = os.path.dirname(path)
-                    if not directory_path:
-                        directory_path = os.getcwd()
-                    pattern = os.path.basename(path)
-            else:
-                # Path is a directory with no pattern
-                directory_path = path
-                pattern = None
-        # Ensure directory exists
-        if not os.path.isdir(directory_path):
-            from .exceptions import FileNotFoundScenarioError
-            raise FileNotFoundScenarioError(f"Directory not found: {directory_path}")
-        # Create a DirectoryScanner for the directory
-        scanner = DirectoryScanner(directory_path)
-        # Configure wildcard pattern filtering
-        suffix_allow_list = None
-        example_suffix = None
-        if pattern:
-            if pattern.startswith('*.'):
-                # Simple extension filter (e.g., "*.py")
-                suffix_allow_list = [pattern[2:]]
-            elif '*' in pattern:
-                # Other wildcard patterns
-                example_suffix = pattern
-            else:
-                # Handle simple non-wildcard pattern (exact match)
-                example_suffix = pattern
+        import warnings
+        warnings.warn(
+            "from_directory is deprecated. Use ScenarioSource.from_source('directory', ...) instead.",
+            DeprecationWarning,
+            stacklevel=2
+        )
+        from .scenario_source import DirectorySource
-        # Use scanner to find files and create FileStore objects
-        file_stores = scanner.scan(
-            factory=lambda path: FileStore(path),
+        source = DirectorySource(
+            directory=path or os.getcwd(),
+            pattern="*",
             recursive=recursive,
-            suffix_allow_list=suffix_allow_list,
-            example_suffix=example_suffix
+            metadata=True
         )
-        # Convert FileStore objects to Scenario objects with the specified key
-        scenarios = [Scenario({key_name: file_store}) for file_store in file_stores]
+        # Get the ScenarioList with FileStore objects under "file" key
+        sl = source.to_scenario_list()
-        return cls(scenarios)
-    @classmethod
-    def from_list(
-        cls, name: str, values: list, func: Optional[Callable] = None
-    ) -> ScenarioList:
-        """Create a ScenarioList from a list of values.
+        # If the requested key is different from the default "file" key used by DirectoryScanner.scan_directory,
+        # rename the keys in all scenarios
+        if key_name != "file":
+            # Create a new ScenarioList
+            result = ScenarioList([])
+            for scenario in sl:
+                # Create a new scenario with the file under the specified key
+                new_data = {key_name: scenario["file"]}
+                # Add all other fields from the original scenario
+                for k, v in scenario.items():
+                    if k != "file":
+                        new_data[k] = v
+                result.append(Scenario(new_data))
+            return result
+        return sl
-        :param name: The name of the field.
-        :param values: The list of values.
-        :param func: An optional function to apply to the values.
+    # @classmethod
+    # def from_list(
+    #     cls, name: str, values: list, func: Optional[Callable] = None
+    # ) -> ScenarioList:
+    #     """Create a ScenarioList from a list of values.
-        Example:
+    #     :param name: The name of the field.
+    #     :param values: The list of values.
+    #     :param func: An optional function to apply to the values.
-        >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
-        ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
-        """
-        if not func:
-            def identity(x):
-                return x
-            func = identity
-        return cls([Scenario({name: func(value)}) for value in values])
+    #     Example:
+    #     >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
+    #     ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
+    #     """
+    #     if not func:
+    #         def identity(x):
+    #             return x
+    #         func = identity
+    #     return cls([Scenario({name: func(value)}) for value in values])
     def table(
         self,
@@ -1041,7 +1144,6 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
     ) -> str:
         """Return the ScenarioList as a table."""
         if tablefmt is not None and tablefmt not in tabulate_formats:
             raise ValueError(
                 f"Invalid table format: {tablefmt}",
@@ -1084,11 +1186,11 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """
         assert set(new_order) == set(self.parameters)
-        new_scenarios = []
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
         for scenario in self:
             new_scenario = Scenario({key: scenario[key] for key in new_order})
-            new_scenarios.append(new_scenario)
-        return ScenarioList(new_scenarios)
+            new_sl.append(new_scenario)
+        return new_sl
     def to_dataset(self) -> "Dataset":
         """
@@ -1128,7 +1230,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """
         new_names = new_names or [f"{field}_{i}" for i in range(len(self[0][field]))]
-        new_scenarios = []
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
         for scenario in self:
             new_scenario = scenario.copy()
             if len(new_names) == 1:
@@ -1139,15 +1241,25 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             if not keep_original:
                 del new_scenario[field]
-            new_scenarios.append(new_scenario)
-        return ScenarioList(new_scenarios)
+            new_sl.append(new_scenario)
+        return new_sl
     @classmethod
-    def from_list_of_tuples(self, *names: str, values: List[tuple]) -> ScenarioList:
-        sl = ScenarioList.from_list(names[0], [value[0] for value in values])
-        for index, name in enumerate(names[1:]):
-            sl = sl.add_list(name, [value[index + 1] for value in values])
-        return sl
+    @deprecated_classmethod("ScenarioSource.from_source('list_of_tuples', ...)")
+    def from_list_of_tuples(cls, field_names: list[str], values: list[tuple], use_indexes: bool = False) -> ScenarioList:
+        """Create a ScenarioList from a list of tuples with specified field names.
+        Args:
+            field_names: A list of field names for the tuples
+            values: A list of tuples with values matching the field_names
+            use_indexes: Whether to add an index field to each scenario
+        Returns:
+            A ScenarioList containing the data from the tuples
+        """
+        from .scenario_source import TuplesSource
+        source = TuplesSource(field_names, values, use_indexes)
+        return source.to_scenario_list()
     def add_list(self, name: str, values: List[Any]) -> ScenarioList:
         """Add a list of values to a ScenarioList.
@@ -1158,19 +1270,25 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> s.add_list('age', [30, 25])
         ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
         """
-        sl = self.duplicate()
-        if len(values) != len(sl):
+        #sl = self.duplicate()
+        if len(values) != len(self.data):
             raise ScenarioError(
-                f"Length of values ({len(values)}) does not match length of ScenarioList ({len(sl)})"
+                f"Length of values ({len(values)}) does not match length of ScenarioList ({len(self)})"
             )
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
         for i, value in enumerate(values):
-            sl[i][name] = value
-        return sl
+            scenario = self.data[i]
+            scenario[name] = value
+            new_sl.append(scenario)
+        return new_sl
     @classmethod
     def create_empty_scenario_list(cls, n: int) -> ScenarioList:
         """Create an empty ScenarioList with n scenarios.
+        Args:
+            n: The number of empty scenarios to create
         Example:
         >>> ScenarioList.create_empty_scenario_list(3)
@@ -1187,11 +1305,12 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> s.add_value('age', 30)
         ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 30})])
         """
-        sl = self.duplicate()
-        for scenario in sl:
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
+        for scenario in self:
             scenario[name] = value
-        return sl
+            new_sl.append(scenario)
+        return new_sl
     def rename(self, replacement_dict: dict) -> ScenarioList:
         """Rename the fields in the scenarios.
@@ -1204,13 +1323,11 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
         """
-        new_list = ScenarioList([])
-        for obj in self:
-            new_obj = obj.rename(replacement_dict)
-            new_list.append(new_obj)
-        return new_list
+        new_sl = ScenarioList(data = [], codebook=self.codebook)
+        for scenario in self:
+            new_scenario = scenario.rename(replacement_dict)
+            new_sl.append(new_scenario)
+        return new_sl
     def replace_names(self, new_names: list) -> ScenarioList:
         """Replace the field names in the scenarios with a new list of names.
@@ -1225,7 +1342,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """
         if not self:
             return ScenarioList([])
         if len(new_names) != len(self[0].keys()):
             raise ScenarioError(
                 f"Length of new names ({len(new_names)}) does not match number of fields ({len(self[0].keys())})"
@@ -1253,72 +1370,71 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
     #     return new_list
     @classmethod
-    def from_sqlite(cls, filepath: str, table: Optional[str] = None, sql_query: Optional[str] = None):
+    @deprecated_classmethod("ScenarioSource.from_source('sqlite', ...)")
+    def from_sqlite(
+        cls, filepath: str, table: Optional[str] = None, sql_query: Optional[str] = None
+    ):
         """Create a ScenarioList from a SQLite database.
         Args:
             filepath (str): Path to the SQLite database file
             table (Optional[str]): Name of table to query. If None, sql_query must be provided.
             sql_query (Optional[str]): SQL query to execute. Used if table is None.
         Returns:
             ScenarioList: List of scenarios created from database rows
         Raises:
             ValueError: If both table and sql_query are None
             sqlite3.Error: If there is an error executing the database query
         """
-        import sqlite3
+        from .scenario_source import SQLiteSource
+        # Handle the case where sql_query is provided instead of table
         if table is None and sql_query is None:
             from .exceptions import ValueScenarioError
             raise ValueScenarioError("Either table or sql_query must be provided")
+        if table is None:
+            # We need to use the old implementation for SQL queries
+            import sqlite3
-        try:
-            with sqlite3.connect(filepath) as conn:
-                cursor = conn.cursor()
-                if table is not None:
-                    cursor.execute(f"SELECT * FROM {table}")
-                else:
+            try:
+                with sqlite3.connect(filepath) as conn:
+                    cursor = conn.cursor()
                     cursor.execute(sql_query)
-                columns = [description[0] for description in cursor.description]
-                data = cursor.fetchall()
-            return cls([Scenario(dict(zip(columns, row))) for row in data])
-        except sqlite3.Error as e:
-            raise sqlite3.Error(f"Database error occurred: {str(e)}")
+                    columns = [description[0] for description in cursor.description]
+                    data = cursor.fetchall()
-    @classmethod
-    def from_latex(cls, tex_file_path: str):
-        with open(tex_file_path, "r") as file:
-            lines = file.readlines()
+                return cls([Scenario(dict(zip(columns, row))) for row in data])
-        processed_lines = []
-        non_blank_lines = [
-            (i, line.strip()) for i, line in enumerate(lines) if line.strip()
-        ]
+            except sqlite3.Error as e:
+                raise sqlite3.Error(f"Database error occurred: {str(e)}")
+        else:
+            # If a table is specified, use SQLiteSource
+            source = SQLiteSource(filepath, table)
+            return source.to_scenario_list()
-        for index, (line_no, text) in enumerate(non_blank_lines):
-            entry = {
-                "line_no": line_no + 1,  # Using 1-based index for line numbers
-                "text": text,
-                "num_words": len(text.split()),
-                "num_chars": len(text),
-                "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
-                "line_after": (
-                    non_blank_lines[index + 1][1]
-                    if index < len(non_blank_lines) - 1
-                    else None
-                ),
-            }
-            processed_lines.append(entry)
-        return ScenarioList([Scenario(entry) for entry in processed_lines])
+    @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('latex', ...)")
+    def from_latex(cls, tex_file_path: str, table_index: int = 0, has_header: bool = True):
+        """Create a ScenarioList from a LaTeX file.
+        Args:
+            tex_file_path: The path to the LaTeX file.
+            table_index: The index of the table to extract (if multiple tables exist).
+                Default is 0 (first table).
+            has_header: Whether the table has a header row. Default is True.
+        Returns:
+            ScenarioList: A new ScenarioList containing the data from the LaTeX table.
+        """
+        from .scenario_source import LaTeXSource
+        source = LaTeXSource(tex_file_path, table_index, has_header)
+        return source.to_scenario_list()
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('google_doc', ...)")
     def from_google_doc(cls, url: str) -> ScenarioList:
         """Create a ScenarioList from a Google Doc.
@@ -1332,30 +1448,12 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             ScenarioList: An instance of the ScenarioList class.
         """
-        import tempfile
-        import requests
-        if "/edit" in url:
-            doc_id = url.split("/d/")[1].split("/edit")[0]
-        else:
-            from .exceptions import ValueScenarioError
-            raise ValueScenarioError("Invalid Google Doc URL format.")
-        export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"
-        # Download the Google Doc as a Word file (.docx)
-        response = requests.get(export_url)
-        response.raise_for_status()  # Ensure the request was successful
-        # Save the Word file to a temporary file
-        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
-            temp_file.write(response.content)
-            temp_filename = temp_file.name
-        # Call the from_docx class method with the temporary file
-        return cls.from_docx(temp_filename)
+        from .scenario_source import GoogleDocSource
+        source = GoogleDocSource(url)
+        return source.to_scenario_list()
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('pandas', ...)")
     def from_pandas(cls, df) -> ScenarioList:
         """Create a ScenarioList from a pandas DataFrame.
@@ -1366,105 +1464,48 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> ScenarioList.from_pandas(df)
         ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
         """
-        return cls([Scenario(row) for row in df.to_dict(orient="records")])
+        from .scenario_source import PandasSource
+        source = PandasSource(df)
+        return source.to_scenario_list()
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('dta', ...)")
     def from_dta(cls, filepath: str, include_metadata: bool = True) -> ScenarioList:
         """Create a ScenarioList from a Stata file.
         Args:
             filepath (str): Path to the Stata (.dta) file
             include_metadata (bool): If True, extract and preserve variable labels and value labels
                                     as additional metadata in the ScenarioList
         Returns:
             ScenarioList: A ScenarioList containing the data from the Stata file
         """
-        import pandas as pd
-        # Read the Stata file with pandas
-        df = pd.read_stata(filepath)
-        # Create the basic ScenarioList
-        scenario_list = cls.from_pandas(df)
-        # Extract and preserve metadata if requested
-        if include_metadata:
-            # Get variable labels (if any)
-            variable_labels = {}
-            if hasattr(df, 'variable_labels') and df.variable_labels:
-                variable_labels = df.variable_labels
-            # Get value labels (if any)
-            value_labels = {}
-            if hasattr(df, 'value_labels') and df.value_labels:
-                value_labels = df.value_labels
-            # Store the metadata in the ScenarioList's codebook
-            if variable_labels or value_labels:
-                scenario_list.codebook = {
-                    'variable_labels': variable_labels,
-                    'value_labels': value_labels
-                }
-        return scenario_list
+        from .scenario_source import StataSource
+        source = StataSource(filepath, include_metadata)
+        return source.to_scenario_list()
     @classmethod
-    def from_wikipedia(cls, url: str, table_index: int = 0):
+    @deprecated_classmethod("ScenarioSource.from_source('wikipedia', ...)")
+    def from_wikipedia(cls, url: str, table_index: int = 0, header: bool = True):
         """
         Extracts a table from a Wikipedia page.
         Parameters:
             url (str): The URL of the Wikipedia page.
             table_index (int): The index of the table to extract (default is 0).
+            header (bool): Whether the table has a header row (default is True).
         Returns:
-            pd.DataFrame: A DataFrame containing the extracted table.
-        # # Example usage
-        # url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
-        # df = from_wikipedia(url, 0)
-        # if not df.empty:
-        #     print(df.head())
-        # else:
-        #     print("Failed to extract table.")
+            ScenarioList: A ScenarioList containing data from the Wikipedia table.
+        Example usage:
+            url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
+            scenarios = ScenarioList.from_wikipedia(url, 0)
         """
-        import pandas as pd
-        import requests
-        from requests.exceptions import RequestException
-        try:
-            # Check if the URL is reachable
-            response = requests.get(url)
-            response.raise_for_status()  # Raises HTTPError for bad responses
-            # Extract tables from the Wikipedia page
-            tables = pd.read_html(url)
-            # Ensure the requested table index is within the range of available tables
-            if table_index >= len(tables) or table_index < 0:
-                raise IndexError(
-                    f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
-                )
-            # Return the requested table as a DataFrame
-            # return tables[table_index]
-            return cls.from_pandas(tables[table_index])
-        except RequestException as e:
-            print(f"Error fetching the URL: {e}")
-        except ValueError as e:
-            print(f"Error parsing tables: {e}")
-        except IndexError as e:
-            print(e)
-        except Exception as e:
-            print(f"An unexpected error occurred: {e}")
-        # Return an empty DataFrame in case of an error
-        # return cls.from_pandas(pd.DataFrame())
+        from .scenario_source import WikipediaSource
+        source = WikipediaSource(url, table_index, header)
+        return source.to_scenario_list()
     def to_key_value(self, field: str, value=None) -> Union[dict, set]:
         """Return the set of values in the field.
@@ -1484,8 +1525,14 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             return {scenario[field]: scenario[value] for scenario in self}
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('excel', ...)")
     def from_excel(
-        cls, filename: str, sheet_name: Optional[str] = None, skip_rows: Optional[List[int]] = None, use_codebook: bool = False
+        cls,
+        filename: str,
+        sheet_name: Optional[str] = None,
+        skip_rows: Optional[List[int]] = None,
+        use_codebook: bool = False,
+        **kwargs
     ) -> ScenarioList:
         """Create a ScenarioList from an Excel file.
@@ -1497,6 +1544,8 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             sheet_name (Optional[str]): Name of the sheet to load. If None and multiple sheets exist,
                                       will raise an error listing available sheets.
             skip_rows (Optional[List[int]]): List of row indices to skip (0-based). If None, all rows are included.
+            use_codebook (bool): If True, rename columns to standard format and store original names in codebook.
+            **kwargs: Additional parameters to pass to pandas.read_excel.
         Example:
@@ -1531,52 +1580,21 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         >>> scenario_list[1]['name']
         'Charlie'
         """
-        from .scenario import Scenario
-        import pandas as pd
-        # Get all sheets
-        all_sheets = pd.read_excel(filename, sheet_name=None)
-        # If no sheet_name is provided and there is more than one sheet, print available sheets
-        if sheet_name is None:
-            if len(all_sheets) > 1:
-                print("The Excel file contains multiple sheets:")
-                for name in all_sheets.keys():
-                    print(f"- {name}")
-                from .exceptions import ValueScenarioError
-                raise ValueScenarioError("Please provide a sheet name to load data from.")
-            else:
-                # If there is only one sheet, use it
-                sheet_name = list(all_sheets.keys())[0]
-        # Load the specified or determined sheet
-        df = pd.read_excel(filename, sheet_name=sheet_name)
-        # Skip specified rows if any
-        if skip_rows:
-            df = df.drop(skip_rows)
-            # Reset index to ensure continuous indexing
-            df = df.reset_index(drop=True)
-        if use_codebook:
-            codebook = {f"col_{i}": col for i, col in enumerate(df.columns)}
-            koobedoc = {col:f"col_{i}" for i, col in enumerate(df.columns)}
-        observations = []
-        for _, row in df.iterrows():
-            if use_codebook:
-                observations.append(Scenario({koobedoc.get(k):v for k,v in row.to_dict().items()}))
-            else:
-                observations.append(Scenario(row.to_dict()))
-        if use_codebook:
-            return cls(observations, codebook=codebook)
-        else:
-            return cls(observations)
+        from .scenario_source import ExcelSource
+        source = ExcelSource(
+            file_path=filename,
+            sheet_name=sheet_name,
+            skip_rows=skip_rows,
+            use_codebook=use_codebook,
+            **kwargs
+        )
+        return source.to_scenario_list()
     @classmethod
-    def from_google_sheet(cls, url: str, sheet_name: str = None, column_names: Optional[List[str]]= None) -> ScenarioList:
+    @deprecated_classmethod("ScenarioSource.from_source('google_sheet', ...)")
+    def from_google_sheet(
+        cls, url: str, sheet_name: str = None, column_names: Optional[List[str]] = None, **kwargs
+    ) -> ScenarioList:
         """Create a ScenarioList from a Google Sheet.
         This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
@@ -1588,126 +1606,111 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                                         the same as from_excel regarding multiple sheets.
             column_names (List[str], optional): If provided, use these names for the columns instead
                                               of the default column names from the sheet.
+            **kwargs: Additional parameters to pass to pandas.read_excel.
         Returns:
             ScenarioList: An instance of the ScenarioList class.
         """
-        import tempfile
-        import requests
-        if "/edit" in url:
-            sheet_id = url.split("/d/")[1].split("/edit")[0]
-        else:
-            from .exceptions import ValueScenarioError
-            raise ValueScenarioError("Invalid Google Sheet URL format.")
-        export_url = (
-            f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
-        )
-        # Download the Google Sheet as an Excel file
-        response = requests.get(export_url)
-        response.raise_for_status()  # Ensure the request was successful
-        # Save the Excel file to a temporary file
-        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
-            temp_file.write(response.content)
-            temp_filename = temp_file.name
-        # First create the ScenarioList with default column names
-        scenario_list = cls.from_excel(temp_filename, sheet_name=sheet_name)
-        # If column_names is provided, create a new ScenarioList with the specified names
-        if column_names is not None:
-            if len(column_names) != len(scenario_list[0].keys()):
-                raise ValueError(
-                    f"Number of provided column names ({len(column_names)}) "
-                    f"does not match number of columns in sheet ({len(scenario_list[0].keys())})"
-                )
-            # Create a codebook mapping original keys to new names
-            original_keys = list(scenario_list[0].keys())
-            codebook = dict(zip(original_keys, column_names))
-            # Return new ScenarioList with renamed columns
-            return scenario_list.rename(codebook)
-        else:
-            return scenario_list
+        from .scenario_source import GoogleSheetSource
+        source = GoogleSheetSource(url, sheet_name=sheet_name, column_names=column_names, **kwargs)
+        return source.to_scenario_list()
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('delimited_file', ...)")
     def from_delimited_file(
-        cls, source: Union[str, "ParseResult"], delimiter: str = ","
+        cls, source: Union[str, "ParseResult"], delimiter: str = ",", encoding: str = "utf-8", **kwargs
     ) -> ScenarioList:
-        """Create a ScenarioList from a delimited file (CSV/TSV) or URL."""
-        import requests
-        from .scenario import Scenario
-        from urllib.parse import urlparse
+        """Create a ScenarioList from a delimited file (CSV/TSV) or URL.
+        Args:
+            source: Path to a local file or URL to a remote file.
+            delimiter: The delimiter character used in the file (default is ',').
+            encoding: The file encoding to use (default is 'utf-8').
+            **kwargs: Additional parameters for csv reader.
+        Returns:
+            ScenarioList: An instance of the ScenarioList class.
+        """
+        from .scenario_source import DelimitedFileSource
         from urllib.parse import ParseResult
-        headers = {
-            "Accept": "text/csv,application/csv,text/plain",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
-        }
-        def is_url(source):
-            try:
-                result = urlparse(source)
-                return all([result.scheme, result.netloc])
-            except ValueError:
-                return False
-        try:
-            if isinstance(source, str) and is_url(source):
-                response = requests.get(source, headers=headers)
-                response.raise_for_status()
-                file_obj = StringIO(response.text)
-            elif isinstance(source, ParseResult):
-                response = requests.get(source.geturl(), headers=headers)
-                response.raise_for_status()
-                file_obj = StringIO(response.text)
-            else:
-                # Try different encodings if the default fails
-                encodings_to_try = ["utf-8", "latin-1", "cp1252", "ISO-8859-1"]
-                last_exception = None
-                file_obj = None
-                for encoding in encodings_to_try:
-                    try:
-                        file_obj = open(source, "r", encoding=encoding)
-                        # Test reading a bit to verify encoding
-                        file_obj.readline()
-                        file_obj.seek(0)  # Reset file position
-                        break
-                    except UnicodeDecodeError as e:
-                        last_exception = e
-                        if file_obj:
-                            file_obj.close()
-                            file_obj = None
-                if file_obj is None:
-                    from .exceptions import ValueScenarioError
-                    raise ValueScenarioError(f"Could not decode file {source} with any of the attempted encodings. Original error: {last_exception}")
-            reader = csv.reader(file_obj, delimiter=delimiter)
-            try:
-                header = next(reader)
-                observations = [Scenario(dict(zip(header, row))) for row in reader]
-            except StopIteration:
-                from .exceptions import ValueScenarioError
-                raise ValueScenarioError(f"File {source} appears to be empty or has an invalid format")
-        finally:
-            if file_obj:
-                file_obj.close()
-        return cls(observations)
+        if isinstance(source, ParseResult):
+            # Convert ParseResult to string URL
+            file_or_url = source.geturl()
+        else:
+            file_or_url = source
+        source = DelimitedFileSource(
+            file_or_url=file_or_url,
+            delimiter=delimiter,
+            encoding=encoding,
+            **kwargs
+        )
+        return source.to_scenario_list()
     # Convenience methods for specific file types
     @classmethod
-    def from_csv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
-        """Create a ScenarioList from a CSV file or URL."""
-        return cls.from_delimited_file(source, delimiter=",")
+    @deprecated_classmethod("ScenarioSource.from_source('csv', ...)")
+    def from_csv(cls, source: Union[str, "ParseResult"], has_header: bool = True, encoding: str = "utf-8", **kwargs) -> ScenarioList:
+        """Create a ScenarioList from a CSV file or URL.
+        Args:
+            source: Path to a local file or URL to a remote file.
+            has_header: Whether the file has a header row (default is True).
+            encoding: The file encoding to use (default is 'utf-8').
+            **kwargs: Additional parameters for csv reader.
+        Returns:
+            ScenarioList: An instance of the ScenarioList class.
+        """
+        from .scenario_source import CSVSource
+        from urllib.parse import ParseResult
+        if isinstance(source, ParseResult):
+            # Convert ParseResult to string URL
+            file_or_url = source.geturl()
+        else:
+            file_or_url = source
+        source = CSVSource(
+            file_or_url=file_or_url,
+            has_header=has_header,
+            encoding=encoding,
+            **kwargs
+        )
+        return source.to_scenario_list()
+    @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('tsv', ...)")
+    def from_tsv(cls, source: Union[str, "ParseResult"], has_header: bool = True, encoding: str = "utf-8", **kwargs) -> ScenarioList:
+        """Create a ScenarioList from a TSV file or URL.
+        Args:
+            source: Path to a local file or URL to a remote file.
+            has_header: Whether the file has a header row (default is True).
+            encoding: The file encoding to use (default is 'utf-8').
+            **kwargs: Additional parameters for csv reader.
+        Returns:
+            ScenarioList: An instance of the ScenarioList class.
+        """
+        from .scenario_source import TSVSource
+        from urllib.parse import ParseResult
+        if isinstance(source, ParseResult):
+            # Convert ParseResult to string URL
+            file_or_url = source.geturl()
+        else:
+            file_or_url = source
+        source = TSVSource(
+            file_or_url=file_or_url,
+            has_header=has_header,
+            encoding=encoding,
+            **kwargs
+        )
+        return source.to_scenario_list()
     def left_join(self, other: ScenarioList, by: Union[str, list[str]]) -> ScenarioList:
         """Perform a left join with another ScenarioList, following SQL join semantics.
@@ -1730,21 +1733,35 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
     @classmethod
     def from_tsv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
         """Create a ScenarioList from a TSV file or URL."""
-        return cls.from_delimited_file(source, delimiter="\t")
+        from .scenario_source import ScenarioSource
+        # Delegate to ScenarioSource implementation
+        return ScenarioSource._from_tsv(source)
     def to_dict(self, sort: bool = False, add_edsl_version: bool = True) -> dict:
         """
         >>> s = ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood-fired pizza'})])
-        >>> s.to_dict()
+        >>> s.to_dict()  # doctest: +ELLIPSIS
         {'scenarios': [{'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}, {'food': 'wood-fired pizza', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}], 'edsl_version': '...', 'edsl_class_name': 'ScenarioList'}
+        >>> s = ScenarioList([Scenario({'food': 'wood chips'})], codebook={'food': 'description'})
+        >>> d = s.to_dict()
+        >>> 'codebook' in d
+        True
+        >>> d['codebook'] == {'food': 'description'}
+        True
         """
         if sort:
             data = sorted(self, key=lambda x: hash(x))
         else:
             data = self
         d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
+        # Add codebook if it exists
+        if hasattr(self, 'codebook') and self.codebook:
+            d['codebook'] = self.codebook
         if add_edsl_version:
             from .. import __version__
@@ -1758,8 +1775,8 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         :param survey: The Survey object to use for the Jobs object.
         Example:
-        >>> from edsl import Survey, Jobs, ScenarioList
-        >>> isinstance(ScenarioList.example().to(Survey.example()), Jobs)
+        >>> from edsl import Survey, Jobs, ScenarioList  # doctest: +SKIP
+        >>> isinstance(ScenarioList.example().to(Survey.example()), Jobs)  # doctest: +SKIP
         True
         """
         from ..surveys import Survey
@@ -1786,11 +1803,23 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
     @classmethod
     @remove_edsl_version
-    def from_dict(cls, data) -> ScenarioList:
-        """Create a `ScenarioList` from a dictionary."""
+    def from_dict(cls, data: dict) -> ScenarioList:
+        """Create a `ScenarioList` from a dictionary.
+        >>> d = {'scenarios': [{'food': 'wood chips'}], 'codebook': {'food': 'description'}}
+        >>> s = ScenarioList.from_dict(d)
+        >>> s.codebook == {'food': 'description'}
+        True
+        >>> s[0]['food']
+        'wood chips'
+        """
         from .scenario import Scenario
-        return cls([Scenario.from_dict(s) for s in data["scenarios"]])
+        # Extract codebook if it exists
+        codebook = data.get('codebook', None)
+        # Create ScenarioList with scenarios and codebook
+        return cls([Scenario.from_dict(s) for s in data["scenarios"]], codebook=codebook)
     @classmethod
     def from_nested_dict(cls, data: dict) -> ScenarioList:
@@ -1835,62 +1864,80 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """
         return cls([Scenario.example(randomize), Scenario.example(randomize)])
-    def __getitem__(self, key: Union[int, slice]) -> Any:
-        """Return the item at the given index.
-        Example:
-        >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
-        >>> s[0]
-        Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})
+    def items(self):
+        """Make this class compatible with dict.items() by accessing first scenario items.
-        >>> s[:1]
-        ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
+        This ensures the class works as a drop-in replacement for UserList in code
+        that expects a dictionary-like interface.
+        Returns:
+            items view from the first scenario object if available, empty list otherwise
         """
-        if isinstance(key, slice):
-            return ScenarioList(super().__getitem__(key))
-        elif isinstance(key, int):
-            return super().__getitem__(key)
-        else:
-            return self.to_dict(add_edsl_version=False)[key]
+        if len(self.data) > 0:
+            return self.data[0].items()
+        return {}.items()
-    def to_agent_list(self):
-        """Convert the ScenarioList to an AgentList.
+    def copy(self):
+        """Create a copy of this ScenarioList.
-        Example:
-        >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
-        >>> s.to_agent_list()
-        AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
+        Returns:
+            A new ScenarioList with copies of the same scenarios
         """
-        from ..agents import AgentList, Agent
+        # Get copies of all scenarios
+        if len(self.data) > 0:
+            # If we have at least one scenario, copy the first one
+            if hasattr(self.data[0], "copy"):
+                return self.data[0].copy()
+            # Otherwise try to convert to Scenario
+            from .scenario import Scenario
-        agents = []
-        for scenario in self:
-            new_scenario = scenario.copy().data
-            if "name" in new_scenario:
-                name = new_scenario.pop("name")
-                proposed_agent_name = "agent_name"
-                while proposed_agent_name not in new_scenario:
-                    proposed_agent_name += "_"
-                warnings.warn(
-                    f"The 'name' field is reserved for the agent's name---putting this value in {proposed_agent_name}"
-                )
-                new_scenario[proposed_agent_name] = name
-                new_agent = Agent(traits=new_scenario, name=name)
-            if "agent_parameters" in new_scenario:
-                agent_parameters = new_scenario.pop("agent_parameters")
-                instruction = agent_parameters.get("instruction", None)
-                name = agent_parameters.get("name", None)
-                new_agent = Agent(
-                    traits=new_scenario, name=name, instruction=instruction
-                )
-            else:
-                new_agent = Agent(traits=new_scenario)
+            try:
+                return Scenario(dict(self.data[0]))
+            except (TypeError, ValueError):
+                # Fallback to empty scenario
+                return Scenario({})
-            agents.append(new_agent)
-        return AgentList(agents)
+    def to_agent_list(self):
+        """Convert the ScenarioList to an AgentList.
+        This method supports special fields that map to Agent parameters:
+        - "name": Will be used as the agent's name
+        - "agent_parameters": A dictionary containing:
+            - "instruction": The agent's instruction text
+            - "name": The agent's name (overrides the "name" field if present)
+        Example:
+            >>> from edsl import ScenarioList, Scenario
+            >>> # Basic usage with traits
+            >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
+            >>> al = s.to_agent_list()
+            >>> al
+            AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
+            >>> # Using agent name
+            >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 22})])
+            >>> al = s.to_agent_list()
+            >>> al[0].name
+            'Alice'
+            >>> # Using agent parameters for instructions
+            >>> s = ScenarioList([Scenario({
+            ...     'age': 22,
+            ...     'agent_parameters': {
+            ...         'instruction': 'You are a helpful assistant',
+            ...         'name': 'Assistant'
+            ...     }
+            ... })])
+            >>> al = s.to_agent_list()
+            >>> al[0].instruction
+            'You are a helpful assistant'
+            >>> al[0].name
+            'Assistant'
+        """
+        from ..agents import AgentList
+        return AgentList.from_scenario_list(self)
     def chunk(
         self,
@@ -1920,7 +1967,9 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             new_scenarios.extend(replacement_scenarios)
         return ScenarioList(new_scenarios)
-    def collapse(self, field: str, separator: Optional[str] = None, add_count: bool = False) -> ScenarioList:
+    def collapse(
+        self, field: str, separator: Optional[str] = None, add_count: bool = False
+    ) -> ScenarioList:
         """Collapse a ScenarioList by grouping on all fields except the specified one,
         collecting the values of the specified field into a list.
@@ -1943,10 +1992,10 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """
         if not self:
             return ScenarioList([])
         # Determine all fields except the one to collapse
         id_vars = [key for key in self[0].keys() if key != field]
         # Group the scenarios
         grouped = defaultdict(list)
         for scenario in self:
@@ -1954,33 +2003,34 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             key = tuple(scenario[id_var] for id_var in id_vars)
             # Add the value of the field to collapse to the list for this key
             grouped[key].append(scenario[field])
         # Create a new ScenarioList with the collapsed field
-        result = []
+        new_sl = ScenarioList(data = [], codebook=self.codebook)
         for key, values in grouped.items():
             new_scenario = dict(zip(id_vars, key))
             if separator:
-                new_scenario[field] = separator.join(values)
+                new_scenario[field] = separator.join([str(x) for x in values])
             else:
                 new_scenario[field] = values
             if add_count:
-                new_scenario['num_collapsed_rows'] = len(values)
-            result.append(Scenario(new_scenario))
-        return ScenarioList(result)
+                new_scenario["num_collapsed_rows"] = len(values)
+            new_sl.append(Scenario(new_scenario))
+        #return ScenarioList(result)
+        return new_sl
     def create_comparisons(
-        self,
-        bidirectional: bool = False,
+        self,
+        bidirectional: bool = False,
         num_options: int = 2,
         option_prefix: str = "option_",
-        use_alphabet: bool = False
+        use_alphabet: bool = False,
     ) -> ScenarioList:
         """Create a new ScenarioList with comparisons between scenarios.
         Each scenario in the result contains multiple original scenarios as dictionaries,
         allowing for side-by-side comparison.
         Args:
             bidirectional (bool): If True, include both (A,B) and (B,A) comparisons.
                 If False, only include (A,B) where A comes before B in the original list.
@@ -1991,11 +2041,11 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                 Ignored if use_alphabet is True.
             use_alphabet (bool): If True, use letters as keys (A, B, C, etc.) instead of
                 the option_prefix with numbers.
         Returns:
             ScenarioList: A new ScenarioList where each scenario contains multiple original
                 scenarios as dictionaries.
         Example:
             >>> s = ScenarioList([
             ...     Scenario({'id': 1, 'text': 'Option A'}),
@@ -2009,22 +2059,29 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
         """
         from itertools import combinations, permutations
         import string
         if num_options < 2:
             from .exceptions import ValueScenarioError
             raise ValueScenarioError("num_options must be at least 2")
         if num_options > len(self):
             from .exceptions import ValueScenarioError
-            raise ValueScenarioError(f"num_options ({num_options}) cannot exceed the number of scenarios ({len(self)})")
+            raise ValueScenarioError(
+                f"num_options ({num_options}) cannot exceed the number of scenarios ({len(self)})"
+            )
         if use_alphabet and num_options > 26:
             from .exceptions import ValueScenarioError
-            raise ValueScenarioError("When using alphabet labels, num_options cannot exceed 26 (the number of letters in the English alphabet)")
+            raise ValueScenarioError(
+                "When using alphabet labels, num_options cannot exceed 26 (the number of letters in the English alphabet)"
+            )
         # Convert each scenario to a dictionary
         scenario_dicts = [scenario.to_dict(add_edsl_version=False) for scenario in self]
         # Generate combinations or permutations based on bidirectional flag
         if bidirectional:
             # For bidirectional, use permutations to get all ordered arrangements
@@ -2032,13 +2089,13 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                 # For pairwise, we can use permutations with r=2
                 scenario_groups = permutations(scenario_dicts, 2)
             else:
-                # For more than 2 options with bidirectional=True,
+                # For more than 2 options with bidirectional=True,
                 # we need all permutations of the specified size
                 scenario_groups = permutations(scenario_dicts, num_options)
         else:
             # For unidirectional, use combinations to get unordered groups
             scenario_groups = combinations(scenario_dicts, num_options)
         # Create new scenarios with the combinations
         result = []
         for group in scenario_groups:
@@ -2052,64 +2109,35 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                     key = f"{option_prefix}{i+1}"
                 new_scenario[key] = scenario_dict
             result.append(Scenario(new_scenario))
         return ScenarioList(result)
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('parquet', ...)")
     def from_parquet(cls, filepath: str) -> ScenarioList:
         """Create a ScenarioList from a Parquet file.
         Args:
-            filepath (str): Path to the Parquet file
+            filepath (str): The path to the Parquet file.
         Returns:
-            ScenarioList: A ScenarioList containing the data from the Parquet file
-        Example:
-        >>> import pandas as pd
-        >>> import tempfile
-        >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25]})
-        >>> # The following would create and read a parquet file if dependencies are installed:
-        >>> # with tempfile.NamedTemporaryFile(suffix='.parquet', delete=False) as f:
-        >>> #     df.to_parquet(f.name)
-        >>> #     scenario_list = ScenarioList.from_parquet(f.name)
-        >>> # Instead, we'll demonstrate the equivalent result:
-        >>> scenario_list = ScenarioList.from_pandas(df)
-        >>> len(scenario_list)
-        2
-        >>> scenario_list[0]['name']
-        'Alice'
+            ScenarioList: A new ScenarioList containing the scenarios from the Parquet file.
         """
-        import pandas as pd
-        try:
-            # Try to read the Parquet file with pandas
-            df = pd.read_parquet(filepath)
-        except ImportError as e:
-            # Handle missing dependencies with a helpful error message
-            if "pyarrow" in str(e) or "fastparquet" in str(e):
-                raise ImportError(
-                    "Missing dependencies for Parquet support. Please install either pyarrow or fastparquet:\n"
-                    "  pip install pyarrow\n"
-                    "  or\n"
-                    "  pip install fastparquet"
-                ) from e
-            else:
-                raise
-        # Convert the DataFrame to a ScenarioList
-        return cls.from_pandas(df)
+        from .scenario_source import ParquetSource
+        source = ParquetSource(filepath)
+        return source.to_scenario_list()
-    def replace_values(self, replacements:dict) -> "ScenarioList":
+    def replace_values(self, replacements: dict) -> "ScenarioList":
         """
         Create new scenarios with values replaced according to the provided replacement dictionary.
         Args:
             replacements (dict): Dictionary of values to replace {old_value: new_value}
         Returns:
             ScenarioList: A new ScenarioList with replaced values
         Examples:
             >>> scenarios = ScenarioList([
             ...     Scenario({'a': 'nan', 'b': 1}),
@@ -2122,7 +2150,7 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
             >>> print(scenarios)
             ScenarioList([Scenario({'a': 'nan', 'b': 1}), Scenario({'a': 2, 'b': 'nan'})])
         """
-        new_scenarios = []
+        new_sl = ScenarioList(data=[], codebook=self.codebook)
         for scenario in self:
             new_scenario = {}
             for key, value in scenario.items():
@@ -2130,20 +2158,65 @@ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
                     new_scenario[key] = replacements[str(value)]
                 else:
                     new_scenario[key] = value
-            new_scenarios.append(Scenario(new_scenario))
-        return ScenarioList(new_scenarios)
+            new_sl.append(Scenario(new_scenario))
+        return new_sl
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('pdf', ...)")
     def from_pdf(cls, filename_or_url, collapse_pages=False):
-        return PdfTools.from_pdf(filename_or_url, collapse_pages)
+        """Create a ScenarioList from a PDF file or URL."""
+        from .scenario_source import PDFSource
+        source = PDFSource(
+            file_path=filename_or_url,
+            chunk_type="page" if not collapse_pages else "text",
+            chunk_size=1
+        )
+        return source.to_scenario_list()
     @classmethod
+    @deprecated_classmethod("ScenarioSource.from_source('pdf_to_image', ...)")
     def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
-        return PdfTools.from_pdf_to_image(pdf_path, image_format)
+        """Create a ScenarioList with images extracted from a PDF file."""
+        from .scenario_source import PDFImageSource
+        source = PDFImageSource(
+            file_path=pdf_path,
+            base_width=2000,
+            include_text=True
+        )
+        return source.to_scenario_list()
+    @classmethod
+    def from_source(cls, source_type: str, *args, **kwargs) -> "ScenarioList":
+        """
+        Create a ScenarioList from a specified source type.
+        This method serves as the main entry point for creating ScenarioList objects,
+        providing a unified interface for various data sources.
+        Args:
+            source_type: The type of source to create a ScenarioList from.
+                         Valid values include: 'urls', 'directory', 'csv', 'tsv',
+                         'excel', 'pdf', 'pdf_to_image', and others.
+            *args: Positional arguments to pass to the source-specific method.
+            **kwargs: Keyword arguments to pass to the source-specific method.
+        Returns:
+            A ScenarioList object created from the specified source.
+        Examples:
+            >>> # This is a simplified example for doctest
+            >>> # In real usage, you would provide a path to your CSV file:
+            >>> # sl_csv = ScenarioList.from_source('csv', 'your_data.csv')
+            >>> # Or use other source types like 'directory', 'excel', etc.
+            >>> # Examples of other source types:
+            >>> # sl_dir = ScenarioList.from_source('directory', '/path/to/files')
+        """
+        from .scenario_source import ScenarioSource
+        return ScenarioSource.from_source(source_type, *args, **kwargs)
 if __name__ == "__main__":
     import doctest
-    doctest.testmod(optionflags=doctest.ELLIPSIS)
+    doctest.testmod(optionflags=doctest.ELLIPSIS)

edsl 0.1.54__py3-none-any.whl → 0.1.55__py3-none-any.whl

edsl 0.1.54py3-none-any.whl → 0.1.55py3-none-any.whl