PyPI - edsl - Versions diffs - 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl - Mend

edsl 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

edsl/__version__.py +1 -1
edsl/agents/agent.py +23 -4
edsl/agents/agent_list.py +36 -6
edsl/coop/coop.py +103 -1
edsl/dataset/dataset.py +74 -0
edsl/dataset/dataset_operations_mixin.py +67 -62
edsl/inference_services/services/test_service.py +1 -1
edsl/interviews/exception_tracking.py +66 -20
edsl/invigilators/invigilators.py +5 -1
edsl/invigilators/prompt_constructor.py +299 -136
edsl/jobs/html_table_job_logger.py +18 -1
edsl/jobs/jobs_pricing_estimation.py +6 -2
edsl/jobs/jobs_remote_inference_logger.py +2 -0
edsl/jobs/remote_inference.py +34 -7
edsl/language_models/language_model.py +39 -2
edsl/prompts/prompt.py +1 -0
edsl/questions/question_list.py +76 -20
edsl/results/results.py +8 -1
edsl/scenarios/file_store.py +8 -12
edsl/scenarios/scenario.py +50 -2
edsl/scenarios/scenario_list.py +34 -12
edsl/surveys/survey.py +4 -0
edsl/tasks/task_history.py +180 -6
edsl/utilities/wikipedia.py +194 -0
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/METADATA +4 -3
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/RECORD +29 -28
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/LICENSE +0 -0
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/WHEEL +0 -0
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/entry_points.txt +0 -0

edsl/jobs/html_table_job_logger.py CHANGED Viewed

@@ -217,6 +217,17 @@ class HTMLTableJobLogger(JobLogger):
         )
         total_cost = total_input_cost + total_output_cost
+        # Calculate credit totals
+        total_input_credits = sum(
+            cost.input_cost_credits_with_cache or 0
+            for cost in self.jobs_info.model_costs
+        )
+        total_output_credits = sum(
+            cost.output_cost_credits_with_cache or 0
+            for cost in self.jobs_info.model_costs
+        )
+        total_credits = total_input_credits + total_output_credits
         # Generate cost rows HTML with class names for right alignment
         cost_rows = "".join(
             f"""
@@ -228,6 +239,7 @@ class HTMLTableJobLogger(JobLogger):
                 <td class='token-count'>{cost.output_tokens:,}</td>
                 <td class='cost-value'>${cost.output_cost_usd:.4f}</td>
                 <td class='cost-value'>${(cost.input_cost_usd or 0) + (cost.output_cost_usd or 0):.4f}</td>
+                <td class='cost-value'>{(cost.input_cost_credits_with_cache or 0) + (cost.output_cost_credits_with_cache or 0):,.2f}</td>
             </tr>
             """
             for cost in self.jobs_info.model_costs
@@ -242,6 +254,7 @@ class HTMLTableJobLogger(JobLogger):
                 <td class='token-count'>{total_output_tokens:,}</td>
                 <td class='cost-value'>${total_output_cost:.4f}</td>
                 <td class='cost-value'>${total_cost:.4f}</td>
+                <td class='cost-value'>{total_credits:,.2f}</td>
             </tr>
         """
@@ -249,7 +262,7 @@ class HTMLTableJobLogger(JobLogger):
         <div class="model-costs-section">
             <div class="model-costs-header" onclick="{self._collapse(f'model-costs-content-{self.log_id}', f'model-costs-arrow-{self.log_id}')}">
                 <span id="model-costs-arrow-{self.log_id}" class="expand-toggle">&#8963;</span>
-                <span>Model Costs (${total_cost:.4f} total)</span>
+                <span>Model Costs (${total_cost:.4f} / {total_credits:,.2f} credits total)</span>
                 <span style="flex-grow: 1;"></span>
             </div>
             <div id="model-costs-content-{self.log_id}" class="model-costs-content">
@@ -263,6 +276,7 @@ class HTMLTableJobLogger(JobLogger):
                             <th class="cost-header">Output Tokens</th>
                             <th class="cost-header">Output Cost</th>
                             <th class="cost-header">Total Cost</th>
+                            <th class="cost-header">Total Credits</th>
                         </tr>
                     </thead>
                     <tbody>
@@ -270,6 +284,9 @@ class HTMLTableJobLogger(JobLogger):
                         {total_row}
                     </tbody>
                 </table>
+                <p style="font-style: italic; margin-top: 8px; font-size: 0.85em; color: #4b5563;">
+                    You can obtain the total credit cost by multiplying the total USD cost by 100. A lower credit cost indicates that you saved money by retrieving responses from the universal remote cache.
+                </p>
             </div>
         </div>
         """

edsl/jobs/jobs_pricing_estimation.py CHANGED Viewed

@@ -88,7 +88,6 @@ class PromptCostEstimator:
 class JobsPrompts:
     relevant_keys = [
         "user_prompt",
         "system_prompt",
@@ -171,13 +170,18 @@ class JobsPrompts:
         cost = prompt_cost["cost_usd"]
         # Generate cache keys for each iteration
+        files_list = prompts.get("files_list", None)
+        if files_list:
+            files_hash = "+".join([str(hash(file)) for file in files_list])
+            user_prompt_with_hashes = user_prompt + f" {files_hash}"
         cache_keys = []
         for iteration in range(iterations):
             cache_key = CacheEntry.gen_key(
                 model=model,
                 parameters=invigilator.model.parameters,
                 system_prompt=system_prompt,
-                user_prompt=user_prompt,
+                user_prompt=user_prompt_with_hashes if files_list else user_prompt,
                 iteration=iteration,
             )
             cache_keys.append(cache_key)

edsl/jobs/jobs_remote_inference_logger.py CHANGED Viewed

@@ -40,6 +40,8 @@ class ModelCost:
     input_cost_usd: float = None
     output_tokens: int = None
     output_cost_usd: float = None
+    input_cost_credits_with_cache: int = None
+    output_cost_credits_with_cache: int = None
 @dataclass

edsl/jobs/remote_inference.py CHANGED Viewed

@@ -279,9 +279,7 @@ class JobsRemoteInferenceHandler:
         )
         time.sleep(self.poll_interval)
-    def _get_expenses_from_results(
-        self, results: "Results", include_cached_responses_in_cost: bool = False
-    ) -> dict:
+    def _get_expenses_from_results(self, results: "Results") -> dict:
         """
         Calculates expenses from Results object.
@@ -309,10 +307,6 @@ class JobsRemoteInferenceHandler:
                 question_name = key.removesuffix("_cost")
                 cache_used = result["cache_used_dict"][question_name]
-                # Skip if we're excluding cached responses and this was cached
-                if not include_cached_responses_in_cost and cache_used:
-                    continue
                 # Get expense keys for input and output tokens
                 input_key = (
                     result["model"]._inference_service_,
@@ -332,6 +326,7 @@ class JobsRemoteInferenceHandler:
                     expenses[input_key] = {
                         "tokens": 0,
                         "cost_usd": 0,
+                        "cost_usd_with_cache": 0,
                     }
                 input_price_per_million_tokens = input_key[3]
@@ -341,11 +336,15 @@ class JobsRemoteInferenceHandler:
                 expenses[input_key]["tokens"] += input_tokens
                 expenses[input_key]["cost_usd"] += input_cost
+                if not cache_used:
+                    expenses[input_key]["cost_usd_with_cache"] += input_cost
                 # Update output token expenses
                 if output_key not in expenses:
                     expenses[output_key] = {
                         "tokens": 0,
                         "cost_usd": 0,
+                        "cost_usd_with_cache": 0,
                     }
                 output_price_per_million_tokens = output_key[3]
@@ -357,6 +356,9 @@ class JobsRemoteInferenceHandler:
                 expenses[output_key]["tokens"] += output_tokens
                 expenses[output_key]["cost_usd"] += output_cost
+                if not cache_used:
+                    expenses[output_key]["cost_usd_with_cache"] += output_cost
         expenses_by_model = {}
         for expense_key, expense_usage in expenses.items():
             service, model, token_type, _ = expense_key
@@ -368,8 +370,10 @@ class JobsRemoteInferenceHandler:
                     "model": model,
                     "input_tokens": 0,
                     "input_cost_usd": 0,
+                    "input_cost_usd_with_cache": 0,
                     "output_tokens": 0,
                     "output_cost_usd": 0,
+                    "output_cost_usd_with_cache": 0,
                 }
             if token_type == "input":
@@ -377,14 +381,22 @@ class JobsRemoteInferenceHandler:
                 expenses_by_model[model_key]["input_cost_usd"] += expense_usage[
                     "cost_usd"
                 ]
+                expenses_by_model[model_key][
+                    "input_cost_usd_with_cache"
+                ] += expense_usage["cost_usd_with_cache"]
             elif token_type == "output":
                 expenses_by_model[model_key]["output_tokens"] += expense_usage["tokens"]
                 expenses_by_model[model_key]["output_cost_usd"] += expense_usage[
                     "cost_usd"
                 ]
+                expenses_by_model[model_key][
+                    "output_cost_usd_with_cache"
+                ] += expense_usage["cost_usd_with_cache"]
         converter = CostConverter()
         for model_key, model_cost_dict in expenses_by_model.items():
+            # Handle full cost (without cache)
             input_cost = model_cost_dict["input_cost_usd"]
             output_cost = model_cost_dict["output_cost_usd"]
             model_cost_dict["input_cost_credits"] = converter.usd_to_credits(input_cost)
@@ -399,6 +411,15 @@ class JobsRemoteInferenceHandler:
                 model_cost_dict["output_cost_credits"]
             )
+            # Handle cost with cache
+            input_cost_with_cache = model_cost_dict["input_cost_usd_with_cache"]
+            output_cost_with_cache = model_cost_dict["output_cost_usd_with_cache"]
+            model_cost_dict["input_cost_credits_with_cache"] = converter.usd_to_credits(
+                input_cost_with_cache
+            )
+            model_cost_dict["output_cost_credits_with_cache"] = (
+                converter.usd_to_credits(output_cost_with_cache)
+            )
         return list(expenses_by_model.values())
     def _fetch_results_and_log(
@@ -423,6 +444,12 @@ class JobsRemoteInferenceHandler:
                 input_cost_usd=model_cost_dict.get("input_cost_usd"),
                 output_tokens=model_cost_dict.get("output_tokens"),
                 output_cost_usd=model_cost_dict.get("output_cost_usd"),
+                input_cost_credits_with_cache=model_cost_dict.get(
+                    "input_cost_credits_with_cache"
+                ),
+                output_cost_credits_with_cache=model_cost_dict.get(
+                    "output_cost_credits_with_cache"
+                ),
             )
             for model_cost_dict in model_cost_dicts
         ]

edsl/language_models/language_model.py CHANGED Viewed

@@ -769,8 +769,45 @@ class LanguageModel(
                 params["question_name"] = invigilator.question.question_name
             # Get timeout from configuration
             from ..config import CONFIG
-            TIMEOUT = float(CONFIG.get("EDSL_API_TIMEOUT"))
+            import logging
+            logger = logging.getLogger(__name__)
+            base_timeout = float(CONFIG.get("EDSL_API_TIMEOUT"))
+            # Adjust timeout if files are present
+            import time
+            start = time.time()
+            if files_list:
+                # Calculate total size of attached files in MB
+                file_sizes = []
+                for file in files_list:
+                    # Try different attributes that might contain the file content
+                    if hasattr(file, "base64_string") and file.base64_string:
+                        file_sizes.append(len(file.base64_string) / (1024 * 1024))
+                    elif hasattr(file, "content") and file.content:
+                        file_sizes.append(len(file.content) / (1024 * 1024))
+                    elif hasattr(file, "data") and file.data:
+                        file_sizes.append(len(file.data) / (1024 * 1024))
+                    else:
+                        # Default minimum size if we can't determine actual size
+                        file_sizes.append(1)  # Assume at least 1MB
+                total_size_mb = sum(file_sizes)
+                # Increase timeout proportionally to file size
+                # For each MB of file size, add 10 seconds to the timeout (adjust as needed)
+                size_adjustment = total_size_mb * 10
+                # Cap the maximum timeout adjustment at 5 minutes (300 seconds)
+                size_adjustment = min(size_adjustment, 300)
+                TIMEOUT = base_timeout + size_adjustment
+                logger.info(
+                    f"Adjusted timeout for API call with {len(files_list)} files (total size: {total_size_mb:.2f}MB). Base timeout: {base_timeout}s, New timeout: {TIMEOUT}s"
+                )
+            else:
+                TIMEOUT = base_timeout
             # Execute the model call with timeout
             response = await asyncio.wait_for(f(**params), timeout=TIMEOUT)

edsl/prompts/prompt.py CHANGED Viewed

@@ -290,6 +290,7 @@ class Prompt(PersistenceMixin, RepresentationMixin):
             return result
         except Exception as e:
             print(f"Error rendering prompt: {e}")
+            raise e
             return self
     @staticmethod

edsl/questions/question_list.py CHANGED Viewed

@@ -299,23 +299,24 @@ class ListResponseValidator(ResponseValidatorABC):
         # This method can now be removed since validation is handled in the Pydantic model
         pass
-    def fix(self, response, verbose=False):
+    def fix(self, response, verbose=False) -> dict[str, Any]:
         """
         Fix common issues in list responses by splitting strings into lists.
         Examples:
             >>> from edsl import QuestionList
-            >>> q = QuestionList.example(min_list_items=2, max_list_items=4)
-            >>> validator = q.response_validator
+            >>> q_constrained = QuestionList.example(min_list_items=2, max_list_items=4)
+            >>> validator_constrained = q_constrained.response_validator
+            >>> q_permissive = QuestionList.example(permissive=True)
+            >>> validator_permissive = q_permissive.response_validator
             >>> # Fix a string that should be a list
             >>> bad_response = {"answer": "apple,banana,cherry"}
-            >>> try:
-            ...     validator.validate(bad_response)
-            ... except Exception:
-            ...     fixed = validator.fix(bad_response)
-            ...     validated = validator.validate(fixed)
-            ...     validated  # Show full response
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['apple', 'banana', 'cherry']}
+            >>> validator_constrained.validate(fixed)  # Show full response after validation
             {'answer': ['apple', 'banana', 'cherry'], 'comment': None, 'generated_tokens': None}
             >>> # Fix using generated_tokens when answer is invalid
@@ -323,12 +324,10 @@ class ListResponseValidator(ResponseValidatorABC):
             ...     "answer": None,
             ...     "generated_tokens": "pizza, pasta, salad"
             ... }
-            >>> try:
-            ...     validator.validate(bad_response)
-            ... except Exception:
-            ...     fixed = validator.fix(bad_response)
-            ...     validated = validator.validate(fixed)
-            ...     validated
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['pizza', ' pasta', ' salad']}
+            >>> validator_constrained.validate(fixed)
             {'answer': ['pizza', ' pasta', ' salad'], 'comment': None, 'generated_tokens': None}
             >>> # Preserve comments during fixing
@@ -336,17 +335,74 @@ class ListResponseValidator(ResponseValidatorABC):
             ...     "answer": "red,blue,green",
             ...     "comment": "These are colors"
             ... }
-            >>> fixed = validator.fix(bad_response)
-            >>> fixed == {
+            >>> fixed_output = validator_constrained.fix(bad_response)
+            >>> fixed_output
+            {'answer': ['red', 'blue', 'green'], 'comment': 'These are colors'}
+            >>> validated_output = validator_constrained.validate(fixed_output)
+            >>> validated_output == {
             ...     "answer": ["red", "blue", "green"],
-            ...     "comment": "These are colors"
+            ...     "comment": "These are colors",
+            ...     "generated_tokens": None
             ... }
             True
+            >>> # Fix an empty string answer
+            >>> bad_response = {"answer": ""}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': []}
+            >>> validator_permissive.validate(fixed)
+            {'answer': [], 'comment': None, 'generated_tokens': None}
+            >>> # Fix a single item string answer (no commas)
+            >>> bad_response = {"answer": "single_item"}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['single_item']}
+            >>> validator_permissive.validate(fixed)
+            {'answer': ['single_item'], 'comment': None, 'generated_tokens': None}
+            >>> # Fix when answer is None and no generated_tokens
+            >>> bad_response = {"answer": None}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': []}
+            >>> validator_permissive.validate(fixed)
+            {'answer': [], 'comment': None, 'generated_tokens': None}
+            >>> # Fix when answer key is missing but generated_tokens is present
+            >>> bad_response = {"generated_tokens": "token1,token2"}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['token1', 'token2']}
+            >>> validator_constrained.validate(fixed) # 2 items, OK for constrained validator
+            {'answer': ['token1', 'token2'], 'comment': None, 'generated_tokens': None}
+            >>> # Fix when answer key is missing and generated_tokens is an empty string
+            >>> bad_response = {"generated_tokens": ""}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': []}
+            >>> validator_permissive.validate(fixed)
+            {'answer': [], 'comment': None, 'generated_tokens': None}
+            >>> # Fix when answer key is missing and generated_tokens is a single item
+            >>> bad_response = {"generated_tokens": "single_token"}
+            >>> fixed = validator_constrained.fix(bad_response)
+            >>> fixed
+            {'answer': ['single_token']}
+            >>> validator_permissive.validate(fixed)
+            {'answer': ['single_token'], 'comment': None, 'generated_tokens': None}
         """
         if verbose:
             print(f"Fixing list response: {response}")
         answer = str(response.get("answer") or response.get("generated_tokens", ""))
-        result = {"answer": answer.split(",")}
+        if "," in answer:
+            result = {"answer": answer.split(",")}
+        elif answer == "":
+            result = {"answer": []}
+        else:
+            result = {"answer": [answer]}
         if "comment" in response:
             result["comment"] = response["comment"]
         return result
@@ -395,7 +451,7 @@ class QuestionList(QuestionBase):
         self.include_comment = include_comment
         self.answering_instructions = answering_instructions
-        self.question_presentations = question_presentation
+        self.question_presentation = question_presentation
     def create_response_model(self):
         return create_model(self.min_list_items, self.max_list_items, self.permissive)

edsl/results/results.py CHANGED Viewed

@@ -771,6 +771,10 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
     def to_dataset(self) -> "Dataset":
         return self.select()
+    def optimzie_scenarios(self):
+        for result in self.data:
+            result.scenario.offload(inplace=True)
     def to_dict(
         self,
         sort: bool = False,
@@ -778,9 +782,12 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
         include_cache: bool = True,
         include_task_history: bool = False,
         include_cache_info: bool = True,
+        offload_scenarios: bool = True,
     ) -> dict[str, Any]:
         from ..caching import Cache
+        if offload_scenarios:
+            self.optimzie_scenarios()
         if sort:
             data = sorted([result for result in self.data], key=lambda x: hash(x))
         else:
@@ -809,7 +816,7 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
             )
         if self.task_history.has_unfixed_exceptions or include_task_history:
-            d.update({"task_history": self.task_history.to_dict()})
+            d.update({"task_history": self.task_history.to_dict(offload_content=True)})
         if add_edsl_version:
             from .. import __version__

edsl/scenarios/file_store.py CHANGED Viewed

@@ -446,9 +446,7 @@ class FileStore(Scenario):
         if suffix is None:
             suffix = self.suffix
         if self.binary:
-            file_like_object = self.base64_to_file(
-                self["base64_string"], is_binary=True
-            )
+            file_like_object = self.base64_to_file(self.base64_string, is_binary=True)
         else:
             file_like_object = self.base64_to_text_file(self.base64_string)
@@ -765,15 +763,13 @@ class FileStore(Scenario):
         if name.startswith("__") and name.endswith("__"):
             raise AttributeError(name)
-        # Only try to access suffix if it's in our __dict__
-        if hasattr(self, "_data") and "suffix" in self._data:
-            if self._data["suffix"] == "csv":
-                # Get the pandas DataFrame
-                df = self.to_pandas()
-                # Check if the requested attribute exists in the DataFrame
-                if hasattr(df, name):
-                    return getattr(df, name)
-        # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
+        # Check for _data directly in __dict__ to avoid recursion
+        _data = self.__dict__.get("_data", None)
+        if _data and _data.get("suffix") == "csv":
+            df = self.to_pandas()
+            if hasattr(df, name):
+                return getattr(df, name)
         raise AttributeError(
             f"'{self.__class__.__name__}' object has no attribute '{name}'"
         )

edsl/scenarios/scenario.py CHANGED Viewed

@@ -264,9 +264,49 @@ class Scenario(Base, UserDict):
         """Display a scenario as a table."""
         return self.to_dataset().table(tablefmt=tablefmt)
-    def to_dict(self, add_edsl_version: bool = True) -> dict:
+    def offload(self, inplace=False) -> "Scenario":
+        """
+        Offloads base64-encoded content from the scenario by replacing 'base64_string'
+        fields with 'offloaded'. This reduces memory usage.
+        Args:
+            inplace (bool): If True, modify the current scenario. If False, return a new one.
+        Returns:
+            Scenario: The modified scenario (either self or a new instance).
+        """
+        from edsl.scenarios import FileStore
+        from edsl.prompts import Prompt
+        target = self if inplace else Scenario()
+        for key, value in self.items():
+            if isinstance(value, FileStore):
+                file_store_dict = value.to_dict()
+                if "base64_string" in file_store_dict:
+                    file_store_dict["base64_string"] = "offloaded"
+                modified_value = FileStore.from_dict(file_store_dict)
+            elif isinstance(value, dict) and "base64_string" in value:
+                value_copy = value.copy()
+                value_copy["base64_string"] = "offloaded"
+                modified_value = value_copy
+            else:
+                modified_value = value
+            target[key] = modified_value
+        return target
+    def to_dict(
+        self, add_edsl_version: bool = True, offload_base64: bool = False
+    ) -> dict:
         """Convert a scenario to a dictionary.
+        Args:
+            add_edsl_version: If True, adds the EDSL version to the returned dictionary.
+            offload_base64: If True, replaces any base64_string fields with 'offloaded'
+                           to reduce memory usage.
         Example:
         >>> s = Scenario({"food": "wood chips"})
@@ -283,7 +323,15 @@ class Scenario(Base, UserDict):
         d = self.data.copy()
         for key, value in d.items():
             if isinstance(value, FileStore) or isinstance(value, Prompt):
-                d[key] = value.to_dict(add_edsl_version=add_edsl_version)
+                value_dict = value.to_dict(add_edsl_version=add_edsl_version)
+                if (
+                    offload_base64
+                    and isinstance(value_dict, dict)
+                    and "base64_string" in value_dict
+                ):
+                    value_dict["base64_string"] = "offloaded"
+                d[key] = value_dict
         if add_edsl_version:
             from edsl import __version__

edsl/scenarios/scenario_list.py CHANGED Viewed

@@ -145,22 +145,18 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
         """Initialize a new ScenarioList with optional data and codebook."""
         self._data_class = data_class
         self.data = self._data_class([])
-        warned = False
         for item in data or []:
-            try:
-                _ = json.dumps(item.to_dict())
-            except:
-                import warnings
-                if not warned:
-                    warnings.warn(
-                        f"One or more items in the data list are not JSON serializable. "
-                        "This would prevent running a job that uses this ScenarioList."
-                        "One solution is to use 'str(item)' to convert the item to a string before adding."
-                    )
-                    warned = True
             self.data.append(item)
         self.codebook = codebook or {}
+    def is_serializable(self):
+        for item in self.data:
+            try:
+                _ = json.dumps(item.to_dict())
+            except Exception as e:
+                return False
+        return True
     # Required MutableSequence abstract methods
     def __getitem__(self, index):
         """Get item at index."""
@@ -360,6 +356,32 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
                 new_scenarios.append(Scenario(new_scenario))
         return new_scenarios
+    @classmethod
+    def from_search_terms(cls, search_terms: List[str]) -> ScenarioList:
+        """Create a ScenarioList from a list of search terms, using Wikipedia.
+        Args:
+            search_terms: A list of search terms.
+        """
+        from ..utilities.wikipedia import fetch_wikipedia_content
+        results = fetch_wikipedia_content(search_terms)
+        return cls([Scenario(result) for result in results])
+    def augment_with_wikipedia(self, search_key:str, content_only: bool = True, key_name: str = "wikipedia_content") -> ScenarioList:
+        """Augment the ScenarioList with Wikipedia content."""
+        search_terms = self.select(search_key).to_list()
+        wikipedia_results = ScenarioList.from_search_terms(search_terms)
+        new_sl = ScenarioList(data = [], codebook = self.codebook)
+        for scenario, wikipedia_result in zip(self, wikipedia_results):
+            if content_only:
+                scenario[key_name] = wikipedia_result["content"]
+                new_sl.append(scenario)
+            else:
+                scenario[key_name] = wikipedia_result
+                new_sl.append(scenario)
+        return new_sl
     def pivot(
         self,

edsl/surveys/survey.py CHANGED Viewed

@@ -384,6 +384,10 @@ class Survey(Base):
         if question_name not in self.question_name_to_index:
             raise SurveyError(f"Question name {question_name} not found in survey.")
         return self.questions[self.question_name_to_index[question_name]]
+    def get(self, question_name: str) -> QuestionBase:
+        """Return the question object given the question name."""
+        return self._get_question_by_name(question_name)
     def question_names_to_questions(self) -> dict:
         """Return a dictionary mapping question names to question attributes."""

edsl 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl

edsl 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl