PyPI - edsl - Versions diffs - 0.1.44__py3-none-any.whl → 0.1.46__py3-none-any.whl - Mend

edsl 0.1.44__py3-none-any.whl → 0.1.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

edsl/Base.py +7 -3
edsl/__version__.py +1 -1
edsl/agents/InvigilatorBase.py +3 -1
edsl/agents/PromptConstructor.py +66 -91
edsl/agents/QuestionInstructionPromptBuilder.py +160 -79
edsl/agents/QuestionTemplateReplacementsBuilder.py +80 -17
edsl/agents/question_option_processor.py +15 -6
edsl/coop/CoopFunctionsMixin.py +3 -4
edsl/coop/coop.py +171 -96
edsl/data/RemoteCacheSync.py +10 -9
edsl/enums.py +3 -3
edsl/inference_services/AnthropicService.py +11 -9
edsl/inference_services/AvailableModelFetcher.py +2 -0
edsl/inference_services/AwsBedrock.py +1 -2
edsl/inference_services/AzureAI.py +12 -9
edsl/inference_services/GoogleService.py +9 -4
edsl/inference_services/InferenceServicesCollection.py +2 -2
edsl/inference_services/MistralAIService.py +1 -2
edsl/inference_services/OpenAIService.py +9 -4
edsl/inference_services/PerplexityService.py +2 -1
edsl/inference_services/{GrokService.py → XAIService.py} +2 -2
edsl/inference_services/registry.py +2 -2
edsl/jobs/AnswerQuestionFunctionConstructor.py +12 -1
edsl/jobs/Jobs.py +24 -17
edsl/jobs/JobsChecks.py +10 -13
edsl/jobs/JobsPrompts.py +49 -26
edsl/jobs/JobsRemoteInferenceHandler.py +4 -5
edsl/jobs/async_interview_runner.py +3 -1
edsl/jobs/check_survey_scenario_compatibility.py +5 -5
edsl/jobs/data_structures.py +3 -0
edsl/jobs/interviews/Interview.py +6 -3
edsl/jobs/interviews/InterviewExceptionEntry.py +12 -0
edsl/jobs/tasks/TaskHistory.py +1 -1
edsl/language_models/LanguageModel.py +6 -3
edsl/language_models/PriceManager.py +45 -5
edsl/language_models/model.py +47 -26
edsl/questions/QuestionBase.py +21 -0
edsl/questions/QuestionBasePromptsMixin.py +103 -0
edsl/questions/QuestionFreeText.py +22 -5
edsl/questions/descriptors.py +4 -0
edsl/questions/question_base_gen_mixin.py +96 -29
edsl/results/Dataset.py +65 -0
edsl/results/DatasetExportMixin.py +320 -32
edsl/results/Result.py +27 -0
edsl/results/Results.py +22 -2
edsl/results/ResultsGGMixin.py +7 -3
edsl/scenarios/DocumentChunker.py +2 -0
edsl/scenarios/FileStore.py +10 -0
edsl/scenarios/PdfExtractor.py +21 -1
edsl/scenarios/Scenario.py +25 -9
edsl/scenarios/ScenarioList.py +226 -24
edsl/scenarios/handlers/__init__.py +1 -0
edsl/scenarios/handlers/docx.py +5 -1
edsl/scenarios/handlers/jpeg.py +39 -0
edsl/surveys/Survey.py +5 -4
edsl/surveys/SurveyFlowVisualization.py +91 -43
edsl/templates/error_reporting/exceptions_table.html +7 -8
edsl/templates/error_reporting/interview_details.html +1 -1
edsl/templates/error_reporting/interviews.html +0 -1
edsl/templates/error_reporting/overview.html +2 -7
edsl/templates/error_reporting/performance_plot.html +1 -1
edsl/templates/error_reporting/report.css +1 -1
edsl/utilities/PrettyList.py +14 -0
edsl-0.1.46.dist-info/METADATA +246 -0
{edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/RECORD +67 -66
edsl-0.1.44.dist-info/METADATA +0 -110
{edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/LICENSE +0 -0
{edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/WHEEL +0 -0

edsl/results/Result.py CHANGED Viewed

@@ -439,6 +439,33 @@ class Result(Base, UserDict):
         from edsl.results.Results import Results
         return Results.example()[0]
+    def score_with_answer_key(self, answer_key: dict) -> dict:
+        """Score the result using an answer key.
+        :param answer_key: A dictionary that maps question_names to answers. A value is either a single acceptable answer or a list, tuple, or set of acceptable answers.
+        :return: A dictionary counting 'correct', 'incorrect', and 'missing' answers; an answer is 'missing' when its question has no entry in the answer key.
+        >>> Result.example()['answer']
+        {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
+        >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
+        >>> Result.example().score_with_answer_key(answer_key)
+        {'correct': 2, 'incorrect': 0, 'missing': 0}
+        >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': ['Great', 'Good']}
+        >>> Result.example().score_with_answer_key(answer_key)
+        {'correct': 2, 'incorrect': 0, 'missing': 0}
+        >>> answer_key = {'how_feeling': 'OKAY', 'how_feeling_yesterday': 'Great'}
+        >>> Result.example().score_with_answer_key(answer_key)
+        {'correct': 1, 'incorrect': 1, 'missing': 0}
+        """
+        final_scores = {'correct': 0, 'incorrect': 0, 'missing': 0}
+        for question_name, answer in self.answer.items():
+            if question_name not in answer_key:
+                final_scores['missing'] += 1
+                continue
+            expected = answer_key[question_name]
+            # Membership is only tested against real collections: a bare `in` would
+            # substring-match a string key ('OK' in 'OKAY') and raise TypeError on a scalar key.
+            is_correct = answer == expected or (
+                isinstance(expected, (list, tuple, set, frozenset)) and answer in expected
+            )
+            final_scores['correct' if is_correct else 'incorrect'] += 1
+        return final_scores
     def score(self, scoring_function: Callable) -> Union[int, float]:
         """Score the result using a passed-in scoring function.

edsl/results/Results.py CHANGED Viewed

@@ -34,7 +34,7 @@ if TYPE_CHECKING:
     from simpleeval import EvalWithCompoundTypes
 from edsl.results.ResultsExportMixin import ResultsExportMixin
-from edsl.results.ResultsGGMixin import ResultsGGMixin
+from edsl.results.ResultsGGMixin import GGPlotMethod
 from edsl.results.results_fetch_mixin import ResultsFetchMixin
 from edsl.utilities.remove_edsl_version import remove_edsl_version
@@ -100,7 +100,7 @@ class NotReadyObject:
 class Mixins(
     ResultsExportMixin,
     ResultsFetchMixin,
-    ResultsGGMixin,
+#    ResultsGGMixin,
 ):
     def long(self):
         return self.table().long()
@@ -151,6 +151,19 @@ class Results(UserList, Mixins, Base):
         "cache_keys",
     ]
+    def ggplot2(
+        self,
+        ggplot_code: str,
+        shape="wide",
+        sql: str = None,
+        remove_prefix: bool = True,
+        debug: bool = False,
+        height=4,
+        width=6,
+        factor_orders: Optional[dict] = None,
+    ):
+        """Plot these results with ggplot2.
+        Thin wrapper: builds a ``GGPlotMethod`` around this object and forwards every argument, positionally and in the order declared here, to its ``ggplot2`` method, returning whatever that call returns.
+        """
+        return GGPlotMethod(self).ggplot2(ggplot_code, shape, sql, remove_prefix, debug, height, width, factor_orders)
     @classmethod
     def from_job_info(cls, job_info: dict) -> Results:
         """
@@ -1277,6 +1290,13 @@ class Results(UserList, Mixins, Base):
         """
         return [r.score(f) for r in self.data]
+    def score_with_answer_key(self, answer_key: dict) -> list:
+        """Score every result using an answer key.
+        :param answer_key: A dictionary passed unchanged to each result's ``score_with_answer_key`` method.
+        :return: A list holding one score per result in ``self.data``, in the same order.
+        """
+        return [r.score_with_answer_key(answer_key) for r in self.data]
     def fetch_remote(self, job_info: "RemoteJobInfo") -> None:
         """

edsl/results/ResultsGGMixin.py CHANGED Viewed

@@ -75,7 +75,11 @@ class GGPlot:
         return self._svg_data
-class ResultsGGMixin:
+class GGPlotMethod:
+    def __init__(self, results: 'Results'):
+        self.results = results
     """Mixin class for ggplot2 plotting."""
     def ggplot2(
@@ -106,9 +110,9 @@ class ResultsGGMixin:
             sql = "select * from self"
         if shape == "long":
-            df = self.sql(sql, shape="long")
+            df = self.results.sql(sql, shape="long")
         elif shape == "wide":
-            df = self.sql(sql, remove_prefix=remove_prefix)
+            df = self.results.sql(sql, remove_prefix=remove_prefix)
         # Convert DataFrame to CSV format
         csv_data = df.to_csv().text

edsl/scenarios/DocumentChunker.py CHANGED Viewed

@@ -85,6 +85,8 @@ class DocumentChunker:
             new_scenario = copy.deepcopy(self.scenario)
             new_scenario[field] = chunk
             new_scenario[field + "_chunk"] = i
+            new_scenario[field + "_char_count"] = len(chunk)
+            new_scenario[field + "_word_count"] = len(chunk.split())
             if include_original:
                 if hash_original:
                     new_scenario[field + "_original"] = hashlib.md5(

edsl/scenarios/FileStore.py CHANGED Viewed

@@ -29,6 +29,12 @@ class FileStore(Scenario):
         if path is None and "filename" in kwargs:
             path = kwargs["filename"]
+        # Check if path is a URL and handle download
+        if path and (path.startswith('http://') or path.startswith('https://')):
+            temp_filestore = self.from_url(path, mime_type=mime_type)
+            path = temp_filestore._path
+            mime_type = temp_filestore.mime_type
         self._path = path  # Store the original path privately
         self._temp_path = None  # Track any generated temporary file
@@ -138,6 +144,10 @@ class FileStore(Scenario):
                 base64_encoded_data = base64.b64encode(binary_data)
                 self.binary = True
         # Convert the base64 bytes to a string
+        except FileNotFoundError:
+            print(f"File not found: {file_path}")
+            print("Current working directory:", os.getcwd())
+            raise
         base64_string = base64_encoded_data.decode("utf-8")
         return base64_string

edsl/scenarios/PdfExtractor.py CHANGED Viewed

@@ -4,10 +4,30 @@ import os
 class PdfExtractor:
     def __init__(self, pdf_path: str):
         self.pdf_path = pdf_path
+        self._has_pymupdf = self._check_pymupdf()
         #self.constructor = parent_object.__class__
+    def _check_pymupdf(self):
+        """Report whether PyMuPDF (imported under the name ``fitz``) can be imported."""
+        try:
+            import fitz
+        except ImportError:
+            return False
+        return True
     def get_pdf_dict(self) -> dict:
-        # Ensure the file exists
+        # First check if the file exists
+        if not os.path.exists(self.pdf_path):
+            raise FileNotFoundError(f"The file {self.pdf_path} does not exist.")
+        # Then check if PyMuPDF is available
+        if not self._has_pymupdf:
+            raise ImportError(
+                "The 'fitz' module (PyMuPDF) is required for PDF extraction. "
+                "Please install it with: pip install pymupdf"
+            )
+        # If we get here, we can safely import and use fitz
         import fitz
         if not os.path.exists(self.pdf_path):

edsl/scenarios/Scenario.py CHANGED Viewed

@@ -64,6 +64,15 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         self.data = data if data is not None else {}
         self.name = name
+    def __mul__(self, scenario_list_or_scenario: Union["ScenarioList", "Scenario"]) -> "ScenarioList":
+        """Multiply this scenario by a ScenarioList or by another Scenario.
+        :raises TypeError: If the right-hand operand is neither a ScenarioList nor a Scenario.
+        """
+        from edsl.scenarios.ScenarioList import ScenarioList
+        other = scenario_list_or_scenario
+        if isinstance(other, ScenarioList):
+            # Hand the work to ScenarioList's own multiplication, with the list on the left.
+            return other * self
+        if isinstance(other, Scenario):
+            # Lift this scenario into a one-element list so the same list multiplication applies.
+            return ScenarioList([self]) * other
+        raise TypeError(f"Cannot multiply Scenario with {type(scenario_list_or_scenario)}")
     def replicate(self, n: int) -> "ScenarioList":
         """Replicate a scenario n times to return a ScenarioList.
@@ -356,11 +365,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
     @classmethod
     def from_pdf(cls, pdf_path: str):
-        from edsl.scenarios.PdfExtractor import PdfExtractor
-        extractor = PdfExtractor(pdf_path)
-        return Scenario(extractor.get_pdf_dict())
+        """Create a Scenario from a PDF file.
+        :param pdf_path: Path of the PDF file handed to ``PdfExtractor``.
+        :return: A Scenario built from the dictionary returned by ``PdfExtractor.get_pdf_dict``.
+        :raises ImportError: If an import fails during extraction; the original error is chained as the cause.
+        """
+        try:
+            from edsl.scenarios.PdfExtractor import PdfExtractor
+            extractor = PdfExtractor(pdf_path)
+            return Scenario(extractor.get_pdf_dict())
+        except ImportError as e:
+            # Chain the original exception so its traceback is not lost behind the friendlier message.
+            raise ImportError(
+                f"Could not extract text from PDF: {str(e)}. "
+                "PDF extraction requires the PyMuPDF library. "
+                "Install it with: pip install pymupdf"
+            ) from e
     @classmethod
     def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
         """
@@ -442,18 +458,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         >>> s = Scenario({"text": "This is a test.\\nThis is a test.\\n\\nThis is a test."})
         >>> s.chunk("text", num_lines = 1)
-        ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 1}), Scenario({'text': '', 'text_chunk': 2}), Scenario({'text': 'This is a test.', 'text_chunk': 3})])
+        ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': 'This is a test.', 'text_chunk': 1, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': '', 'text_chunk': 2, 'text_char_count': 0, 'text_word_count': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 3, 'text_char_count': 15, 'text_word_count': 4})])
         >>> s.chunk("text", num_words = 2)
-        ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0}), Scenario({'text': 'a test.', 'text_chunk': 1}), Scenario({'text': 'This is', 'text_chunk': 2}), Scenario({'text': 'a test.', 'text_chunk': 3}), Scenario({'text': 'This is', 'text_chunk': 4}), Scenario({'text': 'a test.', 'text_chunk': 5})])
+        ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 1, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 2, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 3, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 4, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 5, 'text_char_count': 7, 'text_word_count': 2})])
         >>> s = Scenario({"text": "Hello World"})
         >>> s.chunk("text", num_words = 1, include_original = True)
-        ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'Hello World'})])
+        ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'})])
         >>> s = Scenario({"text": "Hello World"})
         >>> s.chunk("text", num_words = 1, include_original = True, hash_original = True)
-        ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
+        ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
         >>> s.chunk("text")
         Traceback (most recent call last):

edsl/scenarios/ScenarioList.py CHANGED Viewed

@@ -360,6 +360,11 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
         """
         from itertools import product
+        from edsl import Scenario
+        if isinstance(other, Scenario):
+            other = ScenarioList([other])
+        elif not isinstance(other, ScenarioList):
+            raise TypeError(f"Cannot multiply ScenarioList with {type(other)}")
         new_sl = []
         for s1, s2 in list(product(self, other)):
@@ -431,35 +436,98 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
                 new_scenarios.append(new_scenario)
         return ScenarioList(new_scenarios)
-    def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
-        """Concatenate specified fields into a single field.
+    def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";") -> ScenarioList:
+        """Private method to handle concatenation logic for different output types.
         :param fields: The fields to concatenate.
-        :param separator: The separator to use.
+        :param output_type: The type of output ("string", "list", or "set").
+        :param separator: The separator to use for string concatenation.
         Returns:
             ScenarioList: A new ScenarioList with concatenated fields.
-        Example:
-            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
-            >>> s.concatenate(['a', 'b', 'c'])
-            ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
         """
+        # Check if fields is a string and raise an exception
+        if isinstance(fields, str):
+            raise ScenarioError(
+                f"The 'fields' parameter must be a list of field names, not a string. Got '{fields}'."
+            )
         new_scenarios = []
         for scenario in self:
             new_scenario = scenario.copy()
-            concat_values = []
+            values = []
             for field in fields:
                 if field in new_scenario:
-                    concat_values.append(str(new_scenario[field]))
+                    values.append(new_scenario[field])
                     del new_scenario[field]
             new_field_name = f"concat_{'_'.join(fields)}"
-            new_scenario[new_field_name] = separator.join(concat_values)
+            if output_type == "string":
+                # Convert all values to strings and join with separator
+                new_scenario[new_field_name] = separator.join(str(v) for v in values)
+            elif output_type == "list":
+                # Keep as a list
+                new_scenario[new_field_name] = values
+            elif output_type == "set":
+                # Convert to a set (removes duplicates)
+                new_scenario[new_field_name] = set(values)
+            else:
+                raise ValueError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")
             new_scenarios.append(new_scenario)
         return ScenarioList(new_scenarios)
+    def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
+        """Concatenate specified fields into a single string field.
+        :param fields: The fields to concatenate.
+        :param separator: The separator to use.
+        Returns:
+            ScenarioList: A new ScenarioList with concatenated fields.
+        Example:
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
+            >>> s.concatenate(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
+        """
+        return self._concatenate(fields, output_type="string", separator=separator)
+    def concatenate_to_list(self, fields: List[str]) -> ScenarioList:
+        """Concatenate specified fields into a single list field.
+        :param fields: The fields to concatenate.
+        Returns:
+            ScenarioList: A new ScenarioList with fields concatenated into a list.
+        Example:
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
+            >>> s.concatenate_to_list(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
+        """
+        return self._concatenate(fields, output_type="list")
+    def concatenate_to_set(self, fields: List[str]) -> ScenarioList:
+        """Concatenate specified fields into a single set field.
+        :param fields: The fields to concatenate.
+        Returns:
+            ScenarioList: A new ScenarioList with fields concatenated into a set.
+        Example:
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
+            >>> s.concatenate_to_set(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 1, 'c': 3})])
+            >>> s.concatenate_to_set(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': {1, 3}})])
+        """
+        return self._concatenate(fields, output_type="set")
     def unpack_dict(
         self, field: str, prefix: Optional[str] = None, drop_field: bool = False
     ) -> ScenarioList:
@@ -872,7 +940,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         for scenario in sl:
             scenario[name] = value
         return sl
     def rename(self, replacement_dict: dict) -> ScenarioList:
         """Rename the fields in the scenarios.
@@ -885,13 +953,35 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
         """
         new_list = ScenarioList([])
         for obj in self:
             new_obj = obj.rename(replacement_dict)
             new_list.append(new_obj)
         return new_list
+    def replace_names(self, new_names: list) -> ScenarioList:
+        """Rename every field positionally, using the first scenario's field order.
+        :param new_names: The new field names, one per field of the first scenario, in that scenario's key order.
+        :raises ScenarioError: If the number of new names differs from the number of fields in the first scenario.
+        Example:
+        >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
+        >>> s.replace_names(['first_name', 'years'])
+        ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
+        """
+        if not self:
+            return ScenarioList([])
+        old_names = list(self[0].keys())
+        if len(new_names) != len(old_names):
+            raise ScenarioError(
+                f"Length of new names ({len(new_names)}) does not match number of fields ({len(old_names)})"
+            )
+        # Pair old and new names by position and let rename() apply the mapping to each scenario.
+        return self.rename(dict(zip(old_names, new_names)))
     ## NEEDS TO BE FIXED
     # def new_column_names(self, new_names: List[str]) -> ScenarioList:
     #     """Rename the fields in the scenarios.
@@ -910,16 +1000,42 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
     #     return new_list
     @classmethod
-    def from_sqlite(cls, filepath: str, table: str):
-        """Create a ScenarioList from a SQLite database."""
+    def from_sqlite(cls, filepath: str, table: Optional[str] = None, sql_query: Optional[str] = None):
+        """Create a ScenarioList from a SQLite database.
+        Args:
+            filepath (str): Path to the SQLite database file
+            table (Optional[str]): Name of table to query. If None, sql_query must be provided.
+            sql_query (Optional[str]): SQL query to execute. Used only if table is None.
+        Returns:
+            ScenarioList: List of scenarios created from database rows
+        Raises:
+            ValueError: If both table and sql_query are None
+            sqlite3.Error: If there is an error executing the database query; the original error is chained as the cause
+        """
         import sqlite3
-        with sqlite3.connect(filepath) as conn:
-            cursor = conn.cursor()
-            cursor.execute(f"SELECT * FROM {table}")
-            columns = [description[0] for description in cursor.description]
-            data = cursor.fetchall()
-        return cls([Scenario(dict(zip(columns, row))) for row in data])
+        from contextlib import closing
+        if table is None and sql_query is None:
+            raise ValueError("Either table or sql_query must be provided")
+        try:
+            # The connection's own context manager only commits or rolls back;
+            # closing() is what actually releases the connection when the block exits.
+            with closing(sqlite3.connect(filepath)) as conn, conn:
+                cursor = conn.cursor()
+                if table is not None:
+                    # NOTE(review): the table name is interpolated into the SQL text, so it must come from a trusted source.
+                    cursor.execute(f"SELECT * FROM {table}")
+                else:
+                    cursor.execute(sql_query)
+                columns = [description[0] for description in cursor.description]
+                data = cursor.fetchall()
+            return cls([Scenario(dict(zip(columns, row))) for row in data])
+        except sqlite3.Error as e:
+            raise sqlite3.Error(f"Database error occurred: {str(e)}") from e
     @classmethod
     def from_latex(cls, tex_file_path: str):
@@ -935,6 +1051,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             entry = {
                 "line_no": line_no + 1,  # Using 1-based index for line numbers
                 "text": text,
+                "num_words": len(text.split()),
+                "num_chars": len(text),
                 "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
                 "line_after": (
                     non_blank_lines[index + 1][1]
@@ -995,8 +1113,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
         """
         return cls([Scenario(row) for row in df.to_dict(orient="records")])
     @classmethod
+    def from_dta(cls, filepath: str, include_metadata: bool = True) -> ScenarioList:
+        """Create a ScenarioList from a Stata file.
+        Args:
+            filepath (str): Path to the Stata (.dta) file
+            include_metadata (bool): If True, extract variable labels and value labels
+                                    and store them on the ScenarioList as ``codebook``
+        Returns:
+            ScenarioList: A ScenarioList containing the data from the Stata file
+        """
+        import pandas as pd
+        # Read the Stata file with pandas
+        df = pd.read_stata(filepath)
+        # Create the basic ScenarioList
+        scenario_list = cls.from_pandas(df)
+        # Extract and preserve metadata if requested
+        if include_metadata:
+            # The DataFrame returned by read_stata carries no label attributes;
+            # the labels are only available from the StataReader itself.
+            with pd.read_stata(filepath, iterator=True) as reader:
+                # Unlabelled variables come back as empty strings, so drop them.
+                variable_labels = {
+                    name: label for name, label in reader.variable_labels().items() if label
+                }
+                value_labels = reader.value_labels()
+            # Store the metadata in the ScenarioList's codebook
+            if variable_labels or value_labels:
+                scenario_list.codebook = {
+                    'variable_labels': variable_labels,
+                    'value_labels': value_labels
+                }
+        return scenario_list
     def from_wikipedia(cls, url: str, table_index: int = 0):
         """
         Extracts a table from a Wikipedia page.
@@ -1456,7 +1615,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         >>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
         >>> s.chunk('text', num_words=3)
-        ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
+        ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 3}), Scenario({'text': 'fox jumps over', 'text_chunk': 1, 'text_char_count': 14, 'text_word_count': 3}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2, 'text_char_count': 13, 'text_word_count': 3})])
         """
         new_scenarios = []
         for scenario in self:
@@ -1470,6 +1629,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             new_scenarios.extend(replacement_scenarios)
         return ScenarioList(new_scenarios)
+    def collapse(self, field: str) -> ScenarioList:
+        """Group scenarios that agree on every field except one, gathering that field's values into a list.
+        The grouping fields are taken from the first scenario's keys; groups keep the order in which they first appear.
+        Args:
+            field: The field to collapse (whose values will be collected into lists)
+        Returns:
+            ScenarioList: A new ScenarioList with the specified field collapsed into lists
+        Example:
+        >>> s = ScenarioList([
+        ...     Scenario({'category': 'fruit', 'color': 'red', 'item': 'apple'}),
+        ...     Scenario({'category': 'fruit', 'color': 'yellow', 'item': 'banana'}),
+        ...     Scenario({'category': 'fruit', 'color': 'red', 'item': 'cherry'}),
+        ...     Scenario({'category': 'vegetable', 'color': 'green', 'item': 'spinach'})
+        ... ])
+        >>> s.collapse('item')
+        ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry']}), Scenario({'category': 'fruit', 'color': 'yellow', 'item': ['banana']}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach']})])
+        """
+        if not self:
+            return ScenarioList([])
+        # Every field of the first scenario other than the collapsed one identifies a group.
+        other_fields = [name for name in self[0].keys() if name != field]
+        buckets = {}
+        for scenario in self:
+            group_key = tuple(scenario[name] for name in other_fields)
+            buckets.setdefault(group_key, []).append(scenario[field])
+        # Rebuild one scenario per group: the identifying fields first, then the collected list.
+        return ScenarioList([
+            Scenario({**dict(zip(other_fields, group_key)), field: collected})
+            for group_key, collected in buckets.items()
+        ])
 if __name__ == "__main__":
     import doctest

edsl/scenarios/handlers/__init__.py CHANGED Viewed

@@ -11,3 +11,4 @@ from .pptx import PptxMethods
 from .latex import LaTeXMethods
 from .py import PyMethods
 from .sqlite import SQLiteMethods
+from .jpeg import JpegMethods

edsl/scenarios/handlers/docx.py CHANGED Viewed

@@ -37,7 +37,11 @@ class DocxMethods(FileMethods):
             print("DOCX file was not found.")
     def view_notebook(self):
-        import mammoth
+        try:
+            import mammoth
+        except ImportError:
+            print("mammoth is not installed. Please install it using 'pip install mammoth'.")
+            return
         from IPython.display import HTML, display
         with open(self.path, "rb") as docx_file:

edsl/scenarios/handlers/jpeg.py ADDED Viewed

@@ -0,0 +1,39 @@
+import tempfile
+from edsl.scenarios.file_methods import FileMethods
+class JpegMethods(FileMethods):
+    """File methods for JPEG images: viewing the file and generating an example."""
+    suffix = "jpeg"
+    def view_system(self):
+        """Open the image with the operating system's default viewer.
+        Problems are reported with ``print`` rather than raised.
+        """
+        import os
+        import subprocess
+        import sys
+        if os.path.exists(self.path):
+            try:
+                # os.name is "posix" on both macOS and Linux, so macOS must be
+                # detected through sys.platform; otherwise Linux would run `open`.
+                if sys.platform == "darwin":
+                    subprocess.run(["open", self.path], check=True)  # macOS
+                elif os.name == "nt":
+                    os.startfile(self.path)  # Windows
+                else:
+                    subprocess.run(["xdg-open", self.path], check=True)  # Linux
+            except Exception as e:
+                print(f"Error opening JPEG: {e}")
+        else:
+            print("JPEG file was not found.")
+    def view_notebook(self):
+        """Display the image inline using IPython."""
+        from IPython.display import Image, display
+        display(Image(filename=self.path))
+    def example(self):
+        """Write a sine-wave plot to a temporary ``.jpeg`` file and return its path.
+        The file is created with ``delete=False``, so it remains on disk after this call.
+        """
+        import matplotlib.pyplot as plt
+        import numpy as np
+        x = np.linspace(0, 10, 100)
+        y = np.sin(x)
+        # Use a dedicated figure so repeated calls do not draw onto shared pyplot state.
+        fig, ax = plt.subplots()
+        ax.plot(x, y)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpeg") as f:
+            path = f.name
+        # Save after the handle is closed, then release the figure.
+        fig.savefig(path)
+        plt.close(fig)
+        return path

edsl/surveys/Survey.py CHANGED Viewed

@@ -1248,14 +1248,15 @@ class Survey(SurveyExportMixin, Base):
     ###################
     def humanize(
         self,
-        project_name: str,
+        project_name: str = "Project",
         survey_description: Optional[str] = None,
         survey_alias: Optional[str] = None,
         survey_visibility: Optional["VisibilityType"] = "unlisted",
-    ):
+    ) -> dict:
         """
-        Create a survey object on Coop.
-        Then, create a project on Coop so you can share the survey with humans.
+        Send the survey to Coop.
+        Then, create a project on Coop so you can share the survey with human respondents.
         """
         from edsl.coop import Coop

edsl 0.1.44__py3-none-any.whl → 0.1.46__py3-none-any.whl

edsl 0.1.44__py3-none-any.whl → 0.1.46__py3-none-any.whl