PyPI - edsl - Versions diffs - 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl - Mend

edsl 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

edsl/__version__.py +1 -1
edsl/agents/Invigilator.py +4 -3
edsl/agents/InvigilatorBase.py +2 -1
edsl/agents/PromptConstructor.py +92 -21
edsl/agents/QuestionInstructionPromptBuilder.py +68 -9
edsl/agents/QuestionTemplateReplacementsBuilder.py +7 -2
edsl/agents/prompt_helpers.py +2 -2
edsl/coop/coop.py +97 -19
edsl/enums.py +3 -1
edsl/exceptions/coop.py +4 -0
edsl/exceptions/jobs.py +1 -9
edsl/exceptions/language_models.py +8 -4
edsl/exceptions/questions.py +8 -11
edsl/inference_services/AvailableModelFetcher.py +4 -1
edsl/inference_services/DeepSeekService.py +18 -0
edsl/inference_services/registry.py +2 -0
edsl/jobs/Jobs.py +60 -34
edsl/jobs/JobsPrompts.py +64 -3
edsl/jobs/JobsRemoteInferenceHandler.py +42 -25
edsl/jobs/JobsRemoteInferenceLogger.py +1 -1
edsl/jobs/buckets/BucketCollection.py +30 -0
edsl/jobs/data_structures.py +1 -0
edsl/jobs/interviews/Interview.py +1 -1
edsl/jobs/loggers/HTMLTableJobLogger.py +6 -1
edsl/jobs/results_exceptions_handler.py +2 -7
edsl/jobs/tasks/TaskHistory.py +49 -17
edsl/language_models/LanguageModel.py +7 -4
edsl/language_models/ModelList.py +1 -1
edsl/language_models/key_management/KeyLookupBuilder.py +47 -20
edsl/language_models/key_management/models.py +10 -4
edsl/language_models/model.py +49 -0
edsl/prompts/Prompt.py +124 -61
edsl/questions/descriptors.py +37 -23
edsl/questions/question_base_gen_mixin.py +1 -0
edsl/results/DatasetExportMixin.py +35 -6
edsl/results/Result.py +9 -3
edsl/results/Results.py +180 -2
edsl/results/ResultsGGMixin.py +117 -60
edsl/scenarios/PdfExtractor.py +3 -6
edsl/scenarios/Scenario.py +35 -1
edsl/scenarios/ScenarioList.py +22 -3
edsl/scenarios/ScenarioListPdfMixin.py +9 -3
edsl/surveys/Survey.py +1 -1
edsl/templates/error_reporting/base.html +2 -4
edsl/templates/error_reporting/exceptions_table.html +35 -0
edsl/templates/error_reporting/interview_details.html +67 -53
edsl/templates/error_reporting/interviews.html +4 -17
edsl/templates/error_reporting/overview.html +31 -5
edsl/templates/error_reporting/performance_plot.html +1 -1
{edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/METADATA +2 -3
{edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/RECORD +53 -51
{edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/LICENSE +0 -0
{edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/WHEEL +0 -0

edsl/scenarios/Scenario.py CHANGED Viewed

@@ -358,7 +358,41 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
     def from_pdf(cls, pdf_path: str):
         from edsl.scenarios.PdfExtractor import PdfExtractor
-        return PdfExtractor(pdf_path, cls).get_object()
+        extractor = PdfExtractor(pdf_path)
+        return Scenario(extractor.get_pdf_dict())
+    @classmethod
+    def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
+        """
+        Convert each page of a PDF into an image and create key/value for it.
+        :param pdf_path: Path to the PDF file.
+        :param image_format: Format of the output images (default is 'jpeg').
+        :return: ScenarioList instance containing the Scenario instances.
+        The scenario has a key "filepath" and one or more keys "page_{i}" for each page.
+        """
+        import tempfile
+        from pdf2image import convert_from_path
+        from edsl.scenarios import Scenario
+        with tempfile.TemporaryDirectory() as output_folder:
+            # Convert PDF to images
+            images = convert_from_path(pdf_path)
+            scenario_dict = {"filepath":pdf_path}
+            # Save each page as an image and create Scenario instances
+            for i, image in enumerate(images):
+                image_path = os.path.join(output_folder, f"page_{i}.{image_format}")
+                image.save(image_path, image_format.upper())
+                from edsl import FileStore
+                scenario_dict[f"page_{i}"] = FileStore(image_path)
+            scenario = Scenario(scenario_dict)
+            return cls(scenario)
     @classmethod
     def from_docx(cls, docx_path: str) -> "Scenario":

edsl/scenarios/ScenarioList.py CHANGED Viewed

@@ -1135,7 +1135,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         return cls(observations)
     @classmethod
-    def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
+    def from_google_sheet(cls, url: str, sheet_name: str = None, column_names: Optional[List[str]]= None) -> ScenarioList:
         """Create a ScenarioList from a Google Sheet.
         This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
@@ -1145,6 +1145,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             url (str): The URL to the Google Sheet.
             sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
                                         the same as from_excel regarding multiple sheets.
+            column_names (List[str], optional): If provided, use these names for the columns instead
+                                              of the default column names from the sheet.
         Returns:
             ScenarioList: An instance of the ScenarioList class.
@@ -1172,8 +1174,25 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             temp_file.write(response.content)
             temp_filename = temp_file.name
-        # Call the from_excel class method with the temporary file
-        return cls.from_excel(temp_filename, sheet_name=sheet_name)
+        # First create the ScenarioList with default column names
+        scenario_list = cls.from_excel(temp_filename, sheet_name=sheet_name)
+        # If column_names is provided, create a new ScenarioList with the specified names
+        if column_names is not None:
+            if len(column_names) != len(scenario_list[0].keys()):
+                raise ValueError(
+                    f"Number of provided column names ({len(column_names)}) "
+                    f"does not match number of columns in sheet ({len(scenario_list[0].keys())})"
+                )
+            # Create a codebook mapping original keys to new names
+            original_keys = list(scenario_list[0].keys())
+            codebook = dict(zip(original_keys, column_names))
+            # Return new ScenarioList with renamed columns
+            return scenario_list.rename(codebook)
+        else:
+            return scenario_list
     @classmethod
     def from_delimited_file(

edsl/scenarios/ScenarioListPdfMixin.py CHANGED Viewed

@@ -148,13 +148,15 @@ class ScenarioListPdfMixin:
             return False
     @classmethod
-    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
+    def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
         """
         Convert each page of a PDF into an image and create Scenario instances.
         :param pdf_path: Path to the PDF file.
         :param image_format: Format of the output images (default is 'jpeg').
         :return: ScenarioList instance containing the Scenario instances.
+        The scenario list has keys "filepath", "page", "content".
         """
         import tempfile
         from pdf2image import convert_from_path
@@ -171,10 +173,14 @@ class ScenarioListPdfMixin:
                 image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
                 image.save(image_path, image_format.upper())
-                scenario = Scenario._from_filepath_image(image_path)
+                from edsl import FileStore
+                scenario = Scenario({
+                    "filepath":image_path,
+                    "page":i,
+                    "content":FileStore(image_path)
+                    })
                 scenarios.append(scenario)
-            # print(f"Saved {len(images)} pages as images in {output_folder}")
             return cls(scenarios)
     @staticmethod

edsl/surveys/Survey.py CHANGED Viewed

@@ -1281,4 +1281,4 @@ if __name__ == "__main__":
     import doctest
     # doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.SKIP)
-    doctest.testmod(optionflags=doctest.ELLIPSIS)
+    doctest.testmod(optionflags=doctest.ELLIPSIS)

edsl/templates/error_reporting/base.html CHANGED Viewed

@@ -3,7 +3,7 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Exception Details</title>
+    <title>Exceptions Report</title>
     <style>
     {{ css }}
     </style>
@@ -15,9 +15,7 @@
 </head>
 <body>
     {% include 'overview.html' %}
-    {% include 'exceptions_by_type.html' %}
-    {% include 'exceptions_by_model.html' %}
-    {% include 'exceptions_by_question_name.html' %}
+    {% include 'exceptions_table.html' %}
     {% include 'interviews.html' %}
     {% include 'performance_plot.html' %}
 </body>

edsl/templates/error_reporting/exceptions_table.html ADDED Viewed

@@ -0,0 +1,35 @@
+<style>
+    th, td {
+        padding: 0 10px; /* This applies the padding uniformly to all td elements */
+    }
+</style>
+<table border="1">
+    <thead>
+        <tr>
+            <th>Exception Type</th>
+            <th>Service</th>
+            <th>Model</th>
+            <th>Question Name</th>
+            <th>Total</th>
+        </tr>
+    </thead>
+    <tbody>
+        {% for (exception_type, service, model, question_name), count in exceptions_table.items() %}
+            <tr>
+                <td>{{ exception_type }}</td>
+                <td>{{ service }}</td>
+                <td>{{ model }}</td>
+                <td>{{ question_name }}</td>
+                <td>{{ count }}</td>
+            </tr>
+        {% endfor %}
+    </tbody>
+</table>
+<p>
+    <i>Note:</i> You may encounter repeated exceptions where retries were attempted.
+    You can modify the maximum number of attempts for failed API calls in `edsl/config.py`.
+</p>
+<p>
+    Click to expand the details below for information about each exception, including code for reproducing it.
+</p>

edsl/templates/error_reporting/interview_details.html CHANGED Viewed

@@ -1,43 +1,67 @@
-<div class="question">question_name: {{ question }}</div>
+<style>
+    td {
+        padding: 0 10px; /* This applies the padding uniformly to all td elements */
+    }
+    .toggle-btn {
+        background-color: #4CAF50;
+        color: white;
+        border: none;
+        padding: 10px 20px;
+        text-align: center;
+        text-decoration: none;
+        display: inline-block;
+        font-size: 16px;
+        margin: 4px 2px;
+        cursor: pointer;
+        border-radius: 8px;
+        white-space: nowrap;
+    }
+    .toggle-btn span.collapse {
+        display: none;
+    }
+    .exception-content {
+        max-width: 100%; /* Adjust this value based on your layout */
+        overflow-x: auto; /* Enables horizontal scrolling */
+    }
+</style>
-<h2>Exception details</h2>
+<div class="question">question_name: {{ question }}</div>
 {% for exception_message in exceptions %}
 <div class="exception-detail">
-        <div class="exception-header">
+    <div class="exception-header">
         <span class="exception-exception">Exception: {{ exception_message.name }}</span>
-        <button class="toggle-btn">▼</button>
-        </div>
-        <div class="exception-content">
+        <button id="toggleBtn" class="toggle-btn" onclick="toggleButton(this)" aria-expanded="false">
+            <span class="expand"> ▼ </span>
+        </button>
+    </div>
+    <div class="exception-content">
         <table border="1">
-            <tr>
-                <th>Key</th>
-                <th>Value</th>
-            </tr>
             <tr>
                 <td>Interview ID (index in results)</td>
                 <td>{{ index }}</td>
             </tr>
             <tr>
-                <td>Question name (question_name)</td>
+                <td>Question name</td>
                 <td>{{ question }}</td>
             </tr>
             <tr>
-                <td>Question type (question_type)</td>
+                <td>Question type</td>
                 <td>{{ exception_message.question_type }}</td>
             </tr>
             <tr>
                 <td>Human-readable question</td>
                 <td>{{ interview.survey._get_question_by_name(question).html(
                     scenario = interview.scenario,
                     agent = interview.agent,
-                    answers = exception_message.answers)
+                    answers = exception_message.answers
+                    )
                 }}</td>
             </tr>
+            <tr>
+                <td>User Prompt</td>
+                <td><pre>{{ exception_message.rendered_prompts['user_prompt'] }}</pre></td>
+            </tr>
             <tr>
                 <td>Scenario</td>
                 <td>{{ interview.scenario.__repr__() }}</td>
@@ -47,24 +71,20 @@
                 <td>{{ interview.agent.__repr__() }}</td>
             </tr>
             <tr>
-                <td>Model name</td>
-                <td>{{ interview.model.model }}</td>
+                <td>System Prompt</td>
+                <td><pre>{{ exception_message.rendered_prompts['system_prompt'] }}</pre></td>
             </tr>
             <tr>
                 <td>Inference service</td>
                 <td>{{ interview.model._inference_service_ }}</td>
             </tr>
             <tr>
-                <td>Model parameters</td>
-                <td>{{ interview.model.__repr__() }}</td>
-            </tr>
-            <tr>
-                <td>User Prompt</td>
-                <td><pre>{{ exception_message.rendered_prompts['user_prompt'] }}</pre></td>
+                <td>Model name</td>
+                <td>{{ interview.model.model }}</td>
             </tr>
             <tr>
-                <td>System Prompt</td>
-                <td><pre>{{ exception_message.rendered_prompts['system_prompt'] }}</pre></td>
+                <td>Model parameters</td>
+                <td>{{ interview.model.__repr__() }}</td>
             </tr>
             <tr>
                 <td>Raw model response</td>
@@ -77,7 +97,7 @@
                 </td>
             </tr>
             <tr>
-            <td>Code to (likely) reproduce the error</td>
+            <td>Code likely to reproduce the error</td>
             <td>
                 <textarea id="codeToCopy" rows="10" cols="90">{{ exception_message.code_to_reproduce }}</textarea>
                 <button onclick="copyCode()">Copy</button>
@@ -85,32 +105,26 @@
             </tr>
         </table>
-            {% if exception_message.exception.__class__.__name__ == 'QuestionAnswerValidationError' %}
-            <h3>Answer validation details</h3>
-            <table border="1">
-                <tr>
-                    <th>Field</th>
-                    <th>Value</th>
-                </tr>
-                {% for field, (explanation, open_tag, close_tag, value) in exception_message.exception.to_html_dict().items() %}
-                <tr>
-                    <td>{{ field }}: ({{ explanation }})</td>
-                    <td><{{open_tag}}> {{ value | escape }} <{{close_tag}}></td>
-                </tr>
-                {% endfor %}
-            </table>
-        {% endif %}
-            <div class="exception-time">Time: {{ exception_message.time }}</div>
-            <div class="exception-traceback">Traceback:
-                <text>
-                <pre>{{ exception_message.traceback }}</pre>
-                </text>
-            </div>
+        {% if exception_message.exception.__class__.__name__ == 'QuestionAnswerValidationError' %}
+        <h3>Answer validation details</h3>
+        <table border="1">
+            {% for field, (open_tag, close_tag, value) in exception_message.exception.to_html_dict().items() %}
+            <tr>
+                <td>{{ field }}</td>
+                <td><{{ open_tag }}> {{ value | escape }} <{{ close_tag }}></td>
+            </tr>
+            {% endfor %}
+        </table>
+    {% endif %}
+        <br><br>
+        <div class="exception-time">Time: {{ exception_message.time }}</div>
+        <div class="exception-traceback">Traceback:
+            <text>
+            <pre>{{ exception_message.traceback }}</pre>
+            </text>
         </div>
     </div>
+</div>
 {% endfor %}

edsl/templates/error_reporting/interviews.html CHANGED Viewed

@@ -1,19 +1,6 @@
-{% if interviews|length > max_interviews %}
-    <h1>Only showing the first {{ max_interviews }} interviews with errors</h1>
-{% else %}
-    <h1>Showing all interviews</h1>
-{% endif %}
+<h2>Exceptions Details</h2>
 {% for index, interview in interviews.items() %}
-    {% if index < max_interviews %}
-        {% if interview.exceptions != {} %}
-        <div class="interview">Interview: {{ index }} </div>
-        Model: {{ interview.model.model }}
-        <h1>Failing questions</h1>
-        {% endif %}
-        {% for question, exceptions in interview.exceptions.items() %}
-            {% include 'interview_details.html' %}
-        {% endfor %}
-    {% endif %}
+    {% for question, exceptions in interview.exceptions.items() %}
+        {% include 'interview_details.html' %}
+    {% endfor %}
 {% endfor %}

edsl/templates/error_reporting/overview.html CHANGED Viewed

@@ -1,5 +1,31 @@
-<h1>Overview</h1>
-<p>There were {{ interviews|length }} total interview(s). An 'interview' is the result of one survey, taken by one agent, with one model, with one scenario.</p>
-The number of interviews with any exceptions was {{ num_exceptions }}.</p>
-<p>For advice on dealing with exceptions on Expected Parrot,
-see <a href="https://docs.expectedparrot.com/en/latest/exceptions.html">here</a>.</p>
+<style>
+    td {
+        padding: 0 10px; /* This applies the padding uniformly to all td elements */
+    }
+</style>
+<h1>Exceptions Report</h1>
+<p>
+    This report summarizes exceptions encountered in the job that was run.
+</p>
+<p>
+    For advice on dealing with exceptions, please see the EDSL <a href="https://docs.expectedparrot.com/en/latest/exceptions.html">documentation</a> page. <br>
+    You can also post a question at the Expected Parrot <a href="https://discord.com/invite/mxAYkjfy9m">Discord channel</a>, open an issue on <a href="https://github.com/expectedparrot/edsl">GitHub</a>, or send an email to <a href="mailto:info@expectedparrot.com">info@expectedparrot.com</a>.
+</p>
+<h2>Overview</h2>
+<table border="1">
+    <tbody>
+        <tr>
+            <td>Total interviews</td>
+            <td>{{ interviews|length }}</td>
+        </tr>
+        <tr>
+            <td>Interviews with exceptions</td>
+            <td>{{ num_exceptions }}</td>
+        </tr>
+    </tbody>
+</table>
+<p>
+    An "interview" is the result of one survey, taken by one agent, with one model and one scenario (if any).
+</p>

edsl/templates/error_reporting/performance_plot.html CHANGED Viewed

@@ -1,2 +1,2 @@
-<h1>Performance Plot</h1>
+<h2>Performance Plot</h2>
 {{ performance_plot_html }}

{edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: edsl
-Version: 0.1.41
+Version: 0.1.43
 Summary: Create and analyze LLM-based surveys
 Home-page: https://www.expectedparrot.com/
 License: MIT
@@ -17,7 +17,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Dist: aiohttp (>=3.9.1,<4.0.0)
-Requires-Dist: anthropic (>=0.23.1,<0.24.0)
+Requires-Dist: anthropic (>=0.45.0,<0.46.0)
 Requires-Dist: azure-ai-inference (>=1.0.0b3,<2.0.0)
 Requires-Dist: black[jupyter] (>=24.4.2,<25.0.0)
 Requires-Dist: boto3 (>=1.34.161,<2.0.0)
@@ -37,7 +37,6 @@ Requires-Dist: pandas (>=2.1.4,<3.0.0)
 Requires-Dist: platformdirs (>=4.3.6,<5.0.0)
 Requires-Dist: pydot (>=2.0.0,<3.0.0)
 Requires-Dist: pygments (>=2.17.2,<3.0.0)
-Requires-Dist: pymupdf (>=1.24.4,<2.0.0)
 Requires-Dist: pypdf2 (>=3.0.1,<4.0.0)
 Requires-Dist: pyreadstat (>=1.2.7,<2.0.0)
 Requires-Dist: python-docx (>=1.1.0,<2.0.0)

edsl 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl

edsl 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl