PyPI - edsl - Versions diffs - 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl - Mend

edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

edsl/__init__.py +8 -1
edsl/__init__original.py +134 -0
edsl/__version__.py +1 -1
edsl/agents/agent.py +29 -0
edsl/agents/agent_list.py +36 -1
edsl/base/base_class.py +281 -151
edsl/base/data_transfer_models.py +15 -4
edsl/buckets/__init__.py +8 -3
edsl/buckets/bucket_collection.py +9 -3
edsl/buckets/model_buckets.py +4 -2
edsl/buckets/token_bucket.py +2 -2
edsl/buckets/token_bucket_client.py +5 -3
edsl/caching/cache.py +131 -62
edsl/caching/cache_entry.py +70 -58
edsl/caching/sql_dict.py +17 -0
edsl/cli.py +99 -0
edsl/config/config_class.py +16 -0
edsl/conversation/__init__.py +31 -0
edsl/coop/coop.py +276 -242
edsl/coop/coop_jobs_objects.py +59 -0
edsl/coop/coop_objects.py +29 -0
edsl/coop/coop_regular_objects.py +26 -0
edsl/coop/utils.py +24 -19
edsl/dataset/dataset.py +338 -101
edsl/dataset/dataset_operations_mixin.py +216 -180
edsl/db_list/sqlite_list.py +349 -0
edsl/inference_services/__init__.py +40 -5
edsl/inference_services/exceptions.py +11 -0
edsl/inference_services/services/anthropic_service.py +5 -2
edsl/inference_services/services/aws_bedrock.py +6 -2
edsl/inference_services/services/azure_ai.py +6 -2
edsl/inference_services/services/google_service.py +7 -3
edsl/inference_services/services/mistral_ai_service.py +6 -2
edsl/inference_services/services/open_ai_service.py +6 -2
edsl/inference_services/services/perplexity_service.py +6 -2
edsl/inference_services/services/test_service.py +94 -5
edsl/interviews/answering_function.py +167 -59
edsl/interviews/interview.py +124 -72
edsl/interviews/interview_task_manager.py +10 -0
edsl/interviews/request_token_estimator.py +8 -0
edsl/invigilators/invigilators.py +35 -13
edsl/jobs/async_interview_runner.py +146 -104
edsl/jobs/data_structures.py +6 -4
edsl/jobs/decorators.py +61 -0
edsl/jobs/fetch_invigilator.py +61 -18
edsl/jobs/html_table_job_logger.py +14 -2
edsl/jobs/jobs.py +180 -104
edsl/jobs/jobs_component_constructor.py +2 -2
edsl/jobs/jobs_interview_constructor.py +2 -0
edsl/jobs/jobs_pricing_estimation.py +154 -113
edsl/jobs/jobs_remote_inference_logger.py +4 -0
edsl/jobs/jobs_runner_status.py +30 -25
edsl/jobs/progress_bar_manager.py +79 -0
edsl/jobs/remote_inference.py +35 -1
edsl/key_management/key_lookup_builder.py +6 -1
edsl/language_models/language_model.py +110 -12
edsl/language_models/model.py +10 -3
edsl/language_models/price_manager.py +176 -71
edsl/language_models/registry.py +5 -0
edsl/notebooks/notebook.py +77 -10
edsl/questions/VALIDATION_README.md +134 -0
edsl/questions/__init__.py +24 -1
edsl/questions/exceptions.py +21 -0
edsl/questions/question_dict.py +201 -16
edsl/questions/question_multiple_choice_with_other.py +624 -0
edsl/questions/question_registry.py +2 -1
edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
edsl/questions/validation_analysis.py +185 -0
edsl/questions/validation_cli.py +131 -0
edsl/questions/validation_html_report.py +404 -0
edsl/questions/validation_logger.py +136 -0
edsl/results/result.py +115 -46
edsl/results/results.py +702 -171
edsl/scenarios/construct_download_link.py +16 -3
edsl/scenarios/directory_scanner.py +226 -226
edsl/scenarios/file_methods.py +5 -0
edsl/scenarios/file_store.py +150 -9
edsl/scenarios/handlers/__init__.py +5 -1
edsl/scenarios/handlers/mp4_file_store.py +104 -0
edsl/scenarios/handlers/webm_file_store.py +104 -0
edsl/scenarios/scenario.py +120 -101
edsl/scenarios/scenario_list.py +800 -727
edsl/scenarios/scenario_list_gc_test.py +146 -0
edsl/scenarios/scenario_list_memory_test.py +214 -0
edsl/scenarios/scenario_list_source_refactor.md +35 -0
edsl/scenarios/scenario_selector.py +5 -4
edsl/scenarios/scenario_source.py +1990 -0
edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
edsl/surveys/survey.py +22 -0
edsl/tasks/__init__.py +4 -2
edsl/tasks/task_history.py +198 -36
edsl/tests/scenarios/test_ScenarioSource.py +51 -0
edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
edsl/utilities/__init__.py +2 -1
edsl/utilities/decorators.py +121 -0
edsl/utilities/memory_debugger.py +1010 -0
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
edsl/jobs/jobs_runner_asyncio.py +0 -281
edsl/language_models/unused/fake_openai_service.py +0 -60
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0

edsl/scenarios/file_store.py CHANGED Viewed

@@ -294,10 +294,23 @@ class FileStore(Scenario):
     def upload_google(self, refresh: bool = False) -> None:
         import google.generativeai as genai
+        import google
-        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
-        google_info = genai.upload_file(self.path, mime_type=self.mime_type)
-        self.external_locations["google"] = google_info.to_dict()
+        try:
+            genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+            google_info = genai.upload_file(self.path, mime_type=self.mime_type)
+            self.external_locations["google"] = google_info.to_dict()
+            while True:
+                file_metadata = genai.get_file(name=google_info.name)
+                file_state = file_metadata.state
+                if file_state == 2:  # "ACTIVE":
+                    break
+                elif file_state == 10:  # "FAILED":
+                    break
+        except Exception as e:
+            print(f"Error uploading to Google: {e}")
+            raise
     @classmethod
     @remove_edsl_version
@@ -593,6 +606,128 @@ class FileStore(Scenario):
         # Check if the mime type starts with 'image/'
         return self.mime_type.startswith("image/")
+    def is_video(self) -> bool:
+        """
+        Check if the file is a video by examining its MIME type.
+        Returns:
+            bool: True if the file is a video, False otherwise.
+        Examples:
+            >>> fs = FileStore.example("mp4")
+            >>> fs.is_video()
+            True
+            >>> fs = FileStore.example("webm")
+            >>> fs.is_video()
+            True
+            >>> fs = FileStore.example("txt")
+            >>> fs.is_video()
+            False
+        """
+        # Check if the mime type starts with 'video/'
+        return self.mime_type.startswith("video/")
+    def get_video_metadata(self) -> dict:
+        """
+        Get metadata about a video file such as duration, dimensions, codec, etc.
+        Uses FFmpeg to extract the information if available.
+        Returns:
+            dict: A dictionary containing video metadata, or a dictionary with
+                 error information if metadata extraction fails.
+        Raises:
+            ValueError: If the file is not a video.
+        Example:
+            >>> fs = FileStore.example("mp4")
+            >>> metadata = fs.get_video_metadata()
+            >>> isinstance(metadata, dict)
+            True
+        """
+        if not self.is_video():
+            raise ValueError("This file is not a video")
+        # We'll try to use ffprobe (part of ffmpeg) to get metadata
+        import subprocess
+        import json
+        try:
+            # Run ffprobe to get video metadata in JSON format
+            result = subprocess.run(
+                [
+                    "ffprobe",
+                    "-v",
+                    "quiet",
+                    "-print_format",
+                    "json",
+                    "-show_format",
+                    "-show_streams",
+                    self.path,
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            # Parse the JSON output
+            metadata = json.loads(result.stdout)
+            # Extract some common useful fields into a more user-friendly format
+            simplified = {
+                "format": metadata.get("format", {}).get("format_name", "unknown"),
+                "duration_seconds": float(
+                    metadata.get("format", {}).get("duration", 0)
+                ),
+                "size_bytes": int(metadata.get("format", {}).get("size", 0)),
+                "bit_rate": int(metadata.get("format", {}).get("bit_rate", 0)),
+                "streams": len(metadata.get("streams", [])),
+            }
+            # Add video stream info if available
+            video_streams = [
+                s for s in metadata.get("streams", []) if s.get("codec_type") == "video"
+            ]
+            if video_streams:
+                video = video_streams[0]  # Get the first video stream
+                simplified["video"] = {
+                    "codec": video.get("codec_name", "unknown"),
+                    "width": video.get("width", 0),
+                    "height": video.get("height", 0),
+                    "frame_rate": eval(
+                        video.get("r_frame_rate", "0/1")
+                    ),  # Convert "30/1" to 30.0
+                    "pixel_format": video.get("pix_fmt", "unknown"),
+                }
+            # Add audio stream info if available
+            audio_streams = [
+                s for s in metadata.get("streams", []) if s.get("codec_type") == "audio"
+            ]
+            if audio_streams:
+                audio = audio_streams[0]  # Get the first audio stream
+                simplified["audio"] = {
+                    "codec": audio.get("codec_name", "unknown"),
+                    "channels": audio.get("channels", 0),
+                    "sample_rate": audio.get("sample_rate", "unknown"),
+                }
+            # Return both the complete metadata and simplified version
+            return {"simplified": simplified, "full": metadata}
+        except (
+            subprocess.SubprocessError,
+            FileNotFoundError,
+            json.JSONDecodeError,
+        ) as e:
+            # If ffprobe is not available or fails, return basic info
+            return {
+                "error": str(e),
+                "format": self.suffix,
+                "mime_type": self.mime_type,
+                "size_bytes": self.size,
+            }
     def get_image_dimensions(self) -> tuple:
         """
         Get the dimensions (width, height) of an image file.
@@ -626,12 +761,18 @@ class FileStore(Scenario):
         """
         Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
         """
-        if self.suffix == "csv":
-            # Get the pandas DataFrame
-            df = self.to_pandas()
-            # Check if the requested attribute exists in the DataFrame
-            if hasattr(df, name):
-                return getattr(df, name)
+        # Special case for pickle protocol
+        if name.startswith("__") and name.endswith("__"):
+            raise AttributeError(name)
+        # Only try to access suffix if it's in our __dict__
+        if hasattr(self, "_data") and "suffix" in self._data:
+            if self._data["suffix"] == "csv":
+                # Get the pandas DataFrame
+                df = self.to_pandas()
+                # Check if the requested attribute exists in the DataFrame
+                if hasattr(df, name):
+                    return getattr(df, name)
         # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
         raise AttributeError(
             f"'{self.__class__.__name__}' object has no attribute '{name}'"

edsl/scenarios/handlers/__init__.py CHANGED Viewed

@@ -12,7 +12,9 @@ __all__ = [
     "LaTeXMethods",
     "PyMethods",
     "SQLiteMethods",
-    "JpegMethods"
+    "JpegMethods",
+    "Mp4Methods",
+    "WebmMethods"
 ]
 from .pdf_file_store import PdfMethods
@@ -29,3 +31,5 @@ from .latex_file_store import LaTeXMethods
 from .py_file_store import PyMethods
 from .sqlite_file_store import SQLiteMethods
 from .jpeg_file_store import JpegMethods
+from .mp4_file_store import Mp4Methods
+from .webm_file_store import WebmMethods

edsl/scenarios/handlers/mp4_file_store.py ADDED Viewed

@@ -0,0 +1,104 @@
+import tempfile
+from ..file_methods import FileMethods
+class Mp4Methods(FileMethods):
+    """
+    Handler for MP4 video files.
+    This class provides methods to handle MP4 video files in both notebook
+    and system environments, including viewing and creating example videos.
+    """
+    suffix = "mp4"
+    def view_system(self):
+        """
+        Open the MP4 file with the system's default video player.
+        """
+        import os
+        import subprocess
+        if os.path.exists(self.path):
+            try:
+                if (os_name := os.name) == "posix":
+                    subprocess.run(["open", self.path], check=True)  # macOS
+                elif os_name == "nt":
+                    os.startfile(self.path)  # Windows
+                else:
+                    subprocess.run(["xdg-open", self.path], check=True)  # Linux
+            except Exception as e:
+                print(f"Error opening MP4: {e}")
+        else:
+            print("MP4 file was not found.")
+    def view_notebook(self):
+        """
+        Display the MP4 video in a Jupyter notebook using IPython's HTML display.
+        """
+        from IPython.display import HTML, display
+        import base64
+        # Read the video file and encode it as base64
+        with open(self.path, 'rb') as f:
+            video_data = f.read()
+        video_base64 = base64.b64encode(video_data).decode('utf-8')
+        # Create an HTML5 video element with the base64-encoded video
+        video_html = f"""
+        <video width="640" height="360" controls>
+            <source src="data:video/mp4;base64,{video_base64}" type="video/mp4">
+            Your browser does not support the video tag.
+        </video>
+        """
+        display(HTML(video_html))
+    def extract_text(self):
+        """
+        Extract text from the video using subtitle extraction (if available).
+        Currently returns a message that text extraction is not supported for videos.
+        Returns:
+            str: Message indicating text extraction is not supported
+        """
+        return "Text extraction is not supported for video files."
+    def example(self):
+        """
+        Create a simple example MP4 file.
+        Uses FFmpeg to generate a test video pattern if available,
+        otherwise creates a minimal MP4 header.
+        Returns:
+            str: Path to the created example MP4 file
+        """
+        import os
+        import subprocess
+        # Create a temporary file for the output
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
+            output_path = f.name
+        try:
+            # Try to use ffmpeg to generate a test pattern video
+            subprocess.run(
+                [
+                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
+                    "-vcodec", "libx264", "-pix_fmt", "yuv420p", output_path
+                ],
+                check=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE
+            )
+            return output_path
+        except (subprocess.SubprocessError, FileNotFoundError):
+            # If ffmpeg is not available, create a tiny placeholder MP4 file
+            # Using a simple empty binary file with the .mp4 extension
+            with open(output_path, 'wb') as f:
+                # Just write a simple 1KB file with MP4 signature
+                f.write(b'\x00\x00\x00\x18\x66\x74\x79\x70\x6D\x70\x34\x32')  # MP4 file signature
+                f.write(b'\x00' * 1000)  # Fill with zeros
+            return output_path

edsl/scenarios/handlers/webm_file_store.py ADDED Viewed

@@ -0,0 +1,104 @@
+import tempfile
+from ..file_methods import FileMethods
+class WebmMethods(FileMethods):
+    """
+    Handler for WebM video files.
+    This class provides methods to handle WebM video files in both notebook
+    and system environments, including viewing and creating example videos.
+    WebM is an open, royalty-free video format designed for the web.
+    """
+    suffix = "webm"
+    def view_system(self):
+        """
+        Open the WebM file with the system's default video player.
+        """
+        import os
+        import subprocess
+        if os.path.exists(self.path):
+            try:
+                if (os_name := os.name) == "posix":
+                    subprocess.run(["open", self.path], check=True)  # macOS
+                elif os_name == "nt":
+                    os.startfile(self.path)  # Windows
+                else:
+                    subprocess.run(["xdg-open", self.path], check=True)  # Linux
+            except Exception as e:
+                print(f"Error opening WebM: {e}")
+        else:
+            print("WebM file was not found.")
+    def view_notebook(self):
+        """
+        Display the WebM video in a Jupyter notebook using IPython's HTML display.
+        """
+        from IPython.display import HTML, display
+        import base64
+        # Read the video file and encode it as base64
+        with open(self.path, 'rb') as f:
+            video_data = f.read()
+        video_base64 = base64.b64encode(video_data).decode('utf-8')
+        # Create an HTML5 video element with the base64-encoded video
+        video_html = f"""
+        <video width="640" height="360" controls>
+            <source src="data:video/webm;base64,{video_base64}" type="video/webm">
+            Your browser does not support the video tag.
+        </video>
+        """
+        display(HTML(video_html))
+    def extract_text(self):
+        """
+        Extract text from the video using subtitle extraction (if available).
+        Currently returns a message that text extraction is not supported for videos.
+        Returns:
+            str: Message indicating text extraction is not supported
+        """
+        return "Text extraction is not supported for video files."
+    def example(self):
+        """
+        Create a simple example WebM file.
+        Uses FFmpeg to generate a test video pattern in WebM format if available,
+        otherwise creates a minimal WebM header.
+        Returns:
+            str: Path to the created example WebM file
+        """
+        import os
+        import subprocess
+        # Create a temporary file for the output
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as f:
+            output_path = f.name
+        try:
+            # Try to use ffmpeg to generate a test pattern video in WebM format
+            subprocess.run(
+                [
+                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
+                    "-c:v", "libvpx", "-b:v", "1M", output_path
+                ],
+                check=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE
+            )
+            return output_path
+        except (subprocess.SubprocessError, FileNotFoundError):
+            # If ffmpeg is not available, create a minimal WebM file
+            with open(output_path, 'wb') as f:
+                # WebM starts with EBML header (1A 45 DF A3)
+                f.write(b'\x1A\x45\xDF\xA3')  # EBML signature
+                f.write(b'\x00' * 1000)  # Fill with zeros
+            return output_path

edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl

edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl