edsl 0.1.54__py3-none-any.whl → 0.1.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/buckets/__init__.py +8 -3
  8. edsl/buckets/bucket_collection.py +9 -3
  9. edsl/buckets/model_buckets.py +4 -2
  10. edsl/buckets/token_bucket.py +2 -2
  11. edsl/buckets/token_bucket_client.py +5 -3
  12. edsl/caching/cache.py +131 -62
  13. edsl/caching/cache_entry.py +70 -58
  14. edsl/caching/sql_dict.py +17 -0
  15. edsl/cli.py +99 -0
  16. edsl/config/config_class.py +16 -0
  17. edsl/conversation/__init__.py +31 -0
  18. edsl/coop/coop.py +276 -242
  19. edsl/coop/coop_jobs_objects.py +59 -0
  20. edsl/coop/coop_objects.py +29 -0
  21. edsl/coop/coop_regular_objects.py +26 -0
  22. edsl/coop/utils.py +24 -19
  23. edsl/dataset/dataset.py +338 -101
  24. edsl/db_list/sqlite_list.py +349 -0
  25. edsl/inference_services/__init__.py +40 -5
  26. edsl/inference_services/exceptions.py +11 -0
  27. edsl/inference_services/services/anthropic_service.py +5 -2
  28. edsl/inference_services/services/aws_bedrock.py +6 -2
  29. edsl/inference_services/services/azure_ai.py +6 -2
  30. edsl/inference_services/services/google_service.py +3 -2
  31. edsl/inference_services/services/mistral_ai_service.py +6 -2
  32. edsl/inference_services/services/open_ai_service.py +6 -2
  33. edsl/inference_services/services/perplexity_service.py +6 -2
  34. edsl/inference_services/services/test_service.py +94 -5
  35. edsl/interviews/answering_function.py +167 -59
  36. edsl/interviews/interview.py +124 -72
  37. edsl/interviews/interview_task_manager.py +10 -0
  38. edsl/invigilators/invigilators.py +9 -0
  39. edsl/jobs/async_interview_runner.py +146 -104
  40. edsl/jobs/data_structures.py +6 -4
  41. edsl/jobs/decorators.py +61 -0
  42. edsl/jobs/fetch_invigilator.py +61 -18
  43. edsl/jobs/html_table_job_logger.py +14 -2
  44. edsl/jobs/jobs.py +180 -104
  45. edsl/jobs/jobs_component_constructor.py +2 -2
  46. edsl/jobs/jobs_interview_constructor.py +2 -0
  47. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  48. edsl/jobs/jobs_runner_status.py +30 -25
  49. edsl/jobs/progress_bar_manager.py +79 -0
  50. edsl/jobs/remote_inference.py +35 -1
  51. edsl/key_management/key_lookup_builder.py +6 -1
  52. edsl/language_models/language_model.py +86 -6
  53. edsl/language_models/model.py +10 -3
  54. edsl/language_models/price_manager.py +45 -75
  55. edsl/language_models/registry.py +5 -0
  56. edsl/notebooks/notebook.py +77 -10
  57. edsl/questions/VALIDATION_README.md +134 -0
  58. edsl/questions/__init__.py +24 -1
  59. edsl/questions/exceptions.py +21 -0
  60. edsl/questions/question_dict.py +201 -16
  61. edsl/questions/question_multiple_choice_with_other.py +624 -0
  62. edsl/questions/question_registry.py +2 -1
  63. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  64. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  65. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  66. edsl/questions/validation_analysis.py +185 -0
  67. edsl/questions/validation_cli.py +131 -0
  68. edsl/questions/validation_html_report.py +404 -0
  69. edsl/questions/validation_logger.py +136 -0
  70. edsl/results/result.py +63 -16
  71. edsl/results/results.py +702 -171
  72. edsl/scenarios/construct_download_link.py +16 -3
  73. edsl/scenarios/directory_scanner.py +226 -226
  74. edsl/scenarios/file_methods.py +5 -0
  75. edsl/scenarios/file_store.py +117 -6
  76. edsl/scenarios/handlers/__init__.py +5 -1
  77. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  78. edsl/scenarios/handlers/webm_file_store.py +104 -0
  79. edsl/scenarios/scenario.py +120 -101
  80. edsl/scenarios/scenario_list.py +800 -727
  81. edsl/scenarios/scenario_list_gc_test.py +146 -0
  82. edsl/scenarios/scenario_list_memory_test.py +214 -0
  83. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  84. edsl/scenarios/scenario_selector.py +5 -4
  85. edsl/scenarios/scenario_source.py +1990 -0
  86. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  87. edsl/surveys/survey.py +22 -0
  88. edsl/tasks/__init__.py +4 -2
  89. edsl/tasks/task_history.py +198 -36
  90. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  91. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  92. edsl/utilities/__init__.py +2 -1
  93. edsl/utilities/decorators.py +121 -0
  94. edsl/utilities/memory_debugger.py +1010 -0
  95. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/METADATA +51 -76
  96. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/RECORD +99 -75
  97. edsl/jobs/jobs_runner_asyncio.py +0 -281
  98. edsl/language_models/unused/fake_openai_service.py +0 -60
  99. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
  100. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
  101. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
@@ -592,6 +592,111 @@ class FileStore(Scenario):
592
592
  """
593
593
  # Check if the mime type starts with 'image/'
594
594
  return self.mime_type.startswith("image/")
595
+
596
def is_video(self) -> bool:
    """
    Report whether this file's MIME type marks it as a video.

    Returns:
        bool: True when ``self.mime_type`` begins with ``"video/"``,
            False otherwise.

    Examples:
        >>> fs = FileStore.example("mp4")
        >>> fs.is_video()
        True
        >>> fs = FileStore.example("webm")
        >>> fs.is_video()
        True
        >>> fs = FileStore.example("txt")
        >>> fs.is_video()
        False
    """
    # Every video MIME type shares the same top-level "video/" prefix.
    video_prefix = "video/"
    mime = self.mime_type
    return mime.startswith(video_prefix)
616
+
617
def get_video_metadata(self) -> dict:
    """
    Get metadata about a video file such as duration, dimensions, codec, etc.

    Runs ``ffprobe`` (part of FFmpeg) on ``self.path`` and parses its JSON
    output.

    Returns:
        dict: On success, ``{"simplified": ..., "full": ...}`` where
            ``full`` is the parsed ffprobe JSON and ``simplified`` holds
            ``format``, ``duration_seconds``, ``size_bytes``, ``bit_rate``,
            ``streams`` and, when such streams exist, ``video`` / ``audio``
            sub-dictionaries describing the first stream of each kind.
            If ffprobe is unavailable, exits with an error, or prints
            invalid JSON, a dictionary with the keys ``error``, ``format``,
            ``mime_type`` and ``size_bytes`` is returned instead.

    Raises:
        ValueError: If the file is not a video.

    Example:
        >>> fs = FileStore.example("mp4")
        >>> metadata = fs.get_video_metadata()
        >>> isinstance(metadata, dict)
        True
    """
    if not self.is_video():
        raise ValueError("This file is not a video")

    # Imported locally so the module does not pay for them unless needed.
    import json
    import subprocess
    from fractions import Fraction

    def _number(value, cast, default):
        # ffprobe reports numeric fields as strings; tolerate values that
        # are missing or not parseable instead of crashing the caller.
        try:
            return cast(value)
        except (TypeError, ValueError):
            return default

    def _frame_rate(value) -> float:
        # Convert a rational such as "30/1" or "30000/1001" to a float.
        # Parsed with Fraction rather than eval() so that text coming from
        # a media file is never executed; "0/0" and garbage map to 0.0.
        try:
            return float(Fraction(str(value)))
        except (ValueError, ZeroDivisionError):
            return 0.0

    try:
        # Run ffprobe to get video metadata in JSON format
        result = subprocess.run(
            [
                "ffprobe", "-v", "quiet", "-print_format", "json",
                "-show_format", "-show_streams", self.path,
            ],
            capture_output=True, text=True, check=True,
        )
        metadata = json.loads(result.stdout)
    except (subprocess.SubprocessError, FileNotFoundError, json.JSONDecodeError) as e:
        # If ffprobe is not available or fails, return basic info
        return {
            "error": str(e),
            "format": self.suffix,
            "mime_type": self.mime_type,
            "size_bytes": self.size,
        }

    format_info = metadata.get("format", {})
    streams = metadata.get("streams", [])

    # Extract some common useful fields into a more user-friendly format
    simplified = {
        "format": format_info.get("format_name", "unknown"),
        "duration_seconds": _number(format_info.get("duration", 0), float, 0.0),
        "size_bytes": _number(format_info.get("size", 0), int, 0),
        "bit_rate": _number(format_info.get("bit_rate", 0), int, 0),
        "streams": len(streams),
    }

    # Add video stream info if available (first video stream only)
    video = next((s for s in streams if s.get("codec_type") == "video"), None)
    if video is not None:
        simplified["video"] = {
            "codec": video.get("codec_name", "unknown"),
            "width": video.get("width", 0),
            "height": video.get("height", 0),
            "frame_rate": _frame_rate(video.get("r_frame_rate", "0/1")),
            "pixel_format": video.get("pix_fmt", "unknown"),
        }

    # Add audio stream info if available (first audio stream only)
    audio = next((s for s in streams if s.get("codec_type") == "audio"), None)
    if audio is not None:
        simplified["audio"] = {
            "codec": audio.get("codec_name", "unknown"),
            "channels": audio.get("channels", 0),
            "sample_rate": audio.get("sample_rate", "unknown"),
        }

    # Return both the complete metadata and simplified version
    return {
        "simplified": simplified,
        "full": metadata,
    }
595
700
 
596
701
  def get_image_dimensions(self) -> tuple:
597
702
  """
@@ -626,12 +731,18 @@ class FileStore(Scenario):
626
731
  """
627
732
  Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
628
733
  """
629
- if self.suffix == "csv":
630
- # Get the pandas DataFrame
631
- df = self.to_pandas()
632
- # Check if the requested attribute exists in the DataFrame
633
- if hasattr(df, name):
634
- return getattr(df, name)
734
+ # Special case for pickle protocol
735
+ if name.startswith("__") and name.endswith("__"):
736
+ raise AttributeError(name)
737
+
738
+ # Only try to access suffix if it's in our __dict__
739
+ if hasattr(self, "_data") and "suffix" in self._data:
740
+ if self._data["suffix"] == "csv":
741
+ # Get the pandas DataFrame
742
+ df = self.to_pandas()
743
+ # Check if the requested attribute exists in the DataFrame
744
+ if hasattr(df, name):
745
+ return getattr(df, name)
635
746
  # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
636
747
  raise AttributeError(
637
748
  f"'{self.__class__.__name__}' object has no attribute '{name}'"
@@ -12,7 +12,9 @@ __all__ = [
12
12
  "LaTeXMethods",
13
13
  "PyMethods",
14
14
  "SQLiteMethods",
15
- "JpegMethods"
15
+ "JpegMethods",
16
+ "Mp4Methods",
17
+ "WebmMethods"
16
18
  ]
17
19
 
18
20
  from .pdf_file_store import PdfMethods
@@ -29,3 +31,5 @@ from .latex_file_store import LaTeXMethods
29
31
  from .py_file_store import PyMethods
30
32
  from .sqlite_file_store import SQLiteMethods
31
33
  from .jpeg_file_store import JpegMethods
34
+ from .mp4_file_store import Mp4Methods
35
+ from .webm_file_store import WebmMethods
@@ -0,0 +1,104 @@
1
+ import tempfile
2
+ from ..file_methods import FileMethods
3
+
4
+
5
class Mp4Methods(FileMethods):
    """
    Handler for MP4 video files.

    This class provides methods to handle MP4 video files in both notebook
    and system environments, including viewing and creating example videos.
    """

    suffix = "mp4"

    def view_system(self):
        """
        Open the MP4 file with the system's default video player.

        Prints a message instead of raising if the file is missing or the
        player cannot be launched.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("MP4 file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform
            # must be distinguished with sys.platform: "open" exists only
            # on macOS, while Linux desktops provide "xdg-open".
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            # Viewing is best-effort: report the failure rather than raise.
            print(f"Error opening MP4: {e}")

    def view_notebook(self):
        """
        Display the MP4 video in a Jupyter notebook using IPython's HTML display.

        The whole file is read and embedded as a base64 data URI.
        """
        from IPython.display import HTML, display
        import base64

        # Read the video file and encode it as base64
        with open(self.path, 'rb') as f:
            video_data = f.read()

        video_base64 = base64.b64encode(video_data).decode('utf-8')

        # Create an HTML5 video element with the base64-encoded video
        video_html = f"""
        <video width="640" height="360" controls>
            <source src="data:video/mp4;base64,{video_base64}" type="video/mp4">
            Your browser does not support the video tag.
        </video>
        """

        display(HTML(video_html))

    def extract_text(self):
        """
        Return a placeholder message; text extraction is not implemented for videos.

        Returns:
            str: Message indicating text extraction is not supported
        """
        return "Text extraction is not supported for video files."

    def example(self):
        """
        Create a simple example MP4 file.

        Uses FFmpeg to generate a 5-second test pattern video if available,
        otherwise writes a small placeholder file that begins with an MP4
        ``ftyp`` signature followed by zero bytes.

        Returns:
            str: Path to the created example MP4 file
        """
        import subprocess

        # Reserve a temporary path; delete=False keeps the file for the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
            output_path = f.name

        try:
            # Try to use ffmpeg to generate a test pattern video
            subprocess.run(
                [
                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
                    "-vcodec", "libx264", "-pix_fmt", "yuv420p", output_path,
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except (subprocess.SubprocessError, FileNotFoundError):
            # ffmpeg is missing or failed: fall back to a tiny placeholder.
            # NOTE: this is not a playable video, only a signature plus padding.
            with open(output_path, 'wb') as f:
                f.write(b'\x00\x00\x00\x18\x66\x74\x79\x70\x6D\x70\x34\x32')  # MP4 file signature
                f.write(b'\x00' * 1000)  # Fill with zeros

        return output_path
@@ -0,0 +1,104 @@
1
+ import tempfile
2
+ from ..file_methods import FileMethods
3
+
4
+
5
class WebmMethods(FileMethods):
    """
    Handler for WebM video files.

    This class provides methods to handle WebM video files in both notebook
    and system environments, including viewing and creating example videos.
    WebM is an open, royalty-free video format designed for the web.
    """

    suffix = "webm"

    def view_system(self):
        """
        Open the WebM file with the system's default video player.

        Prints a message instead of raising if the file is missing or the
        player cannot be launched.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("WebM file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform
            # must be distinguished with sys.platform: "open" exists only
            # on macOS, while Linux desktops provide "xdg-open".
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            # Viewing is best-effort: report the failure rather than raise.
            print(f"Error opening WebM: {e}")

    def view_notebook(self):
        """
        Display the WebM video in a Jupyter notebook using IPython's HTML display.

        The whole file is read and embedded as a base64 data URI.
        """
        from IPython.display import HTML, display
        import base64

        # Read the video file and encode it as base64
        with open(self.path, 'rb') as f:
            video_data = f.read()

        video_base64 = base64.b64encode(video_data).decode('utf-8')

        # Create an HTML5 video element with the base64-encoded video
        video_html = f"""
        <video width="640" height="360" controls>
            <source src="data:video/webm;base64,{video_base64}" type="video/webm">
            Your browser does not support the video tag.
        </video>
        """

        display(HTML(video_html))

    def extract_text(self):
        """
        Return a placeholder message; text extraction is not implemented for videos.

        Returns:
            str: Message indicating text extraction is not supported
        """
        return "Text extraction is not supported for video files."

    def example(self):
        """
        Create a simple example WebM file.

        Uses FFmpeg to generate a 5-second test pattern video in WebM format
        if available, otherwise writes a small placeholder file that begins
        with the EBML signature followed by zero bytes.

        Returns:
            str: Path to the created example WebM file
        """
        import subprocess

        # Reserve a temporary path; delete=False keeps the file for the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as f:
            output_path = f.name

        try:
            # Try to use ffmpeg to generate a test pattern video in WebM format
            subprocess.run(
                [
                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
                    "-c:v", "libvpx", "-b:v", "1M", output_path,
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except (subprocess.SubprocessError, FileNotFoundError):
            # ffmpeg is missing or failed: fall back to a tiny placeholder.
            # NOTE: this is not a playable video, only a signature plus padding.
            with open(output_path, 'wb') as f:
                # WebM starts with EBML header (1A 45 DF A3)
                f.write(b'\x1A\x45\xDF\xA3')  # EBML signature
                f.write(b'\x00' * 1000)  # Fill with zeros

        return output_path