edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105):
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/base/data_transfer_models.py +15 -4
  8. edsl/buckets/__init__.py +8 -3
  9. edsl/buckets/bucket_collection.py +9 -3
  10. edsl/buckets/model_buckets.py +4 -2
  11. edsl/buckets/token_bucket.py +2 -2
  12. edsl/buckets/token_bucket_client.py +5 -3
  13. edsl/caching/cache.py +131 -62
  14. edsl/caching/cache_entry.py +70 -58
  15. edsl/caching/sql_dict.py +17 -0
  16. edsl/cli.py +99 -0
  17. edsl/config/config_class.py +16 -0
  18. edsl/conversation/__init__.py +31 -0
  19. edsl/coop/coop.py +276 -242
  20. edsl/coop/coop_jobs_objects.py +59 -0
  21. edsl/coop/coop_objects.py +29 -0
  22. edsl/coop/coop_regular_objects.py +26 -0
  23. edsl/coop/utils.py +24 -19
  24. edsl/dataset/dataset.py +338 -101
  25. edsl/dataset/dataset_operations_mixin.py +216 -180
  26. edsl/db_list/sqlite_list.py +349 -0
  27. edsl/inference_services/__init__.py +40 -5
  28. edsl/inference_services/exceptions.py +11 -0
  29. edsl/inference_services/services/anthropic_service.py +5 -2
  30. edsl/inference_services/services/aws_bedrock.py +6 -2
  31. edsl/inference_services/services/azure_ai.py +6 -2
  32. edsl/inference_services/services/google_service.py +7 -3
  33. edsl/inference_services/services/mistral_ai_service.py +6 -2
  34. edsl/inference_services/services/open_ai_service.py +6 -2
  35. edsl/inference_services/services/perplexity_service.py +6 -2
  36. edsl/inference_services/services/test_service.py +94 -5
  37. edsl/interviews/answering_function.py +167 -59
  38. edsl/interviews/interview.py +124 -72
  39. edsl/interviews/interview_task_manager.py +10 -0
  40. edsl/interviews/request_token_estimator.py +8 -0
  41. edsl/invigilators/invigilators.py +35 -13
  42. edsl/jobs/async_interview_runner.py +146 -104
  43. edsl/jobs/data_structures.py +6 -4
  44. edsl/jobs/decorators.py +61 -0
  45. edsl/jobs/fetch_invigilator.py +61 -18
  46. edsl/jobs/html_table_job_logger.py +14 -2
  47. edsl/jobs/jobs.py +180 -104
  48. edsl/jobs/jobs_component_constructor.py +2 -2
  49. edsl/jobs/jobs_interview_constructor.py +2 -0
  50. edsl/jobs/jobs_pricing_estimation.py +154 -113
  51. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  52. edsl/jobs/jobs_runner_status.py +30 -25
  53. edsl/jobs/progress_bar_manager.py +79 -0
  54. edsl/jobs/remote_inference.py +35 -1
  55. edsl/key_management/key_lookup_builder.py +6 -1
  56. edsl/language_models/language_model.py +110 -12
  57. edsl/language_models/model.py +10 -3
  58. edsl/language_models/price_manager.py +176 -71
  59. edsl/language_models/registry.py +5 -0
  60. edsl/notebooks/notebook.py +77 -10
  61. edsl/questions/VALIDATION_README.md +134 -0
  62. edsl/questions/__init__.py +24 -1
  63. edsl/questions/exceptions.py +21 -0
  64. edsl/questions/question_dict.py +201 -16
  65. edsl/questions/question_multiple_choice_with_other.py +624 -0
  66. edsl/questions/question_registry.py +2 -1
  67. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  68. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  69. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  70. edsl/questions/validation_analysis.py +185 -0
  71. edsl/questions/validation_cli.py +131 -0
  72. edsl/questions/validation_html_report.py +404 -0
  73. edsl/questions/validation_logger.py +136 -0
  74. edsl/results/result.py +115 -46
  75. edsl/results/results.py +702 -171
  76. edsl/scenarios/construct_download_link.py +16 -3
  77. edsl/scenarios/directory_scanner.py +226 -226
  78. edsl/scenarios/file_methods.py +5 -0
  79. edsl/scenarios/file_store.py +150 -9
  80. edsl/scenarios/handlers/__init__.py +5 -1
  81. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  82. edsl/scenarios/handlers/webm_file_store.py +104 -0
  83. edsl/scenarios/scenario.py +120 -101
  84. edsl/scenarios/scenario_list.py +800 -727
  85. edsl/scenarios/scenario_list_gc_test.py +146 -0
  86. edsl/scenarios/scenario_list_memory_test.py +214 -0
  87. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  88. edsl/scenarios/scenario_selector.py +5 -4
  89. edsl/scenarios/scenario_source.py +1990 -0
  90. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  91. edsl/surveys/survey.py +22 -0
  92. edsl/tasks/__init__.py +4 -2
  93. edsl/tasks/task_history.py +198 -36
  94. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  95. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  96. edsl/utilities/__init__.py +2 -1
  97. edsl/utilities/decorators.py +121 -0
  98. edsl/utilities/memory_debugger.py +1010 -0
  99. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
  100. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
  101. edsl/jobs/jobs_runner_asyncio.py +0 -281
  102. edsl/language_models/unused/fake_openai_service.py +0 -60
  103. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
  104. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
  105. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
@@ -294,10 +294,23 @@ class FileStore(Scenario):
294
294
 
295
295
  def upload_google(self, refresh: bool = False) -> None:
296
296
  import google.generativeai as genai
297
+ import google
297
298
 
298
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
299
- google_info = genai.upload_file(self.path, mime_type=self.mime_type)
300
- self.external_locations["google"] = google_info.to_dict()
299
+ try:
300
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
301
+ google_info = genai.upload_file(self.path, mime_type=self.mime_type)
302
+ self.external_locations["google"] = google_info.to_dict()
303
+ while True:
304
+ file_metadata = genai.get_file(name=google_info.name)
305
+ file_state = file_metadata.state
306
+
307
+ if file_state == 2: # "ACTIVE":
308
+ break
309
+ elif file_state == 10: # "FAILED":
310
+ break
311
+ except Exception as e:
312
+ print(f"Error uploading to Google: {e}")
313
+ raise
301
314
 
302
315
  @classmethod
303
316
  @remove_edsl_version
@@ -593,6 +606,128 @@ class FileStore(Scenario):
593
606
  # Check if the mime type starts with 'image/'
594
607
  return self.mime_type.startswith("image/")
595
608
 
609
def is_video(self) -> bool:
    """
    Report whether this file's MIME type belongs to the ``video/`` family.

    Returns:
        bool: True when ``self.mime_type`` begins with ``"video/"``,
        False otherwise.

    Examples:
        >>> fs = FileStore.example("mp4")
        >>> fs.is_video()
        True
        >>> fs = FileStore.example("webm")
        >>> fs.is_video()
        True
        >>> fs = FileStore.example("txt")
        >>> fs.is_video()
        False
    """
    # Only the MIME type prefix is inspected; the file content is not read.
    mime = self.mime_type
    return mime.startswith("video/")
629
+
630
def get_video_metadata(self) -> dict:
    """
    Get metadata about a video file such as duration, dimensions, codec, etc.

    Runs ``ffprobe`` (part of FFmpeg) on ``self.path`` and parses its JSON
    output.

    Returns:
        dict: On success, ``{"simplified": ..., "full": ...}`` where ``full``
        is the parsed ffprobe output and ``simplified`` holds the format name,
        duration, size, bit rate, stream count, and (when present) details of
        the first video and first audio stream. If ffprobe is missing, fails,
        or prints invalid JSON, a dictionary with ``error``, ``format``,
        ``mime_type`` and ``size_bytes`` keys is returned instead.

    Raises:
        ValueError: If the file is not a video.

    Example:
        >>> fs = FileStore.example("mp4")
        >>> metadata = fs.get_video_metadata()
        >>> isinstance(metadata, dict)
        True
    """
    if not self.is_video():
        raise ValueError("This file is not a video")

    import json
    import subprocess
    from fractions import Fraction

    def _coerce(value, cast, default):
        # Numeric fields are converted from the parsed JSON; fall back to the
        # default instead of failing when a value is missing or non-numeric.
        try:
            return cast(value)
        except (TypeError, ValueError):
            return default

    def _frame_rate(ratio):
        # Convert a rational string such as "30/1" to 30.0 without eval();
        # malformed input or a zero denominator ("0/0") yields 0.0.
        try:
            return float(Fraction(str(ratio)))
        except (ValueError, ZeroDivisionError):
            return 0.0

    try:
        # Run ffprobe to get video metadata in JSON format
        result = subprocess.run(
            [
                "ffprobe",
                "-v",
                "quiet",
                "-print_format",
                "json",
                "-show_format",
                "-show_streams",
                self.path,
            ],
            capture_output=True,
            text=True,
            check=True,
        )
        metadata = json.loads(result.stdout)
    except (
        subprocess.SubprocessError,
        FileNotFoundError,
        json.JSONDecodeError,
    ) as e:
        # If ffprobe is not available or fails, return basic info
        return {
            "error": str(e),
            "format": self.suffix,
            "mime_type": self.mime_type,
            "size_bytes": self.size,
        }

    format_info = metadata.get("format", {})
    streams = metadata.get("streams", [])

    # Extract some common useful fields into a more user-friendly format
    simplified = {
        "format": format_info.get("format_name", "unknown"),
        "duration_seconds": _coerce(format_info.get("duration", 0), float, 0.0),
        "size_bytes": _coerce(format_info.get("size", 0), int, 0),
        "bit_rate": _coerce(format_info.get("bit_rate", 0), int, 0),
        "streams": len(streams),
    }

    # Add info for the first video stream, if any
    video_streams = [s for s in streams if s.get("codec_type") == "video"]
    if video_streams:
        video = video_streams[0]
        simplified["video"] = {
            "codec": video.get("codec_name", "unknown"),
            "width": video.get("width", 0),
            "height": video.get("height", 0),
            "frame_rate": _frame_rate(video.get("r_frame_rate", "0/1")),
            "pixel_format": video.get("pix_fmt", "unknown"),
        }

    # Add info for the first audio stream, if any
    audio_streams = [s for s in streams if s.get("codec_type") == "audio"]
    if audio_streams:
        audio = audio_streams[0]
        simplified["audio"] = {
            "codec": audio.get("codec_name", "unknown"),
            "channels": audio.get("channels", 0),
            "sample_rate": audio.get("sample_rate", "unknown"),
        }

    # Return both the complete metadata and simplified version
    return {"simplified": simplified, "full": metadata}
730
+
596
731
  def get_image_dimensions(self) -> tuple:
597
732
  """
598
733
  Get the dimensions (width, height) of an image file.
@@ -626,12 +761,18 @@ class FileStore(Scenario):
626
761
  """
627
762
  Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
628
763
  """
629
- if self.suffix == "csv":
630
- # Get the pandas DataFrame
631
- df = self.to_pandas()
632
- # Check if the requested attribute exists in the DataFrame
633
- if hasattr(df, name):
634
- return getattr(df, name)
764
+ # Special case for pickle protocol
765
+ if name.startswith("__") and name.endswith("__"):
766
+ raise AttributeError(name)
767
+
768
+ # Only try to access suffix if it's in our __dict__
769
+ if hasattr(self, "_data") and "suffix" in self._data:
770
+ if self._data["suffix"] == "csv":
771
+ # Get the pandas DataFrame
772
+ df = self.to_pandas()
773
+ # Check if the requested attribute exists in the DataFrame
774
+ if hasattr(df, name):
775
+ return getattr(df, name)
635
776
  # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
636
777
  raise AttributeError(
637
778
  f"'{self.__class__.__name__}' object has no attribute '{name}'"
@@ -12,7 +12,9 @@ __all__ = [
12
12
  "LaTeXMethods",
13
13
  "PyMethods",
14
14
  "SQLiteMethods",
15
- "JpegMethods"
15
+ "JpegMethods",
16
+ "Mp4Methods",
17
+ "WebmMethods"
16
18
  ]
17
19
 
18
20
  from .pdf_file_store import PdfMethods
@@ -29,3 +31,5 @@ from .latex_file_store import LaTeXMethods
29
31
  from .py_file_store import PyMethods
30
32
  from .sqlite_file_store import SQLiteMethods
31
33
  from .jpeg_file_store import JpegMethods
34
+ from .mp4_file_store import Mp4Methods
35
+ from .webm_file_store import WebmMethods
@@ -0,0 +1,104 @@
1
+ import tempfile
2
+ from ..file_methods import FileMethods
3
+
4
+
5
class Mp4Methods(FileMethods):
    """
    Handler for MP4 video files.

    This class provides methods to handle MP4 video files in both notebook
    and system environments, including viewing and creating example videos.
    """

    suffix = "mp4"

    def view_system(self):
        """
        Open the MP4 file with the system's default video player.

        Prints a message instead of raising if the file does not exist or
        the opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("MP4 file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so macOS has to be
            # detected through sys.platform; otherwise Linux would try to run
            # the macOS-only "open" command.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening MP4: {e}")

    def view_notebook(self):
        """
        Display the MP4 video in a Jupyter notebook using IPython's HTML display.

        The whole file is read and embedded as a base64 data URI.
        """
        from IPython.display import HTML, display
        import base64

        # Read the video file and encode it as base64
        with open(self.path, "rb") as f:
            video_data = f.read()

        video_base64 = base64.b64encode(video_data).decode("utf-8")

        # Create an HTML5 video element with the base64-encoded video
        video_html = f"""
        <video width="640" height="360" controls>
            <source src="data:video/mp4;base64,{video_base64}" type="video/mp4">
            Your browser does not support the video tag.
        </video>
        """

        display(HTML(video_html))

    def extract_text(self):
        """
        Return a message stating that text extraction is not supported for videos.

        Returns:
            str: Message indicating text extraction is not supported
        """
        return "Text extraction is not supported for video files."

    def example(self):
        """
        Create a simple example MP4 file.

        Uses FFmpeg to generate a test video pattern if available,
        otherwise writes a small placeholder file that starts with an MP4
        signature.

        Returns:
            str: Path to the created example MP4 file
        """
        import subprocess

        # Reserve a temporary path for the output; the file is kept on disk
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
            output_path = f.name

        try:
            # Try to use ffmpeg to generate a test pattern video
            subprocess.run(
                [
                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
                    "-vcodec", "libx264", "-pix_fmt", "yuv420p", output_path,
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except (subprocess.SubprocessError, FileNotFoundError):
            # If ffmpeg is not available or fails, write a tiny placeholder
            # file with the .mp4 extension instead
            with open(output_path, "wb") as f:
                f.write(b'\x00\x00\x00\x18\x66\x74\x79\x70\x6D\x70\x34\x32')  # MP4 file signature
                f.write(b'\x00' * 1000)  # Fill with zeros

        return output_path
@@ -0,0 +1,104 @@
1
+ import tempfile
2
+ from ..file_methods import FileMethods
3
+
4
+
5
class WebmMethods(FileMethods):
    """
    Handler for WebM video files.

    This class provides methods to handle WebM video files in both notebook
    and system environments, including viewing and creating example videos.
    WebM is an open, royalty-free video format designed for the web.
    """

    suffix = "webm"

    def view_system(self):
        """
        Open the WebM file with the system's default video player.

        Prints a message instead of raising if the file does not exist or
        the opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("WebM file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so macOS has to be
            # detected through sys.platform; otherwise Linux would try to run
            # the macOS-only "open" command.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening WebM: {e}")

    def view_notebook(self):
        """
        Display the WebM video in a Jupyter notebook using IPython's HTML display.

        The whole file is read and embedded as a base64 data URI.
        """
        from IPython.display import HTML, display
        import base64

        # Read the video file and encode it as base64
        with open(self.path, "rb") as f:
            video_data = f.read()

        video_base64 = base64.b64encode(video_data).decode("utf-8")

        # Create an HTML5 video element with the base64-encoded video
        video_html = f"""
        <video width="640" height="360" controls>
            <source src="data:video/webm;base64,{video_base64}" type="video/webm">
            Your browser does not support the video tag.
        </video>
        """

        display(HTML(video_html))

    def extract_text(self):
        """
        Return a message stating that text extraction is not supported for videos.

        Returns:
            str: Message indicating text extraction is not supported
        """
        return "Text extraction is not supported for video files."

    def example(self):
        """
        Create a simple example WebM file.

        Uses FFmpeg to generate a test video pattern in WebM format if available,
        otherwise writes a small placeholder file that starts with the EBML
        signature.

        Returns:
            str: Path to the created example WebM file
        """
        import subprocess

        # Reserve a temporary path for the output; the file is kept on disk
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as f:
            output_path = f.name

        try:
            # Try to use ffmpeg to generate a test pattern video in WebM format
            subprocess.run(
                [
                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
                    "-c:v", "libvpx", "-b:v", "1M", output_path,
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except (subprocess.SubprocessError, FileNotFoundError):
            # If ffmpeg is not available or fails, write a minimal placeholder
            with open(output_path, "wb") as f:
                # WebM starts with EBML header (1A 45 DF A3)
                f.write(b'\x1A\x45\xDF\xA3')  # EBML signature
                f.write(b'\x00' * 1000)  # Fill with zeros

        return output_path