edsl 0.1.54__py3-none-any.whl → 0.1.55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +3 -2
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +94 -5
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/invigilators/invigilators.py +9 -0
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +86 -6
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +45 -75
- edsl/language_models/registry.py +5 -0
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_dict.py +201 -16
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +63 -16
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +117 -6
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/METADATA +51 -76
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/RECORD +99 -75
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
edsl/scenarios/file_store.py
CHANGED
@@ -592,6 +592,111 @@ class FileStore(Scenario):
|
|
592
592
|
"""
|
593
593
|
# Check if the mime type starts with 'image/'
|
594
594
|
return self.mime_type.startswith("image/")
|
595
|
+
|
596
|
+
def is_video(self) -> bool:
    """
    Report whether this file's MIME type marks it as a video.

    The decision is based solely on ``self.mime_type``: any type in the
    ``video/`` family counts as a video.

    Returns:
        bool: True when the MIME type begins with ``video/``, else False.

    Examples:
        >>> fs = FileStore.example("mp4")
        >>> fs.is_video()
        True
        >>> fs = FileStore.example("webm")
        >>> fs.is_video()
        True
        >>> fs = FileStore.example("txt")
        >>> fs.is_video()
        False
    """
    video_prefix = "video/"
    mime = self.mime_type
    return mime.startswith(video_prefix)
|
616
|
+
|
617
|
+
def get_video_metadata(self) -> dict:
    """
    Get metadata about a video file such as duration, dimensions, codec, etc.

    Runs ``ffprobe`` (part of FFmpeg) on ``self.path`` and parses its JSON
    output.

    Returns:
        dict: On success, ``{"simplified": {...}, "full": {...}}`` where
        ``full`` is the parsed ffprobe JSON and ``simplified`` holds
        ``format``, ``duration_seconds``, ``size_bytes``, ``bit_rate`` and
        ``streams`` (stream count), plus a ``video`` and/or ``audio``
        sub-dict describing the first stream of each kind when present.
        If ffprobe is not installed, exits with an error, or prints
        invalid JSON, a dict with the keys ``error``, ``format``,
        ``mime_type`` and ``size_bytes`` is returned instead.

    Raises:
        ValueError: If the file is not a video.

    Example:
        >>> fs = FileStore.example("mp4")
        >>> metadata = fs.get_video_metadata()
        >>> isinstance(metadata, dict)
        True
    """
    if not self.is_video():
        raise ValueError("This file is not a video")

    import json
    import subprocess
    from fractions import Fraction

    # Only the external call and the JSON parse can raise the exceptions
    # handled here, so keep the try body limited to them.
    try:
        result = subprocess.run(
            [
                "ffprobe", "-v", "quiet", "-print_format", "json",
                "-show_format", "-show_streams", self.path,
            ],
            capture_output=True, text=True, check=True,
        )
        metadata = json.loads(result.stdout)
    except (subprocess.SubprocessError, FileNotFoundError, json.JSONDecodeError) as e:
        # If ffprobe is not available or fails, return basic info
        return {
            "error": str(e),
            "format": self.suffix,
            "mime_type": self.mime_type,
            "size_bytes": self.size,
        }

    format_info = metadata.get("format", {})
    streams = metadata.get("streams", [])

    # Extract some common useful fields into a more user-friendly format
    simplified = {
        "format": format_info.get("format_name", "unknown"),
        "duration_seconds": float(format_info.get("duration", 0)),
        "size_bytes": int(format_info.get("size", 0)),
        "bit_rate": int(format_info.get("bit_rate", 0)),
        "streams": len(streams),
    }

    # Add video stream info if available (first video stream only)
    video = next((s for s in streams if s.get("codec_type") == "video"), None)
    if video:
        # r_frame_rate is a rational string such as "30/1" or "30000/1001".
        # It originates from the media file, so it must never be passed to
        # eval(); Fraction parses it safely. Malformed or "0/0" rates
        # degrade to 0.0 instead of raising.
        try:
            frame_rate = float(Fraction(video.get("r_frame_rate", "0/1")))
        except (ValueError, ZeroDivisionError, TypeError):
            frame_rate = 0.0
        simplified["video"] = {
            "codec": video.get("codec_name", "unknown"),
            "width": video.get("width", 0),
            "height": video.get("height", 0),
            "frame_rate": frame_rate,
            "pixel_format": video.get("pix_fmt", "unknown"),
        }

    # Add audio stream info if available (first audio stream only)
    audio = next((s for s in streams if s.get("codec_type") == "audio"), None)
    if audio:
        simplified["audio"] = {
            "codec": audio.get("codec_name", "unknown"),
            "channels": audio.get("channels", 0),
            "sample_rate": audio.get("sample_rate", "unknown"),
        }

    # Return both the complete metadata and simplified version
    return {
        "simplified": simplified,
        "full": metadata,
    }
|
595
700
|
|
596
701
|
def get_image_dimensions(self) -> tuple:
|
597
702
|
"""
|
@@ -626,12 +731,18 @@ class FileStore(Scenario):
|
|
626
731
|
"""
|
627
732
|
Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
|
628
733
|
"""
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
734
|
+
# Special case for pickle protocol
|
735
|
+
if name.startswith("__") and name.endswith("__"):
|
736
|
+
raise AttributeError(name)
|
737
|
+
|
738
|
+
# Only try to access suffix if it's in our __dict__
|
739
|
+
if hasattr(self, "_data") and "suffix" in self._data:
|
740
|
+
if self._data["suffix"] == "csv":
|
741
|
+
# Get the pandas DataFrame
|
742
|
+
df = self.to_pandas()
|
743
|
+
# Check if the requested attribute exists in the DataFrame
|
744
|
+
if hasattr(df, name):
|
745
|
+
return getattr(df, name)
|
635
746
|
# If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
|
636
747
|
raise AttributeError(
|
637
748
|
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
@@ -12,7 +12,9 @@ __all__ = [
|
|
12
12
|
"LaTeXMethods",
|
13
13
|
"PyMethods",
|
14
14
|
"SQLiteMethods",
|
15
|
-
"JpegMethods"
|
15
|
+
"JpegMethods",
|
16
|
+
"Mp4Methods",
|
17
|
+
"WebmMethods"
|
16
18
|
]
|
17
19
|
|
18
20
|
from .pdf_file_store import PdfMethods
|
@@ -29,3 +31,5 @@ from .latex_file_store import LaTeXMethods
|
|
29
31
|
from .py_file_store import PyMethods
|
30
32
|
from .sqlite_file_store import SQLiteMethods
|
31
33
|
from .jpeg_file_store import JpegMethods
|
34
|
+
from .mp4_file_store import Mp4Methods
|
35
|
+
from .webm_file_store import WebmMethods
|
@@ -0,0 +1,104 @@
|
|
1
|
+
import tempfile
|
2
|
+
from ..file_methods import FileMethods
|
3
|
+
|
4
|
+
|
5
|
+
class Mp4Methods(FileMethods):
    """
    Handler for MP4 video files.

    This class provides methods to handle MP4 video files in both notebook
    and system environments, including viewing and creating example videos.
    """

    suffix = "mp4"

    def view_system(self):
        """
        Open the MP4 file with the system's default video player.

        Uses ``open`` on macOS, ``os.startfile`` on Windows and ``xdg-open``
        on every other platform. Failures are printed rather than raised.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("MP4 file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening MP4: {e}")

    def view_notebook(self):
        """
        Display the MP4 video in a Jupyter notebook using IPython's HTML display.

        The whole file is read into memory, base64-encoded and embedded as a
        data URI in a 640x360 HTML5 ``<video>`` element.
        """
        from IPython.display import HTML, display
        import base64

        # Read the video file and encode it as base64
        with open(self.path, 'rb') as f:
            video_data = f.read()

        video_base64 = base64.b64encode(video_data).decode('utf-8')

        # Create an HTML5 video element with the base64-encoded video
        video_html = f"""
        <video width="640" height="360" controls>
            <source src="data:video/mp4;base64,{video_base64}" type="video/mp4">
            Your browser does not support the video tag.
        </video>
        """

        display(HTML(video_html))

    def extract_text(self):
        """
        Return a notice that text extraction is not supported for videos.

        Returns:
            str: Message indicating text extraction is not supported
        """
        return "Text extraction is not supported for video files."

    def example(self):
        """
        Create a simple example MP4 file.

        Uses FFmpeg to generate a 5-second 1280x720 test pattern if available;
        otherwise writes a small placeholder file that starts with MP4
        signature bytes followed by zeros (not a playable video).

        Returns:
            str: Path to the created example MP4 file
        """
        import subprocess

        # Reserve a temporary path; delete=False keeps the file for the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
            output_path = f.name

        try:
            # Try to use ffmpeg to generate a test pattern video
            subprocess.run(
                [
                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
                    "-vcodec", "libx264", "-pix_fmt", "yuv420p", output_path,
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except (subprocess.SubprocessError, FileNotFoundError):
            # ffmpeg is missing or failed: fall back to a placeholder file
            with open(output_path, 'wb') as f:
                f.write(b'\x00\x00\x00\x18\x66\x74\x79\x70\x6D\x70\x34\x32')  # MP4 file signature
                f.write(b'\x00' * 1000)  # Fill with zeros

        return output_path
|
@@ -0,0 +1,104 @@
|
|
1
|
+
import tempfile
|
2
|
+
from ..file_methods import FileMethods
|
3
|
+
|
4
|
+
|
5
|
+
class WebmMethods(FileMethods):
    """
    Handler for WebM video files.

    This class provides methods to handle WebM video files in both notebook
    and system environments, including viewing and creating example videos.
    WebM is an open, royalty-free video format designed for the web.
    """

    suffix = "webm"

    def view_system(self):
        """
        Open the WebM file with the system's default video player.

        Uses ``open`` on macOS, ``os.startfile`` on Windows and ``xdg-open``
        on every other platform. Failures are printed rather than raised.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("WebM file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening WebM: {e}")

    def view_notebook(self):
        """
        Display the WebM video in a Jupyter notebook using IPython's HTML display.

        The whole file is read into memory, base64-encoded and embedded as a
        data URI in a 640x360 HTML5 ``<video>`` element.
        """
        from IPython.display import HTML, display
        import base64

        # Read the video file and encode it as base64
        with open(self.path, 'rb') as f:
            video_data = f.read()

        video_base64 = base64.b64encode(video_data).decode('utf-8')

        # Create an HTML5 video element with the base64-encoded video
        video_html = f"""
        <video width="640" height="360" controls>
            <source src="data:video/webm;base64,{video_base64}" type="video/webm">
            Your browser does not support the video tag.
        </video>
        """

        display(HTML(video_html))

    def extract_text(self):
        """
        Return a notice that text extraction is not supported for videos.

        Returns:
            str: Message indicating text extraction is not supported
        """
        return "Text extraction is not supported for video files."

    def example(self):
        """
        Create a simple example WebM file.

        Uses FFmpeg to generate a 5-second 1280x720 test pattern in WebM
        format if available; otherwise writes a small placeholder file that
        starts with the EBML signature followed by zeros (not a playable
        video).

        Returns:
            str: Path to the created example WebM file
        """
        import subprocess

        # Reserve a temporary path; delete=False keeps the file for the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as f:
            output_path = f.name

        try:
            # Try to use ffmpeg to generate a test pattern video in WebM format
            subprocess.run(
                [
                    "ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=1280x720:rate=30",
                    "-c:v", "libvpx", "-b:v", "1M", output_path,
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except (subprocess.SubprocessError, FileNotFoundError):
            # ffmpeg is missing or failed: fall back to a placeholder file
            with open(output_path, 'wb') as f:
                # WebM starts with EBML header (1A 45 DF A3)
                f.write(b'\x1A\x45\xDF\xA3')  # EBML signature
                f.write(b'\x00' * 1000)  # Fill with zeros

        return output_path
|