edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/base/data_transfer_models.py +15 -4
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/dataset/dataset_operations_mixin.py +216 -180
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +7 -3
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +94 -5
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/interviews/request_token_estimator.py +8 -0
- edsl/invigilators/invigilators.py +35 -13
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_pricing_estimation.py +154 -113
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +110 -12
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +176 -71
- edsl/language_models/registry.py +5 -0
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_dict.py +201 -16
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +115 -46
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +150 -9
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
@@ -32,7 +32,7 @@ class ConstructDownloadLink:
|
|
32
32
|
|
33
33
|
def create_link(
|
34
34
|
self, custom_filename: Optional[str] = None, style: Optional[dict] = None
|
35
|
-
) -> HTML:
|
35
|
+
) -> "HTML":
|
36
36
|
"""Create an HTML download link wrapped in an HTML display object.
|
37
37
|
|
38
38
|
Args:
|
@@ -44,7 +44,14 @@ class ConstructDownloadLink:
|
|
44
44
|
Returns:
|
45
45
|
HTML: A displayable HTML object containing the styled download link.
|
46
46
|
"""
|
47
|
-
|
47
|
+
# We'll use a string annotation instead of an import for doctests
|
48
|
+
try:
|
49
|
+
from edsl.display import HTML
|
50
|
+
except ImportError:
|
51
|
+
# For doctest, provide a mock HTML class
|
52
|
+
class HTML:
|
53
|
+
def __init__(self, content): self.content = content
|
54
|
+
def _repr_html_(self): return self.content
|
48
55
|
|
49
56
|
html = self.html_create_link(custom_filename, style)
|
50
57
|
return HTML(html)
|
@@ -137,7 +144,13 @@ class ConstructDownloadLink:
|
|
137
144
|
)._repr_html_()
|
138
145
|
)
|
139
146
|
|
140
|
-
|
147
|
+
try:
|
148
|
+
from edsl.display import HTML
|
149
|
+
except ImportError:
|
150
|
+
# For doctest, provide a mock HTML class
|
151
|
+
class HTML:
|
152
|
+
def __init__(self, content): self.content = content
|
153
|
+
def _repr_html_(self): return self.content
|
141
154
|
|
142
155
|
return HTML(
|
143
156
|
'<div style="display: flex; gap: 10px;">' + "".join(html_parts) + "</div>"
|
@@ -1,258 +1,258 @@
|
|
1
1
|
"""
|
2
|
-
|
2
|
+
DirectoryScanner provides functionality for scanning directories and creating ScenarioLists from files.
|
3
3
|
|
4
|
-
This module
|
5
|
-
|
6
|
-
recursive scanning, filtering by file extensions, and both eager and lazy iteration over
|
7
|
-
the matching files.
|
4
|
+
This module contains the DirectoryScanner class which handles scanning directories,
|
5
|
+
filtering files based on patterns, and creating Scenario objects from files.
|
8
6
|
"""
|
9
7
|
|
10
|
-
from dataclasses import dataclass
|
11
|
-
from typing import Optional, List, Iterator, TypeVar, Callable
|
12
8
|
import os
|
9
|
+
from typing import Optional, List, Callable, Any
|
10
|
+
from .scenario import Scenario
|
11
|
+
from .file_store import FileStore
|
12
|
+
from .exceptions import FileNotFoundScenarioError
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
class DirectoryScanner:
|
15
|
+
"""A class for scanning directories and creating ScenarioLists from files."""
|
16
16
|
|
17
|
+
def __init__(self, directory_path: str):
|
18
|
+
"""Initialize the DirectoryScanner with a directory path.
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
"""
|
21
|
-
A utility class for finding and processing files in directories.
|
22
|
-
|
23
|
-
DirectoryScanner provides methods to scan directories for files that match specific
|
24
|
-
criteria, such as file extensions. It can process matching files using a factory
|
25
|
-
function that converts file paths to objects of a specified type.
|
26
|
-
|
27
|
-
The scanner supports both eager (scan) and lazy (iter_scan) iteration, recursive
|
28
|
-
directory traversal, and flexible filtering based on file extensions.
|
29
|
-
|
30
|
-
Attributes:
|
31
|
-
directory_path: The path to the directory to scan.
|
32
|
-
|
33
|
-
Examples:
|
34
|
-
>>> import tempfile
|
35
|
-
>>> import os
|
36
|
-
>>> # Create a temporary directory with some files
|
37
|
-
>>> with tempfile.TemporaryDirectory() as tmpdir:
|
38
|
-
... # Create a few files with different extensions
|
39
|
-
... _ = open(os.path.join(tmpdir, "file1.txt"), "w").write("content")
|
40
|
-
... _ = open(os.path.join(tmpdir, "file2.txt"), "w").write("content")
|
41
|
-
... _ = open(os.path.join(tmpdir, "image.jpg"), "w").write("content")
|
42
|
-
... # Create a scanner and find all text files
|
43
|
-
... scanner = DirectoryScanner(tmpdir)
|
44
|
-
... txt_files = scanner.scan(lambda path: path, suffix_allow_list=["txt"])
|
45
|
-
... len(txt_files)
|
46
|
-
... # Use a factory to process files
|
47
|
-
... def get_filename(path):
|
48
|
-
... return os.path.basename(path)
|
49
|
-
... filenames = scanner.scan(get_filename)
|
50
|
-
... sorted(filenames)
|
51
|
-
2
|
52
|
-
['file1.txt', 'file2.txt', 'image.jpg']
|
53
|
-
"""
|
20
|
+
Args:
|
21
|
+
directory_path (str): The path to the directory to scan.
|
54
22
|
|
55
|
-
|
23
|
+
Raises:
|
24
|
+
FileNotFoundScenarioError: If the specified directory does not exist.
|
25
|
+
"""
|
26
|
+
self.directory_path = directory_path
|
27
|
+
if not os.path.isdir(directory_path):
|
28
|
+
raise FileNotFoundScenarioError(f"Directory not found: {directory_path}")
|
56
29
|
|
57
30
|
def scan(
|
58
31
|
self,
|
59
|
-
factory: Callable[[str],
|
32
|
+
factory: Callable[[str], Any] = FileStore,
|
60
33
|
recursive: bool = False,
|
61
34
|
suffix_allow_list: Optional[List[str]] = None,
|
62
|
-
suffix_exclude_list: Optional[List[str]] = None,
|
63
35
|
example_suffix: Optional[str] = None,
|
64
|
-
|
65
|
-
|
36
|
+
) -> List[Any]:
|
37
|
+
"""Scan the directory and create objects from files.
|
38
|
+
|
39
|
+
Args:
|
40
|
+
factory (Callable[[str], Any]): A function that creates objects from file paths.
|
41
|
+
Defaults to FileStore.
|
42
|
+
recursive (bool): Whether to scan subdirectories recursively.
|
43
|
+
suffix_allow_list (Optional[List[str]]): List of file extensions to include.
|
44
|
+
example_suffix (Optional[str]): Example suffix pattern for filtering.
|
45
|
+
|
46
|
+
Returns:
|
47
|
+
List[Any]: List of objects created by the factory function.
|
66
48
|
"""
|
67
|
-
|
49
|
+
result = []
|
68
50
|
|
69
|
-
|
70
|
-
|
71
|
-
|
51
|
+
def should_include_file(filename: str) -> bool:
|
52
|
+
if suffix_allow_list:
|
53
|
+
return any(filename.endswith(f".{suffix}") for suffix in suffix_allow_list)
|
54
|
+
if example_suffix:
|
55
|
+
if example_suffix.startswith("*."):
|
56
|
+
return filename.endswith(example_suffix[1:])
|
57
|
+
# Handle other wildcard patterns if needed
|
58
|
+
return True
|
59
|
+
|
60
|
+
def scan_dir(current_path: str):
|
61
|
+
for entry in os.scandir(current_path):
|
62
|
+
if entry.is_file() and should_include_file(entry.name):
|
63
|
+
try:
|
64
|
+
result.append(factory(entry.path))
|
65
|
+
except Exception as e:
|
66
|
+
import warnings
|
67
|
+
warnings.warn(f"Failed to process file {entry.path}: {str(e)}")
|
68
|
+
elif entry.is_dir() and recursive:
|
69
|
+
scan_dir(entry.path)
|
70
|
+
|
71
|
+
scan_dir(self.directory_path)
|
72
|
+
return result
|
73
|
+
|
74
|
+
@classmethod
|
75
|
+
def scan_directory(
|
76
|
+
cls,
|
77
|
+
directory: str,
|
78
|
+
pattern: str = "*",
|
79
|
+
recursive: bool = False,
|
80
|
+
metadata: bool = True,
|
81
|
+
ignore_dirs: List[str] = None,
|
82
|
+
ignore_files: List[str] = None,
|
83
|
+
) -> Any:
|
84
|
+
"""Scan a directory and create a ScenarioList from the files.
|
72
85
|
|
73
86
|
Args:
|
74
|
-
|
75
|
-
|
76
|
-
recursive:
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
suffix_exclude_list: A list of file extensions to exclude. This takes precedence
|
81
|
-
over suffix_allow_list.
|
82
|
-
example_suffix: If provided, only include files ending with this exact suffix.
|
83
|
-
This checks the entire filename, not just the extension.
|
84
|
-
include_no_extension: Whether to include files without extensions. Defaults to True.
|
87
|
+
directory (str): The directory path to scan
|
88
|
+
pattern (str): File pattern to match (e.g., "*.txt", "*.{jpg,png}")
|
89
|
+
recursive (bool): Whether to scan subdirectories recursively
|
90
|
+
metadata (bool): Whether to include file metadata in the scenarios
|
91
|
+
ignore_dirs (List[str]): List of directory names to ignore
|
92
|
+
ignore_files (List[str]): List of file patterns to ignore
|
85
93
|
|
86
94
|
Returns:
|
87
|
-
A
|
88
|
-
|
89
|
-
Examples:
|
90
|
-
>>> import tempfile
|
91
|
-
>>> import os
|
92
|
-
>>> with tempfile.TemporaryDirectory() as tmpdir:
|
93
|
-
... # Create test files
|
94
|
-
... _ = open(os.path.join(tmpdir, "doc1.txt"), "w").write("content")
|
95
|
-
... _ = open(os.path.join(tmpdir, "doc2.md"), "w").write("content")
|
96
|
-
... os.mkdir(os.path.join(tmpdir, "subdir"))
|
97
|
-
... _ = open(os.path.join(tmpdir, "subdir", "doc3.txt"), "w").write("content")
|
98
|
-
... # Scan for text files only
|
99
|
-
... scanner = DirectoryScanner(tmpdir)
|
100
|
-
... paths = scanner.scan(lambda p: p, suffix_allow_list=["txt"])
|
101
|
-
... len(paths)
|
102
|
-
... # Recursive scan for all files
|
103
|
-
... all_paths = scanner.scan(lambda p: p, recursive=True)
|
104
|
-
... len(all_paths)
|
105
|
-
... # Exclude specific extensions
|
106
|
-
... no_md = scanner.scan(lambda p: p, recursive=True, suffix_exclude_list=["md"])
|
107
|
-
... len(no_md)
|
108
|
-
1
|
109
|
-
3
|
110
|
-
2
|
111
|
-
|
112
|
-
Notes:
|
113
|
-
- This method is eager and collects all results into memory. For large directories,
|
114
|
-
consider using iter_scan instead.
|
115
|
-
- The filtering logic applies filters in this order: exclude list, example suffix,
|
116
|
-
allow list, and no extension.
|
95
|
+
ScenarioList: A ScenarioList containing one scenario per matching file
|
117
96
|
"""
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
97
|
+
from .scenario_list import ScenarioList
|
98
|
+
|
99
|
+
# Handle default values
|
100
|
+
ignore_dirs = ignore_dirs or []
|
101
|
+
ignore_files = ignore_files or []
|
102
|
+
|
103
|
+
# Import glob for pattern matching
|
104
|
+
import glob
|
105
|
+
import fnmatch
|
106
|
+
|
107
|
+
# Normalize directory path
|
108
|
+
directory = os.path.abspath(directory)
|
109
|
+
|
110
|
+
# Prepare result container
|
111
|
+
scenarios = []
|
112
|
+
|
113
|
+
# Pattern matching function
|
114
|
+
def matches_pattern(filename, pattern):
|
115
|
+
return fnmatch.fnmatch(filename, pattern)
|
116
|
+
|
117
|
+
# File gathering function
|
118
|
+
def gather_files(current_dir, current_pattern):
|
119
|
+
# Create the full path pattern
|
120
|
+
path_pattern = os.path.join(current_dir, current_pattern)
|
121
|
+
|
122
|
+
# Get all matching files
|
123
|
+
for file_path in glob.glob(path_pattern, recursive=recursive):
|
124
|
+
if os.path.isfile(file_path):
|
125
|
+
# Check if file should be ignored
|
126
|
+
file_name = os.path.basename(file_path)
|
127
|
+
if any(matches_pattern(file_name, ignore_pattern) for ignore_pattern in ignore_files):
|
128
|
+
continue
|
129
|
+
|
130
|
+
# Create FileStore object
|
131
|
+
file_store = FileStore(file_path)
|
132
|
+
|
133
|
+
# Create scenario
|
134
|
+
scenario_data = {"file": file_store}
|
135
|
+
|
136
|
+
# Add metadata if requested
|
137
|
+
if metadata:
|
138
|
+
file_stat = os.stat(file_path)
|
139
|
+
scenario_data.update({
|
140
|
+
"file_path": file_path,
|
141
|
+
"file_name": file_name,
|
142
|
+
"file_size": file_stat.st_size,
|
143
|
+
"file_created": file_stat.st_ctime,
|
144
|
+
"file_modified": file_stat.st_mtime,
|
145
|
+
})
|
146
|
+
|
147
|
+
scenarios.append(Scenario(scenario_data))
|
148
|
+
|
149
|
+
# Process the directory
|
150
|
+
if recursive:
|
151
|
+
for root, dirs, files in os.walk(directory):
|
152
|
+
# Skip ignored directories
|
153
|
+
dirs[:] = [d for d in dirs if d not in ignore_dirs]
|
154
|
+
|
155
|
+
# Process files in this directory
|
156
|
+
gather_files(root, pattern)
|
157
|
+
else:
|
158
|
+
gather_files(directory, pattern)
|
159
|
+
|
160
|
+
# Return as ScenarioList
|
161
|
+
return ScenarioList(scenarios)
|
162
|
+
|
163
|
+
@classmethod
|
164
|
+
def create_scenario_list(
|
165
|
+
cls,
|
166
|
+
path: Optional[str] = None,
|
132
167
|
recursive: bool = False,
|
168
|
+
key_name: str = "content",
|
169
|
+
factory: Callable[[str], Any] = FileStore,
|
133
170
|
suffix_allow_list: Optional[List[str]] = None,
|
134
|
-
suffix_exclude_list: Optional[List[str]] = None,
|
135
171
|
example_suffix: Optional[str] = None,
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
Lazily scan directory and yield objects created by the factory function.
|
140
|
-
|
141
|
-
This method performs a lazy scan of the directory, filtering files based on the provided
|
142
|
-
criteria, and applies the factory function to each matching file path. It yields
|
143
|
-
results one by one, allowing for memory-efficient processing of large directories.
|
144
|
-
|
172
|
+
) -> Any:
|
173
|
+
"""Create a ScenarioList from files in a directory.
|
174
|
+
|
145
175
|
Args:
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
suffix_allow_list:
|
151
|
-
|
152
|
-
suffix_exclude_list: A list of file extensions to exclude. This takes precedence
|
153
|
-
over suffix_allow_list.
|
154
|
-
example_suffix: If provided, only include files ending with this exact suffix.
|
155
|
-
This checks the entire filename, not just the extension.
|
156
|
-
include_no_extension: Whether to include files without extensions. Defaults to True.
|
157
|
-
|
158
|
-
Yields:
|
159
|
-
Objects created by applying the factory function to each matching file path,
|
160
|
-
yielded one at a time.
|
161
|
-
|
162
|
-
Examples:
|
163
|
-
>>> import tempfile
|
164
|
-
>>> import os
|
165
|
-
>>> with tempfile.TemporaryDirectory() as tmpdir:
|
166
|
-
... # Create test files
|
167
|
-
... _ = open(os.path.join(tmpdir, "doc1.txt"), "w").write("content")
|
168
|
-
... _ = open(os.path.join(tmpdir, "doc2.md"), "w").write("content")
|
169
|
-
... # Process files lazily
|
170
|
-
... scanner = DirectoryScanner(tmpdir)
|
171
|
-
... for path in scanner.iter_scan(lambda p: p):
|
172
|
-
... # Process each file path without loading all into memory
|
173
|
-
... file_exists = os.path.exists(path)
|
174
|
-
... assert file_exists
|
175
|
-
|
176
|
-
Notes:
|
177
|
-
- This method is lazy and yields results as they are processed, making it
|
178
|
-
suitable for memory-efficient processing of large directories.
|
179
|
-
- The filtering logic is identical to the scan method.
|
180
|
-
"""
|
176
|
+
path (Optional[str]): The directory path to scan, optionally including a wildcard pattern.
|
177
|
+
recursive (bool): Whether to scan subdirectories recursively.
|
178
|
+
key_name (str): The key to use for the FileStore object in each Scenario.
|
179
|
+
factory (Callable[[str], Any]): Factory function to create objects from files.
|
180
|
+
suffix_allow_list (Optional[List[str]]): List of file extensions to include.
|
181
|
+
example_suffix (Optional[str]): Example suffix pattern for filtering.
|
181
182
|
|
182
|
-
|
183
|
-
|
184
|
-
Determine if a file should be included based on filtering criteria.
|
185
|
-
|
186
|
-
This helper function applies all the filtering rules to determine
|
187
|
-
if a given file path should be included in the results.
|
188
|
-
|
189
|
-
Args:
|
190
|
-
filepath: The path to the file to check.
|
191
|
-
|
192
|
-
Returns:
|
193
|
-
True if the file should be included, False otherwise.
|
194
|
-
"""
|
195
|
-
# Get filename and extension
|
196
|
-
basename = os.path.basename(filepath)
|
197
|
-
_, ext = os.path.splitext(filepath)
|
198
|
-
ext = ext[1:] if ext else "" # Remove leading dot from extension
|
199
|
-
|
200
|
-
# Skip system files like .DS_Store by default
|
201
|
-
if basename == '.DS_Store':
|
202
|
-
return False
|
203
|
-
|
204
|
-
# If there's a specific allow list and we have a wildcard filter
|
205
|
-
if suffix_allow_list:
|
206
|
-
# Only include files with the allowed extensions
|
207
|
-
return ext in suffix_allow_list
|
208
|
-
|
209
|
-
# Check exclusions (they take precedence)
|
210
|
-
if suffix_exclude_list and ext in suffix_exclude_list:
|
211
|
-
return False
|
183
|
+
Returns:
|
184
|
+
ScenarioList: A ScenarioList containing Scenario objects for all matching files.
|
212
185
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
return False
|
227
|
-
elif not filepath.endswith(example_suffix):
|
228
|
-
return False
|
229
|
-
|
230
|
-
# Handle no extension case
|
231
|
-
if not ext:
|
232
|
-
return include_no_extension
|
186
|
+
Raises:
|
187
|
+
FileNotFoundScenarioError: If the specified directory does not exist.
|
188
|
+
"""
|
189
|
+
# Import here to avoid circular import
|
190
|
+
from .scenario_list import ScenarioList
|
191
|
+
|
192
|
+
# Handle default case - use current directory
|
193
|
+
if path is None:
|
194
|
+
directory_path = os.getcwd()
|
195
|
+
pattern = None
|
196
|
+
else:
|
197
|
+
# Special handling for "**" pattern which indicates recursive scanning
|
198
|
+
has_recursive_pattern = "**" in path if path else False
|
233
199
|
|
234
|
-
|
200
|
+
# Check if path contains any wildcard
|
201
|
+
if path and ("*" in path):
|
202
|
+
# Handle "**/*.ext" pattern - find the directory part before the **
|
203
|
+
if has_recursive_pattern:
|
204
|
+
# Extract the base directory by finding the part before **
|
205
|
+
parts = path.split("**")
|
206
|
+
if parts and parts[0]:
|
207
|
+
# Remove trailing slash if any
|
208
|
+
directory_path = parts[0].rstrip("/")
|
209
|
+
if not directory_path:
|
210
|
+
directory_path = os.getcwd()
|
211
|
+
# Get the pattern after **
|
212
|
+
pattern = parts[1] if len(parts) > 1 else None
|
213
|
+
if pattern and pattern.startswith("/"):
|
214
|
+
pattern = pattern[1:] # Remove leading slash
|
215
|
+
else:
|
216
|
+
directory_path = os.getcwd()
|
217
|
+
pattern = None
|
218
|
+
# Handle case where path is just a pattern (e.g., "*.py")
|
219
|
+
elif os.path.dirname(path) == "":
|
220
|
+
directory_path = os.getcwd()
|
221
|
+
pattern = os.path.basename(path)
|
222
|
+
else:
|
223
|
+
# Split into directory and pattern
|
224
|
+
directory_path = os.path.dirname(path)
|
225
|
+
if not directory_path:
|
226
|
+
directory_path = os.getcwd()
|
227
|
+
pattern = os.path.basename(path)
|
228
|
+
else:
|
229
|
+
# Path is a directory with no pattern
|
230
|
+
directory_path = path
|
231
|
+
pattern = None
|
235
232
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
"""
|
246
|
-
if recursive:
|
247
|
-
for root, _, files in os.walk(self.directory_path):
|
248
|
-
for file in files:
|
249
|
-
yield os.path.join(root, file)
|
233
|
+
# Create scanner and get file stores
|
234
|
+
scanner = cls(directory_path)
|
235
|
+
|
236
|
+
# Configure suffix filtering
|
237
|
+
if pattern:
|
238
|
+
if pattern.startswith("*."):
|
239
|
+
suffix_allow_list = [pattern[2:]]
|
240
|
+
elif "*" in pattern:
|
241
|
+
example_suffix = pattern
|
250
242
|
else:
|
251
|
-
|
252
|
-
file_path = os.path.join(self.directory_path, file)
|
253
|
-
if os.path.isfile(file_path):
|
254
|
-
yield file_path
|
243
|
+
example_suffix = pattern
|
255
244
|
|
256
|
-
|
257
|
-
|
258
|
-
|
245
|
+
# Use scanner to find files and create objects
|
246
|
+
file_stores = scanner.scan(
|
247
|
+
factory=factory,
|
248
|
+
recursive=recursive,
|
249
|
+
suffix_allow_list=suffix_allow_list,
|
250
|
+
example_suffix=example_suffix,
|
251
|
+
)
|
252
|
+
|
253
|
+
# Convert to ScenarioList
|
254
|
+
result = ScenarioList()
|
255
|
+
for file_store in file_stores:
|
256
|
+
result.append(Scenario({key_name: file_store}))
|
257
|
+
|
258
|
+
return result
|
edsl/scenarios/file_methods.py
CHANGED
@@ -2,8 +2,13 @@ from typing import Optional, Dict, Type
|
|
2
2
|
from abc import ABC, abstractmethod
|
3
3
|
import importlib.metadata
|
4
4
|
import importlib.util
|
5
|
+
import mimetypes
|
5
6
|
from ..utilities import is_notebook
|
6
7
|
|
8
|
+
# Register MIME types for video formats if they aren't already
|
9
|
+
mimetypes.add_type('video/mp4', '.mp4')
|
10
|
+
mimetypes.add_type('video/webm', '.webm')
|
11
|
+
|
7
12
|
|
8
13
|
class FileMethods(ABC):
|
9
14
|
_handlers: Dict[str, Type["FileMethods"]] = {}
|