edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/base/data_transfer_models.py +15 -4
  8. edsl/buckets/__init__.py +8 -3
  9. edsl/buckets/bucket_collection.py +9 -3
  10. edsl/buckets/model_buckets.py +4 -2
  11. edsl/buckets/token_bucket.py +2 -2
  12. edsl/buckets/token_bucket_client.py +5 -3
  13. edsl/caching/cache.py +131 -62
  14. edsl/caching/cache_entry.py +70 -58
  15. edsl/caching/sql_dict.py +17 -0
  16. edsl/cli.py +99 -0
  17. edsl/config/config_class.py +16 -0
  18. edsl/conversation/__init__.py +31 -0
  19. edsl/coop/coop.py +276 -242
  20. edsl/coop/coop_jobs_objects.py +59 -0
  21. edsl/coop/coop_objects.py +29 -0
  22. edsl/coop/coop_regular_objects.py +26 -0
  23. edsl/coop/utils.py +24 -19
  24. edsl/dataset/dataset.py +338 -101
  25. edsl/dataset/dataset_operations_mixin.py +216 -180
  26. edsl/db_list/sqlite_list.py +349 -0
  27. edsl/inference_services/__init__.py +40 -5
  28. edsl/inference_services/exceptions.py +11 -0
  29. edsl/inference_services/services/anthropic_service.py +5 -2
  30. edsl/inference_services/services/aws_bedrock.py +6 -2
  31. edsl/inference_services/services/azure_ai.py +6 -2
  32. edsl/inference_services/services/google_service.py +7 -3
  33. edsl/inference_services/services/mistral_ai_service.py +6 -2
  34. edsl/inference_services/services/open_ai_service.py +6 -2
  35. edsl/inference_services/services/perplexity_service.py +6 -2
  36. edsl/inference_services/services/test_service.py +94 -5
  37. edsl/interviews/answering_function.py +167 -59
  38. edsl/interviews/interview.py +124 -72
  39. edsl/interviews/interview_task_manager.py +10 -0
  40. edsl/interviews/request_token_estimator.py +8 -0
  41. edsl/invigilators/invigilators.py +35 -13
  42. edsl/jobs/async_interview_runner.py +146 -104
  43. edsl/jobs/data_structures.py +6 -4
  44. edsl/jobs/decorators.py +61 -0
  45. edsl/jobs/fetch_invigilator.py +61 -18
  46. edsl/jobs/html_table_job_logger.py +14 -2
  47. edsl/jobs/jobs.py +180 -104
  48. edsl/jobs/jobs_component_constructor.py +2 -2
  49. edsl/jobs/jobs_interview_constructor.py +2 -0
  50. edsl/jobs/jobs_pricing_estimation.py +154 -113
  51. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  52. edsl/jobs/jobs_runner_status.py +30 -25
  53. edsl/jobs/progress_bar_manager.py +79 -0
  54. edsl/jobs/remote_inference.py +35 -1
  55. edsl/key_management/key_lookup_builder.py +6 -1
  56. edsl/language_models/language_model.py +110 -12
  57. edsl/language_models/model.py +10 -3
  58. edsl/language_models/price_manager.py +176 -71
  59. edsl/language_models/registry.py +5 -0
  60. edsl/notebooks/notebook.py +77 -10
  61. edsl/questions/VALIDATION_README.md +134 -0
  62. edsl/questions/__init__.py +24 -1
  63. edsl/questions/exceptions.py +21 -0
  64. edsl/questions/question_dict.py +201 -16
  65. edsl/questions/question_multiple_choice_with_other.py +624 -0
  66. edsl/questions/question_registry.py +2 -1
  67. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  68. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  69. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  70. edsl/questions/validation_analysis.py +185 -0
  71. edsl/questions/validation_cli.py +131 -0
  72. edsl/questions/validation_html_report.py +404 -0
  73. edsl/questions/validation_logger.py +136 -0
  74. edsl/results/result.py +115 -46
  75. edsl/results/results.py +702 -171
  76. edsl/scenarios/construct_download_link.py +16 -3
  77. edsl/scenarios/directory_scanner.py +226 -226
  78. edsl/scenarios/file_methods.py +5 -0
  79. edsl/scenarios/file_store.py +150 -9
  80. edsl/scenarios/handlers/__init__.py +5 -1
  81. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  82. edsl/scenarios/handlers/webm_file_store.py +104 -0
  83. edsl/scenarios/scenario.py +120 -101
  84. edsl/scenarios/scenario_list.py +800 -727
  85. edsl/scenarios/scenario_list_gc_test.py +146 -0
  86. edsl/scenarios/scenario_list_memory_test.py +214 -0
  87. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  88. edsl/scenarios/scenario_selector.py +5 -4
  89. edsl/scenarios/scenario_source.py +1990 -0
  90. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  91. edsl/surveys/survey.py +22 -0
  92. edsl/tasks/__init__.py +4 -2
  93. edsl/tasks/task_history.py +198 -36
  94. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  95. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  96. edsl/utilities/__init__.py +2 -1
  97. edsl/utilities/decorators.py +121 -0
  98. edsl/utilities/memory_debugger.py +1010 -0
  99. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
  100. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
  101. edsl/jobs/jobs_runner_asyncio.py +0 -281
  102. edsl/language_models/unused/fake_openai_service.py +0 -60
  103. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
  104. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
  105. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
@@ -32,7 +32,7 @@ class ConstructDownloadLink:
32
32
 
33
33
  def create_link(
34
34
  self, custom_filename: Optional[str] = None, style: Optional[dict] = None
35
- ) -> HTML:
35
+ ) -> "HTML":
36
36
  """Create an HTML download link wrapped in an HTML display object.
37
37
 
38
38
  Args:
@@ -44,7 +44,14 @@ class ConstructDownloadLink:
44
44
  Returns:
45
45
  HTML: A displayable HTML object containing the styled download link.
46
46
  """
47
- from ..display import HTML
47
+ # We'll use a string annotation instead of an import for doctests
48
+ try:
49
+ from edsl.display import HTML
50
+ except ImportError:
51
+ # For doctest, provide a mock HTML class
52
+ class HTML:
53
+ def __init__(self, content): self.content = content
54
+ def _repr_html_(self): return self.content
48
55
 
49
56
  html = self.html_create_link(custom_filename, style)
50
57
  return HTML(html)
@@ -137,7 +144,13 @@ class ConstructDownloadLink:
137
144
  )._repr_html_()
138
145
  )
139
146
 
140
- from ..display import HTML
147
+ try:
148
+ from edsl.display import HTML
149
+ except ImportError:
150
+ # For doctest, provide a mock HTML class
151
+ class HTML:
152
+ def __init__(self, content): self.content = content
153
+ def _repr_html_(self): return self.content
141
154
 
142
155
  return HTML(
143
156
  '<div style="display: flex; gap: 10px;">' + "".join(html_parts) + "</div>"
@@ -1,258 +1,258 @@
1
1
  """
2
- The DirectoryScanner module provides functionality for finding and processing files in directories.
2
+ DirectoryScanner provides functionality for scanning directories and creating ScenarioLists from files.
3
3
 
4
- This module implements the DirectoryScanner class, which is designed to scan directories
5
- for files matching specific criteria and process them using a factory function. It supports
6
- recursive scanning, filtering by file extensions, and both eager and lazy iteration over
7
- the matching files.
4
+ This module contains the DirectoryScanner class which handles scanning directories,
5
+ filtering files based on patterns, and creating Scenario objects from files.
8
6
  """
9
7
 
10
- from dataclasses import dataclass
11
- from typing import Optional, List, Iterator, TypeVar, Callable
12
8
  import os
9
+ from typing import Optional, List, Callable, Any
10
+ from .scenario import Scenario
11
+ from .file_store import FileStore
12
+ from .exceptions import FileNotFoundScenarioError
13
13
 
14
- # Generic type variable for the factory function's return type
15
- T = TypeVar("T")
14
+ class DirectoryScanner:
15
+ """A class for scanning directories and creating ScenarioLists from files."""
16
16
 
17
+ def __init__(self, directory_path: str):
18
+ """Initialize the DirectoryScanner with a directory path.
17
19
 
18
- @dataclass
19
- class DirectoryScanner:
20
- """
21
- A utility class for finding and processing files in directories.
22
-
23
- DirectoryScanner provides methods to scan directories for files that match specific
24
- criteria, such as file extensions. It can process matching files using a factory
25
- function that converts file paths to objects of a specified type.
26
-
27
- The scanner supports both eager (scan) and lazy (iter_scan) iteration, recursive
28
- directory traversal, and flexible filtering based on file extensions.
29
-
30
- Attributes:
31
- directory_path: The path to the directory to scan.
32
-
33
- Examples:
34
- >>> import tempfile
35
- >>> import os
36
- >>> # Create a temporary directory with some files
37
- >>> with tempfile.TemporaryDirectory() as tmpdir:
38
- ... # Create a few files with different extensions
39
- ... _ = open(os.path.join(tmpdir, "file1.txt"), "w").write("content")
40
- ... _ = open(os.path.join(tmpdir, "file2.txt"), "w").write("content")
41
- ... _ = open(os.path.join(tmpdir, "image.jpg"), "w").write("content")
42
- ... # Create a scanner and find all text files
43
- ... scanner = DirectoryScanner(tmpdir)
44
- ... txt_files = scanner.scan(lambda path: path, suffix_allow_list=["txt"])
45
- ... len(txt_files)
46
- ... # Use a factory to process files
47
- ... def get_filename(path):
48
- ... return os.path.basename(path)
49
- ... filenames = scanner.scan(get_filename)
50
- ... sorted(filenames)
51
- 2
52
- ['file1.txt', 'file2.txt', 'image.jpg']
53
- """
20
+ Args:
21
+ directory_path (str): The path to the directory to scan.
54
22
 
55
- directory_path: str
23
+ Raises:
24
+ FileNotFoundScenarioError: If the specified directory does not exist.
25
+ """
26
+ self.directory_path = directory_path
27
+ if not os.path.isdir(directory_path):
28
+ raise FileNotFoundScenarioError(f"Directory not found: {directory_path}")
56
29
 
57
30
  def scan(
58
31
  self,
59
- factory: Callable[[str], T],
32
+ factory: Callable[[str], Any] = FileStore,
60
33
  recursive: bool = False,
61
34
  suffix_allow_list: Optional[List[str]] = None,
62
- suffix_exclude_list: Optional[List[str]] = None,
63
35
  example_suffix: Optional[str] = None,
64
- include_no_extension: bool = True,
65
- ) -> List[T]:
36
+ ) -> List[Any]:
37
+ """Scan the directory and create objects from files.
38
+
39
+ Args:
40
+ factory (Callable[[str], Any]): A function that creates objects from file paths.
41
+ Defaults to FileStore.
42
+ recursive (bool): Whether to scan subdirectories recursively.
43
+ suffix_allow_list (Optional[List[str]]): List of file extensions to include.
44
+ example_suffix (Optional[str]): Example suffix pattern for filtering.
45
+
46
+ Returns:
47
+ List[Any]: List of objects created by the factory function.
66
48
  """
67
- Eagerly scan directory and return a list of objects created by the factory function.
49
+ result = []
68
50
 
69
- This method performs a scan of the directory, filtering files based on the provided
70
- criteria, and applies the factory function to each matching file path. It returns
71
- a complete list of processed results.
51
+ def should_include_file(filename: str) -> bool:
52
+ if suffix_allow_list:
53
+ return any(filename.endswith(f".{suffix}") for suffix in suffix_allow_list)
54
+ if example_suffix:
55
+ if example_suffix.startswith("*."):
56
+ return filename.endswith(example_suffix[1:])
57
+ # Handle other wildcard patterns if needed
58
+ return True
59
+
60
+ def scan_dir(current_path: str):
61
+ for entry in os.scandir(current_path):
62
+ if entry.is_file() and should_include_file(entry.name):
63
+ try:
64
+ result.append(factory(entry.path))
65
+ except Exception as e:
66
+ import warnings
67
+ warnings.warn(f"Failed to process file {entry.path}: {str(e)}")
68
+ elif entry.is_dir() and recursive:
69
+ scan_dir(entry.path)
70
+
71
+ scan_dir(self.directory_path)
72
+ return result
73
+
74
+ @classmethod
75
+ def scan_directory(
76
+ cls,
77
+ directory: str,
78
+ pattern: str = "*",
79
+ recursive: bool = False,
80
+ metadata: bool = True,
81
+ ignore_dirs: List[str] = None,
82
+ ignore_files: List[str] = None,
83
+ ) -> Any:
84
+ """Scan a directory and create a ScenarioList from the files.
72
85
 
73
86
  Args:
74
- factory: A callable that takes a file path string and returns an object of type T.
75
- This is applied to each matching file path.
76
- recursive: If True, traverses subdirectories recursively. If False, only scans
77
- the top-level directory.
78
- suffix_allow_list: A list of file extensions (without dots) to include.
79
- If provided, only files with these extensions are included.
80
- suffix_exclude_list: A list of file extensions to exclude. This takes precedence
81
- over suffix_allow_list.
82
- example_suffix: If provided, only include files ending with this exact suffix.
83
- This checks the entire filename, not just the extension.
84
- include_no_extension: Whether to include files without extensions. Defaults to True.
87
+ directory (str): The directory path to scan
88
+ pattern (str): File pattern to match (e.g., "*.txt", "*.{jpg,png}")
89
+ recursive (bool): Whether to scan subdirectories recursively
90
+ metadata (bool): Whether to include file metadata in the scenarios
91
+ ignore_dirs (List[str]): List of directory names to ignore
92
+ ignore_files (List[str]): List of file patterns to ignore
85
93
 
86
94
  Returns:
87
- A list of objects created by applying the factory function to each matching file path.
88
-
89
- Examples:
90
- >>> import tempfile
91
- >>> import os
92
- >>> with tempfile.TemporaryDirectory() as tmpdir:
93
- ... # Create test files
94
- ... _ = open(os.path.join(tmpdir, "doc1.txt"), "w").write("content")
95
- ... _ = open(os.path.join(tmpdir, "doc2.md"), "w").write("content")
96
- ... os.mkdir(os.path.join(tmpdir, "subdir"))
97
- ... _ = open(os.path.join(tmpdir, "subdir", "doc3.txt"), "w").write("content")
98
- ... # Scan for text files only
99
- ... scanner = DirectoryScanner(tmpdir)
100
- ... paths = scanner.scan(lambda p: p, suffix_allow_list=["txt"])
101
- ... len(paths)
102
- ... # Recursive scan for all files
103
- ... all_paths = scanner.scan(lambda p: p, recursive=True)
104
- ... len(all_paths)
105
- ... # Exclude specific extensions
106
- ... no_md = scanner.scan(lambda p: p, recursive=True, suffix_exclude_list=["md"])
107
- ... len(no_md)
108
- 1
109
- 3
110
- 2
111
-
112
- Notes:
113
- - This method is eager and collects all results into memory. For large directories,
114
- consider using iter_scan instead.
115
- - The filtering logic applies filters in this order: exclude list, example suffix,
116
- allow list, and no extension.
95
+ ScenarioList: A ScenarioList containing one scenario per matching file
117
96
  """
118
- return list(
119
- self.iter_scan(
120
- factory,
121
- recursive=recursive,
122
- suffix_allow_list=suffix_allow_list,
123
- suffix_exclude_list=suffix_exclude_list,
124
- example_suffix=example_suffix,
125
- include_no_extension=include_no_extension,
126
- )
127
- )
128
-
129
- def iter_scan(
130
- self,
131
- factory: Callable[[str], T],
97
+ from .scenario_list import ScenarioList
98
+
99
+ # Handle default values
100
+ ignore_dirs = ignore_dirs or []
101
+ ignore_files = ignore_files or []
102
+
103
+ # Import glob for pattern matching
104
+ import glob
105
+ import fnmatch
106
+
107
+ # Normalize directory path
108
+ directory = os.path.abspath(directory)
109
+
110
+ # Prepare result container
111
+ scenarios = []
112
+
113
+ # Pattern matching function
114
+ def matches_pattern(filename, pattern):
115
+ return fnmatch.fnmatch(filename, pattern)
116
+
117
+ # File gathering function
118
+ def gather_files(current_dir, current_pattern):
119
+ # Create the full path pattern
120
+ path_pattern = os.path.join(current_dir, current_pattern)
121
+
122
+ # Get all matching files
123
+ for file_path in glob.glob(path_pattern, recursive=recursive):
124
+ if os.path.isfile(file_path):
125
+ # Check if file should be ignored
126
+ file_name = os.path.basename(file_path)
127
+ if any(matches_pattern(file_name, ignore_pattern) for ignore_pattern in ignore_files):
128
+ continue
129
+
130
+ # Create FileStore object
131
+ file_store = FileStore(file_path)
132
+
133
+ # Create scenario
134
+ scenario_data = {"file": file_store}
135
+
136
+ # Add metadata if requested
137
+ if metadata:
138
+ file_stat = os.stat(file_path)
139
+ scenario_data.update({
140
+ "file_path": file_path,
141
+ "file_name": file_name,
142
+ "file_size": file_stat.st_size,
143
+ "file_created": file_stat.st_ctime,
144
+ "file_modified": file_stat.st_mtime,
145
+ })
146
+
147
+ scenarios.append(Scenario(scenario_data))
148
+
149
+ # Process the directory
150
+ if recursive:
151
+ for root, dirs, files in os.walk(directory):
152
+ # Skip ignored directories
153
+ dirs[:] = [d for d in dirs if d not in ignore_dirs]
154
+
155
+ # Process files in this directory
156
+ gather_files(root, pattern)
157
+ else:
158
+ gather_files(directory, pattern)
159
+
160
+ # Return as ScenarioList
161
+ return ScenarioList(scenarios)
162
+
163
+ @classmethod
164
+ def create_scenario_list(
165
+ cls,
166
+ path: Optional[str] = None,
132
167
  recursive: bool = False,
168
+ key_name: str = "content",
169
+ factory: Callable[[str], Any] = FileStore,
133
170
  suffix_allow_list: Optional[List[str]] = None,
134
- suffix_exclude_list: Optional[List[str]] = None,
135
171
  example_suffix: Optional[str] = None,
136
- include_no_extension: bool = True,
137
- ) -> Iterator[T]:
138
- """
139
- Lazily scan directory and yield objects created by the factory function.
140
-
141
- This method performs a lazy scan of the directory, filtering files based on the provided
142
- criteria, and applies the factory function to each matching file path. It yields
143
- results one by one, allowing for memory-efficient processing of large directories.
144
-
172
+ ) -> Any:
173
+ """Create a ScenarioList from files in a directory.
174
+
145
175
  Args:
146
- factory: A callable that takes a file path string and returns an object of type T.
147
- This is applied to each matching file path.
148
- recursive: If True, traverses subdirectories recursively. If False, only scans
149
- the top-level directory.
150
- suffix_allow_list: A list of file extensions (without dots) to include.
151
- If provided, only files with these extensions are included.
152
- suffix_exclude_list: A list of file extensions to exclude. This takes precedence
153
- over suffix_allow_list.
154
- example_suffix: If provided, only include files ending with this exact suffix.
155
- This checks the entire filename, not just the extension.
156
- include_no_extension: Whether to include files without extensions. Defaults to True.
157
-
158
- Yields:
159
- Objects created by applying the factory function to each matching file path,
160
- yielded one at a time.
161
-
162
- Examples:
163
- >>> import tempfile
164
- >>> import os
165
- >>> with tempfile.TemporaryDirectory() as tmpdir:
166
- ... # Create test files
167
- ... _ = open(os.path.join(tmpdir, "doc1.txt"), "w").write("content")
168
- ... _ = open(os.path.join(tmpdir, "doc2.md"), "w").write("content")
169
- ... # Process files lazily
170
- ... scanner = DirectoryScanner(tmpdir)
171
- ... for path in scanner.iter_scan(lambda p: p):
172
- ... # Process each file path without loading all into memory
173
- ... file_exists = os.path.exists(path)
174
- ... assert file_exists
175
-
176
- Notes:
177
- - This method is lazy and yields results as they are processed, making it
178
- suitable for memory-efficient processing of large directories.
179
- - The filtering logic is identical to the scan method.
180
- """
176
+ path (Optional[str]): The directory path to scan, optionally including a wildcard pattern.
177
+ recursive (bool): Whether to scan subdirectories recursively.
178
+ key_name (str): The key to use for the FileStore object in each Scenario.
179
+ factory (Callable[[str], Any]): Factory function to create objects from files.
180
+ suffix_allow_list (Optional[List[str]]): List of file extensions to include.
181
+ example_suffix (Optional[str]): Example suffix pattern for filtering.
181
182
 
182
- def should_include_file(filepath: str) -> bool:
183
- """
184
- Determine if a file should be included based on filtering criteria.
185
-
186
- This helper function applies all the filtering rules to determine
187
- if a given file path should be included in the results.
188
-
189
- Args:
190
- filepath: The path to the file to check.
191
-
192
- Returns:
193
- True if the file should be included, False otherwise.
194
- """
195
- # Get filename and extension
196
- basename = os.path.basename(filepath)
197
- _, ext = os.path.splitext(filepath)
198
- ext = ext[1:] if ext else "" # Remove leading dot from extension
199
-
200
- # Skip system files like .DS_Store by default
201
- if basename == '.DS_Store':
202
- return False
203
-
204
- # If there's a specific allow list and we have a wildcard filter
205
- if suffix_allow_list:
206
- # Only include files with the allowed extensions
207
- return ext in suffix_allow_list
208
-
209
- # Check exclusions (they take precedence)
210
- if suffix_exclude_list and ext in suffix_exclude_list:
211
- return False
183
+ Returns:
184
+ ScenarioList: A ScenarioList containing Scenario objects for all matching files.
212
185
 
213
- # Check example suffix if specified
214
- if example_suffix:
215
- # Handle wildcard patterns
216
- if '*' in example_suffix:
217
- import fnmatch
218
- basename = os.path.basename(filepath)
219
- # Try to match just the filename if the pattern doesn't contain path separators
220
- if '/' not in example_suffix and '\\' not in example_suffix:
221
- if not fnmatch.fnmatch(basename, example_suffix):
222
- return False
223
- else:
224
- # Match the full path
225
- if not fnmatch.fnmatch(filepath, example_suffix):
226
- return False
227
- elif not filepath.endswith(example_suffix):
228
- return False
229
-
230
- # Handle no extension case
231
- if not ext:
232
- return include_no_extension
186
+ Raises:
187
+ FileNotFoundScenarioError: If the specified directory does not exist.
188
+ """
189
+ # Import here to avoid circular import
190
+ from .scenario_list import ScenarioList
191
+
192
+ # Handle default case - use current directory
193
+ if path is None:
194
+ directory_path = os.getcwd()
195
+ pattern = None
196
+ else:
197
+ # Special handling for "**" pattern which indicates recursive scanning
198
+ has_recursive_pattern = "**" in path if path else False
233
199
 
234
- return True
200
+ # Check if path contains any wildcard
201
+ if path and ("*" in path):
202
+ # Handle "**/*.ext" pattern - find the directory part before the **
203
+ if has_recursive_pattern:
204
+ # Extract the base directory by finding the part before **
205
+ parts = path.split("**")
206
+ if parts and parts[0]:
207
+ # Remove trailing slash if any
208
+ directory_path = parts[0].rstrip("/")
209
+ if not directory_path:
210
+ directory_path = os.getcwd()
211
+ # Get the pattern after **
212
+ pattern = parts[1] if len(parts) > 1 else None
213
+ if pattern and pattern.startswith("/"):
214
+ pattern = pattern[1:] # Remove leading slash
215
+ else:
216
+ directory_path = os.getcwd()
217
+ pattern = None
218
+ # Handle case where path is just a pattern (e.g., "*.py")
219
+ elif os.path.dirname(path) == "":
220
+ directory_path = os.getcwd()
221
+ pattern = os.path.basename(path)
222
+ else:
223
+ # Split into directory and pattern
224
+ directory_path = os.path.dirname(path)
225
+ if not directory_path:
226
+ directory_path = os.getcwd()
227
+ pattern = os.path.basename(path)
228
+ else:
229
+ # Path is a directory with no pattern
230
+ directory_path = path
231
+ pattern = None
235
232
 
236
- def iter_files() -> Iterator[str]:
237
- """
238
- Generate paths to all files in the directory, optionally recursively.
239
-
240
- This helper function yields file paths from the directory, handling
241
- the recursive option appropriately.
242
-
243
- Yields:
244
- Paths to files in the directory.
245
- """
246
- if recursive:
247
- for root, _, files in os.walk(self.directory_path):
248
- for file in files:
249
- yield os.path.join(root, file)
233
+ # Create scanner and get file stores
234
+ scanner = cls(directory_path)
235
+
236
+ # Configure suffix filtering
237
+ if pattern:
238
+ if pattern.startswith("*."):
239
+ suffix_allow_list = [pattern[2:]]
240
+ elif "*" in pattern:
241
+ example_suffix = pattern
250
242
  else:
251
- for file in os.listdir(self.directory_path):
252
- file_path = os.path.join(self.directory_path, file)
253
- if os.path.isfile(file_path):
254
- yield file_path
243
+ example_suffix = pattern
255
244
 
256
- for file_path in iter_files():
257
- if should_include_file(file_path):
258
- yield factory(file_path)
245
+ # Use scanner to find files and create objects
246
+ file_stores = scanner.scan(
247
+ factory=factory,
248
+ recursive=recursive,
249
+ suffix_allow_list=suffix_allow_list,
250
+ example_suffix=example_suffix,
251
+ )
252
+
253
+ # Convert to ScenarioList
254
+ result = ScenarioList()
255
+ for file_store in file_stores:
256
+ result.append(Scenario({key_name: file_store}))
257
+
258
+ return result
@@ -2,8 +2,13 @@ from typing import Optional, Dict, Type
2
2
  from abc import ABC, abstractmethod
3
3
  import importlib.metadata
4
4
  import importlib.util
5
+ import mimetypes
5
6
  from ..utilities import is_notebook
6
7
 
8
+ # Register MIME types for video formats if they aren't already
9
+ mimetypes.add_type('video/mp4', '.mp4')
10
+ mimetypes.add_type('video/webm', '.webm')
11
+
7
12
 
8
13
  class FileMethods(ABC):
9
14
  _handlers: Dict[str, Type["FileMethods"]] = {}