brkraw 0.5.3__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ from typing import Optional
6
+
7
+ from ...core import cache
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def cmd_cache(args: argparse.Namespace) -> int:
    """Dispatch to the selected ``cache`` subcommand.

    Falls back to printing the parser help and returning exit code 2
    when no subcommand handler was registered on ``args``.
    """
    subcommand = getattr(args, "cache_func", None)
    if subcommand is not None:
        return subcommand(args)
    args.parser.print_help()
    return 2
18
+
19
+
20
def cmd_info(args: argparse.Namespace) -> int:
    """Print the cache location, total size (human readable) and file count."""
    info = cache.get_info(root=args.root)

    # Reduce the raw byte count to the largest unit that keeps it below 1024.
    size_f = float(info["size"])
    unit = "B"
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if size_f < 1024:
            break
        size_f /= 1024

    print(f"Path: {info['path']}")
    print(f"Size: {size_f:.2f} {unit}")
    print(f"Files: {info['count']}")
    return 0
39
+
40
+
41
def cmd_clear(args: argparse.Namespace) -> int:
    """Remove all cached files, prompting for confirmation unless ``--yes``."""
    if not args.yes:
        info = cache.get_info(root=args.root)
        if not info["count"]:
            print("Cache is already empty.")
            return 0
        path = info["path"]
        question = f"Clear {info['count']} files from {path}? [y/N]: "
        try:
            answer = input(question).strip().lower()
        except EOFError:
            # Non-interactive stdin: treat as a declined prompt.
            answer = ""
        if answer not in ("y", "yes"):
            return 1

    cache.clear(root=args.root)
    print("Cache cleared.")
    return 0
59
+
60
+
61
def register(subparsers: argparse._SubParsersAction) -> None:  # type: ignore[name-defined]
    """Attach the ``cache`` command group (info/clear) to the CLI parser."""
    cache_parser = subparsers.add_parser("cache", help="Manage brkraw cache.")
    cache_parser.add_argument(
        "--root",
        help="Override config root directory (default: BRKRAW_CONFIG_HOME or ~/.brkraw).",
    )
    # `parser` is stored so cmd_cache can print help when no subcommand is given.
    cache_parser.set_defaults(func=cmd_cache, parser=cache_parser)
    cache_sub = cache_parser.add_subparsers(dest="cache_command")

    info_parser = cache_sub.add_parser("info", help="Show cache information.")
    info_parser.set_defaults(cache_func=cmd_info)

    clear_parser = cache_sub.add_parser("clear", help="Clear cache contents.")
    clear_parser.add_argument(
        "--yes",
        "-y",
        action="store_true",
        help="Do not prompt for confirmation.",
    )
    clear_parser.set_defaults(cache_func=cmd_clear)
@@ -11,7 +11,7 @@ import subprocess
11
11
 
12
12
  from brkraw.core import config as config_core
13
13
 
14
- logger = logging.getLogger("brkraw")
14
+ logger = logging.getLogger(__name__)
15
15
 
16
16
 
17
17
  def cmd_config(args: argparse.Namespace) -> int:
@@ -179,7 +179,7 @@ def register(subparsers: argparse._SubParsersAction) -> None: # type: ignore[na
179
179
  path_parser = config_sub.add_parser("path", help="Print a specific config path.")
180
180
  path_parser.add_argument(
181
181
  "name",
182
- choices=["root", "config", "rules", "specs", "transforms"],
182
+ choices=["root", "config", "rules", "specs", "transforms", "cache"],
183
183
  help="Path key to print.",
184
184
  )
185
185
  path_parser.set_defaults(config_func=cmd_path)
@@ -11,7 +11,6 @@ import json
11
11
  import logging
12
12
  import os
13
13
  import re
14
- import sys
15
14
  from pathlib import Path
16
15
  from typing import Any, Mapping, Optional, Dict, List, Tuple, cast, get_args
17
16
 
@@ -27,7 +26,7 @@ from brkraw.resolver.affine import SubjectPose, SubjectType
27
26
  from brkraw.apps.loader.types import AffineSpace
28
27
 
29
28
 
30
- logger = logging.getLogger("brkraw")
29
+ logger = logging.getLogger(__name__)
31
30
 
32
31
  _INVALID_CHARS = re.compile(r"[^A-Za-z0-9._-]+")
33
32
 
@@ -86,6 +85,28 @@ def cmd_convert(args: argparse.Namespace) -> int:
86
85
  return 2
87
86
  if not args.flatten_fg:
88
87
  args.flatten_fg = _env_flag("BRKRAW_CONVERT_FLATTEN_FG")
88
+
89
+ # resolve cycle_index/cycle_count from env
90
+ if args.cycle_index is None:
91
+ value = os.environ.get("BRKRAW_CONVERT_CYCLE_INDEX")
92
+ if value:
93
+ try:
94
+ args.cycle_index = int(value)
95
+ except ValueError:
96
+ logger.error("Invalid BRKRAW_CONVERT_CYCLE_INDEX: %s", value)
97
+ return 2
98
+ if args.cycle_count is None:
99
+ value = os.environ.get("BRKRAW_CONVERT_CYCLE_COUNT")
100
+ if value:
101
+ try:
102
+ args.cycle_count = int(value)
103
+ except ValueError:
104
+ logger.error("Invalid BRKRAW_CONVERT_CYCLE_COUNT: %s", value)
105
+ return 2
106
+ # if cycle_count is set but cycle_index is not, default cycle_index to 0
107
+ if args.cycle_index is None and args.cycle_count is not None:
108
+ args.cycle_index = 0
109
+
89
110
  if args.space is None:
90
111
  args.space = os.environ.get("BRKRAW_CONVERT_SPACE")
91
112
  if args.override_subject_type is None:
@@ -169,6 +190,7 @@ def cmd_convert(args: argparse.Namespace) -> int:
169
190
  hook_args_by_name = merge_hook_args(hook_args_by_name, hook_args_cli)
170
191
 
171
192
  loader = load(args.path, prefix="Loading")
193
+ logger.debug("Dataset: %s loaded", args.path)
172
194
  try:
173
195
  override_header = nifti_resolver.load_header_overrides(args.header)
174
196
  except ValueError:
@@ -224,7 +246,9 @@ def cmd_convert(args: argparse.Namespace) -> int:
224
246
  if scan_id is None:
225
247
  continue
226
248
  scan = loader.get_scan(scan_id)
249
+ logger.debug("Processing scan %s.", scan_id)
227
250
  reco_ids = [args.reco_id] if args.reco_id is not None else list(scan.avail.keys())
251
+ logger.debug("Recos: %s", reco_ids or "None")
228
252
  if not reco_ids:
229
253
  if getattr(scan, "_converter_hook", None):
230
254
  reco_ids = [None]
@@ -252,18 +276,26 @@ def cmd_convert(args: argparse.Namespace) -> int:
252
276
  nii_list: List[Any] = []
253
277
  output_count = 1
254
278
  else:
255
- nii = loader.convert(
256
- scan_id,
257
- reco_id=reco_id,
258
- space=cast(AffineSpace, args.space),
259
- override_header=cast(Nifti1HeaderContents, override_header) if override_header else None,
260
- override_subject_type=cast(Optional[SubjectType], args.override_subject_type),
261
- override_subject_pose=cast(Optional[SubjectPose], args.override_subject_pose),
262
- flatten_fg=args.flatten_fg,
263
- xyz_units=cast(XYZUNIT, args.xyz_units),
264
- t_units=cast(TUNIT, args.t_units),
265
- hook_args_by_name=hook_args_by_name,
266
- )
279
+ try:
280
+ nii = loader.convert(
281
+ scan_id,
282
+ reco_id=reco_id,
283
+ space=cast(AffineSpace, args.space),
284
+ override_header=cast(Nifti1HeaderContents, override_header) if override_header else None,
285
+ override_subject_type=cast(Optional[SubjectType], args.override_subject_type),
286
+ override_subject_pose=cast(Optional[SubjectPose], args.override_subject_pose),
287
+ flatten_fg=args.flatten_fg,
288
+ xyz_units=cast(XYZUNIT, args.xyz_units),
289
+ t_units=cast(TUNIT, args.t_units),
290
+ hook_args_by_name=hook_args_by_name,
291
+ cycle_index=args.cycle_index,
292
+ cycle_count=args.cycle_count,
293
+ )
294
+ except Exception as exc:
295
+ logger.error("Conversion failed for scan %s reco %s: %s", scan_id, reco_id, exc)
296
+ if not batch_all and args.reco_id is not None:
297
+ return 2
298
+ continue
267
299
  if nii is None:
268
300
  if not batch_all and args.reco_id is not None:
269
301
  logger.error("No NIfTI output generated for scan %s reco %s.", scan_id, reco_id)
@@ -845,6 +877,16 @@ def _add_convert_args(
845
877
  action="store_true",
846
878
  help="Flatten frame-group dimensions to 4D when data is 5D or higher.",
847
879
  )
880
+ parser.add_argument(
881
+ "--cycle-index",
882
+ type=int,
883
+ help="Start cycle index (last axis). When set, read only a subset of cycles.",
884
+ )
885
+ parser.add_argument(
886
+ "--cycle-count",
887
+ type=int,
888
+ help="Number of cycles to read starting at --cycle-index. When omitted, reads to the end.",
889
+ )
848
890
  parser.add_argument(
849
891
  "--no-compress",
850
892
  dest="compress",
@@ -15,7 +15,7 @@ from brkraw.core import formatter
15
15
  from brkraw.specs import hook as converter_core
16
16
  import yaml
17
17
 
18
- logger = logging.getLogger("brkraw")
18
+ logger = logging.getLogger(__name__)
19
19
 
20
20
 
21
21
  def cmd_hook(args: argparse.Namespace) -> int:
@@ -7,7 +7,7 @@ from pathlib import Path
7
7
  from brkraw.core import config as config_core
8
8
  from brkraw.cli.utils import load
9
9
 
10
- logger = logging.getLogger("brkraw")
10
+ logger = logging.getLogger(__name__)
11
11
 
12
12
 
13
13
  def cmd_info(args: argparse.Namespace) -> int:
@@ -13,7 +13,7 @@ import yaml
13
13
  from brkraw.core import config as config_core
14
14
  from brkraw.apps import addon as addon_app
15
15
 
16
- logger = logging.getLogger("brkraw")
16
+ logger = logging.getLogger(__name__)
17
17
 
18
18
 
19
19
  def cmd_init(args: argparse.Namespace) -> int:
@@ -14,7 +14,7 @@ import numpy as np
14
14
 
15
15
  from brkraw.cli.utils import load
16
16
 
17
- logger = logging.getLogger("brkraw")
17
+ logger = logging.getLogger(__name__)
18
18
 
19
19
 
20
20
  def cmd_params(args: argparse.Namespace) -> int:
@@ -6,7 +6,7 @@ import argparse
6
6
  import logging
7
7
  from datetime import datetime
8
8
  from pathlib import Path
9
- from typing import Optional, Union
9
+ from typing import Optional
10
10
 
11
11
  import yaml
12
12
 
@@ -14,7 +14,7 @@ from brkraw.cli.utils import spinner
14
14
  from brkraw.core import config as config_core
15
15
  from brkraw.specs.pruner import prune_dataset_to_zip_from_spec
16
16
 
17
- logger = logging.getLogger("brkraw")
17
+ logger = logging.getLogger(__name__)
18
18
 
19
19
 
20
20
  def cmd_prune(args: argparse.Namespace) -> int:
brkraw/cli/main.py CHANGED
@@ -1,5 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import re
4
+ import os
5
+ import shutil
6
+ import subprocess
3
7
  import argparse
4
8
  from typing import Callable, List, Optional
5
9
  from ..core.entrypoints import list_entry_points as _iter_entry_points
@@ -10,6 +14,50 @@ from brkraw.core import config as config_core
10
14
  PLUGIN_GROUP = "brkraw.cli"
11
15
 
12
16
 
17
+ def _run_capture(cmd: list[str]) -> str:
18
+ p = subprocess.run(cmd, check=True, text=True, capture_output=True)
19
+ return p.stdout
20
+
21
+
22
+ def _pv_autoset_env() -> None:
23
+ if shutil.which("pvcmd") is None:
24
+ return
25
+
26
+ p = subprocess.run(["pvcmd", "-e", "ParxServer"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
27
+ if p.returncode != 0:
28
+ return
29
+
30
+ out = _run_capture(["pvcmd", "-a", "ParxServer", "-r", "ListPs", "-csv"])
31
+ matches = [line for line in out.splitlines() if "REQUEST_ATTR" in line]
32
+
33
+ if len(matches) == 0:
34
+ raise SystemExit("ERROR: No ps entry with REQUEST_ATTR found")
35
+ if len(matches) > 1:
36
+ msg = "ERROR: Multiple ps entries with REQUEST_ATTR found\n" + "\n".join(matches)
37
+ raise SystemExit(msg)
38
+
39
+ line = matches[0]
40
+ parts = line.split(";")
41
+
42
+ m = None
43
+ for f in parts:
44
+ f = f.strip()
45
+ m = re.match(r"^(?P<exp_path>.+)/(?P<scan_id>\d+)/pdata/(?P<reco_id>\d+)$", f)
46
+ if m:
47
+ break
48
+
49
+ if not m:
50
+ raise SystemExit("ERROR: No valid <exp_path>/<scan_id>/pdata/<reco_id> path found")
51
+
52
+ exp_path = m.group("exp_path")
53
+ scan_id = m.group("scan_id")
54
+ reco_id = m.group("reco_id")
55
+
56
+ os.environ["BRKRAW_PATH"] = exp_path
57
+ os.environ["BRKRAW_SCAN_ID"] = scan_id
58
+ os.environ["BRKRAW_RECO_ID"] = reco_id
59
+
60
+
13
61
  def _register_entry_point_commands(
14
62
  subparsers: argparse._SubParsersAction, # type: ignore[name-defined]
15
63
  ) -> None:
@@ -26,6 +74,7 @@ def _register_entry_point_commands(
26
74
  preferred = [
27
75
  "init",
28
76
  "config",
77
+ "cache",
29
78
  "session",
30
79
  "info",
31
80
  "params",
@@ -70,7 +119,8 @@ def main(argv: Optional[List[str]] = None) -> int:
70
119
  )
71
120
 
72
121
  _register_entry_point_commands(subparsers)
73
-
122
+ _pv_autoset_env()
123
+
74
124
  args = parser.parse_args(argv)
75
125
  if not hasattr(args, "func"):
76
126
  parser.print_help()
brkraw/cli/utils.py CHANGED
@@ -14,7 +14,7 @@ from typing import Iterator, List
14
14
 
15
15
  from brkraw.apps.loader import BrukerLoader
16
16
 
17
- logger = logging.getLogger("brkraw")
17
+ logger = logging.getLogger(__name__)
18
18
 
19
19
  @contextmanager
20
20
  def spinner(prefix: str = "Loading") -> Iterator[None]:
brkraw/core/cache.py ADDED
@@ -0,0 +1,87 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import logging
5
+ import shutil
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Optional, Union
8
+
9
+ from . import config
10
+
11
+ logger = logging.getLogger("brkraw.cache")
12
+
13
+
14
def get_info(
    root: Optional[Union[str, Path]] = None,
    path: Optional[Union[str, Path]] = None,
) -> Dict[str, Any]:
    """
    Get information about the current cache directory.

    Args:
        root: Configuration root directory (used to resolve default cache path).
        path: Explicit path to the cache directory. If provided, overrides 'root'.

    Returns:
        Dict with keys:
            - path: Path to cache directory
            - size: Total size in bytes (symlinked files excluded)
            - count: Number of regular files counted
    """
    if path is not None:
        cache_path = Path(path)
    else:
        cache_path = config.cache_dir(root)

    if not cache_path.exists():
        return {"path": cache_path, "size": 0, "count": 0}

    total_size = 0
    file_count = 0

    # NOTE(review): followlinks=True descends into symlinked directories, which
    # can loop on cyclic links — presumably cache contents are link-free; confirm.
    # Symlinked files themselves are skipped so targets are not double-counted.
    for dirpath, _, filenames in os.walk(str(cache_path), followlinks=True):
        for name in filenames:
            entry = Path(dirpath) / name
            try:
                if entry.is_symlink():
                    continue
                total_size += entry.stat().st_size
                file_count += 1
            except OSError:
                # File vanished or is unreadable mid-walk; skip rather than fail.
                continue

    return {
        "path": cache_path,
        "size": total_size,
        "count": file_count,
    }
58
+
59
+
60
def clear(
    root: Optional[Union[str, Path]] = None,
    path: Optional[Union[str, Path]] = None,
) -> None:
    """
    Clear all files in the cache directory.

    Args:
        root: Configuration root directory (used to resolve default cache path).
        path: Explicit path to the cache directory. If provided, overrides 'root'.
    """
    if path is not None:
        cache_path = Path(path)
    else:
        cache_path = config.cache_dir(root)

    # Nothing to do when the cache directory was never created.
    if not cache_path.exists():
        return

    logger.info("Clearing cache at: %s", cache_path)
    for entry in cache_path.iterdir():
        try:
            # Files and symlinks are unlinked; real directories removed recursively.
            if entry.is_file() or entry.is_symlink():
                entry.unlink()
            elif entry.is_dir():
                shutil.rmtree(entry)
        except Exception as exc:  # best effort: log the failure and keep going
            logger.warning("Failed to remove %s: %s", entry, exc)
brkraw/core/config.py CHANGED
@@ -65,6 +65,7 @@ class ConfigPaths:
65
65
  pruner_specs_dir: Path
66
66
  rules_dir: Path
67
67
  transforms_dir: Path
68
+ cache_dir: Path
68
69
 
69
70
 
70
71
  def resolve_root(root: Optional[Union[str, Path]] = None) -> Path:
@@ -85,6 +86,7 @@ def get_paths(root: Optional[Union[str, Path]] = None) -> ConfigPaths:
85
86
  pruner_specs_dir=base / "pruner_specs",
86
87
  rules_dir=base / "rules",
87
88
  transforms_dir=base / "transforms",
89
+ cache_dir=base / "cache",
88
90
  )
89
91
 
90
92
 
@@ -101,6 +103,7 @@ def get_path(name: str, root: Optional[Union[str, Path]] = None) -> Path:
101
103
  "pruner_specs": paths_obj.pruner_specs_dir,
102
104
  "rules": paths_obj.rules_dir,
103
105
  "transforms": paths_obj.transforms_dir,
106
+ "cache": paths_obj.cache_dir,
104
107
  }
105
108
  if name not in mapping:
106
109
  raise KeyError(f"Unknown config path: {name}")
@@ -126,6 +129,7 @@ def ensure_initialized(
126
129
  paths.pruner_specs_dir.mkdir(parents=True, exist_ok=True)
127
130
  paths.rules_dir.mkdir(parents=True, exist_ok=True)
128
131
  paths.transforms_dir.mkdir(parents=True, exist_ok=True)
132
+ paths.cache_dir.mkdir(parents=True, exist_ok=True)
129
133
  if create_config and not paths.config_file.exists():
130
134
  paths.config_file.write_text(DEFAULT_CONFIG_YAML, encoding="utf-8")
131
135
  return paths
@@ -140,6 +144,10 @@ def init(
140
144
  return ensure_initialized(root=root, create_config=create_config, exist_ok=exist_ok)
141
145
 
142
146
 
147
def cache_dir(root: Optional[Union[str, Path]] = None) -> Path:
    """Return the cache directory path resolved under *root*."""
    paths = get_paths(root=root)
    return paths.cache_dir
149
+
150
+
143
151
  def load_config(root: Optional[Union[str, Path]] = None) -> Optional[Dict[str, Any]]:
144
152
  paths = get_paths(root)
145
153
  if not paths.config_file.exists():
@@ -211,6 +219,7 @@ def clear_config(
211
219
  keep_specs: bool = False,
212
220
  keep_pruner_specs: bool = False,
213
221
  keep_transforms: bool = False,
222
+ keep_cache: bool = False,
214
223
  ) -> None:
215
224
  paths = get_paths(root=root)
216
225
  if not paths.root.exists():
@@ -225,6 +234,8 @@ def clear_config(
225
234
  _remove_tree(paths.pruner_specs_dir)
226
235
  if paths.transforms_dir.exists() and not keep_transforms:
227
236
  _remove_tree(paths.transforms_dir)
237
+ if paths.cache_dir.exists() and not keep_cache:
238
+ _remove_tree(paths.cache_dir)
228
239
  try:
229
240
  paths.root.rmdir()
230
241
  except OSError:
@@ -239,6 +250,7 @@ def clear(
239
250
  keep_specs: bool = False,
240
251
  keep_pruner_specs: bool = False,
241
252
  keep_transforms: bool = False,
253
+ keep_cache: bool = False,
242
254
  ) -> None:
243
255
  clear_config(
244
256
  root=root,
@@ -247,15 +259,19 @@ def clear(
247
259
  keep_specs=keep_specs,
248
260
  keep_pruner_specs=keep_pruner_specs,
249
261
  keep_transforms=keep_transforms,
262
+ keep_cache=keep_cache,
250
263
  )
251
264
 
252
265
 
253
266
  def configure_logging(
254
267
  *,
268
+ name: Optional[str] = None,
255
269
  root: Optional[Union[str, Path]] = None,
256
270
  level: Optional[Union[str, int]] = None,
257
271
  stream=None,
258
272
  ) -> logging.Logger:
273
+ if name is None:
274
+ name = "brkraw"
259
275
  config = resolve_config(root=root)
260
276
  if level is None:
261
277
  level = config.get("logging", {}).get("level", "INFO")
@@ -265,9 +281,9 @@ def configure_logging(
265
281
  if level == logging.INFO:
266
282
  fmt = "%(message)s"
267
283
  else:
268
- fmt = "%(levelname)s %(asctime)s %(message)s"
284
+ fmt = "%(asctime)s(%(levelname).1s): %(name)s:%(funcName)s - %(message)s"
269
285
  logging.basicConfig(level=level, format=fmt, stream=stream)
270
- return logging.getLogger("brkraw")
286
+ return logging.getLogger(name)
271
287
 
272
288
 
273
289
  def output_width(root: Optional[Union[str, Path]] = None, default: int = 120) -> int:
brkraw/core/fs.py CHANGED
@@ -221,6 +221,7 @@ class DatasetFS:
221
221
  top: str = "",
222
222
  *,
223
223
  as_objects: bool = False,
224
+ sort_entries: bool = True,
224
225
  ) -> Iterable[Tuple[str, List, List]]:
225
226
  """Yield (dirpath, direntries, fileentries) with archive-style paths.
226
227
 
@@ -228,13 +229,17 @@ class DatasetFS:
228
229
  top: Optional subdirectory to start from (anchor-aware).
229
230
  as_objects: When True, return DatasetDir/ZippedDir and
230
231
  DatasetFile/ZippedFile entries; otherwise return name strings.
232
+ sort_entries: When True, sort directory and file entries for deterministic output.
233
+ Set to False for faster traversal when ordering does not matter.
231
234
 
232
235
  Yields:
233
236
  Tuples of `(dirpath, direntries, fileentries)` using posix-style paths.
234
237
  """
235
238
  norm_top = top.strip("/")
236
- if self._anchor and norm_top and not norm_top.startswith(self._anchor):
237
- norm_top = f"{self._anchor}/{norm_top}"
239
+ if self._anchor and norm_top:
240
+ anchored = norm_top == self._anchor or norm_top.startswith(f"{self._anchor}/")
241
+ if not anchored:
242
+ norm_top = f"{self._anchor}/{norm_top}"
238
243
 
239
244
  if self._mode == "dir":
240
245
  base = self.root
@@ -254,17 +259,16 @@ class DatasetFS:
254
259
  rel = os.path.relpath(dirpath, base)
255
260
  rel = "" if rel == "." else rel.replace(os.sep, "/")
256
261
  rel = self._ensure_anchor(rel)
257
- dirnames = sorted(dirnames)
258
- filenames = sorted(filenames)
262
+ if sort_entries:
263
+ dirnames.sort()
264
+ filenames.sort()
265
+
259
266
  if as_objects:
260
267
  dir_objs = [
261
268
  DatasetDir(name=d, path=(f"{rel}/{d}".strip("/")), fs=self) for d in dirnames
262
269
  ]
263
270
  file_objs = [
264
- DatasetFile(
265
- name=f, path=(f"{rel}/{f}".strip("/")), fs=self
266
- )
267
- for f in filenames
271
+ DatasetFile(name=f, path=(f"{rel}/{f}".strip("/")), fs=self) for f in filenames
268
272
  ]
269
273
  yield rel, dir_objs, file_objs
270
274
  else:
@@ -272,10 +276,23 @@ class DatasetFS:
272
276
  else:
273
277
  assert self._zip is not None
274
278
  for dirpath, direntries, files in zipcore.walk(self._zip, top=norm_top):
279
+ if sort_entries:
280
+ try:
281
+ direntries = sorted(direntries, key=lambda d: d.name)
282
+ files = sorted(files, key=lambda f: f.name)
283
+ except Exception:
284
+ # If entries are plain strings or otherwise unsortable, fall back.
285
+ pass
286
+
275
287
  if as_objects:
276
288
  yield dirpath, direntries, files
277
289
  else:
278
- yield dirpath, [d.name for d in direntries], [f.name for f in files]
290
+ dnames = [d.name for d in direntries]
291
+ fnames = [f.name for f in files]
292
+ if sort_entries:
293
+ dnames.sort()
294
+ fnames.sort()
295
+ yield dirpath, dnames, fnames
279
296
 
280
297
  def open_binary(self, relpath: str) -> IO[bytes]:
281
298
  """Open a file by archive-relative path.