lsst-resources 29.0.0rc7__py3-none-any.whl → 29.2025.4600__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,11 +22,12 @@ import logging
22
22
  import os
23
23
  import posixpath
24
24
  import re
25
- import shutil
26
- import tempfile
25
+ import sys
27
26
  import urllib.parse
27
+ from collections import defaultdict
28
28
  from pathlib import Path, PurePath, PurePosixPath
29
29
  from random import Random
30
+ from typing import TypeAlias
30
31
 
31
32
  try:
32
33
  import fsspec
@@ -36,10 +37,10 @@ except ImportError:
36
37
  AbstractFileSystem = type
37
38
 
38
39
  from collections.abc import Iterable, Iterator
39
- from typing import TYPE_CHECKING, Any, Literal, overload
40
+ from typing import TYPE_CHECKING, Any, Literal, NamedTuple, overload
40
41
 
41
42
  from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
42
- from .utils import ensure_directory_is_writeable
43
+ from .utils import _get_num_workers, get_tempdir
43
44
 
44
45
  if TYPE_CHECKING:
45
46
  from .utils import TransactionProtocol
@@ -53,10 +54,81 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
53
54
  # Precomputed escaped hash
54
55
  ESCAPED_HASH = urllib.parse.quote("#")
55
56
 
56
- # Maximum number of worker threads for parallelized operations.
57
- # If greater than 10, be aware that this number has to be consistent
58
- # with connection pool sizing (for example in urllib3).
59
- MAX_WORKERS = 10
57
+
58
+ class MBulkResult(NamedTuple):
59
+ """Report on a bulk operation."""
60
+
61
+ success: bool
62
+ exception: Exception | None
63
+
64
+
65
+ _EXECUTOR_TYPE: TypeAlias = type[
66
+ concurrent.futures.ThreadPoolExecutor | concurrent.futures.ProcessPoolExecutor
67
+ ]
68
+
69
+ # Cache value for executor class so as not to issue warning multiple
70
+ # times but still allow tests to override the value.
71
+ _POOL_EXECUTOR_CLASS: _EXECUTOR_TYPE | None = None
72
+
73
+
74
+ def _get_executor_class() -> _EXECUTOR_TYPE:
75
+ """Return the executor class used for parallelized execution.
76
+
77
+ Returns
78
+ -------
79
+ cls : `concurrent.futures.Executor`
80
+ The ``Executor`` class. Default is
81
+ `concurrent.futures.ThreadPoolExecutor`. Can be set explicitly by
82
+ setting the ``$LSST_RESOURCES_EXECUTOR`` environment variable to
83
+ "threads" or "process". Returns the thread pool if the value of the
84
+ variable is not recognized.
85
+ """
86
+ global _POOL_EXECUTOR_CLASS
87
+
88
+ if _POOL_EXECUTOR_CLASS is not None:
89
+ return _POOL_EXECUTOR_CLASS
90
+
91
+ pool_executor_classes = {
92
+ "threads": concurrent.futures.ThreadPoolExecutor,
93
+ "process": concurrent.futures.ProcessPoolExecutor,
94
+ }
95
+ default_executor = "threads"
96
+ external = os.getenv("LSST_RESOURCES_EXECUTOR", default_executor)
97
+ if not external:
98
+ external = default_executor
99
+ if external not in pool_executor_classes:
100
+ log.warning(
101
+ "Unrecognized value of '%s' for LSST_RESOURCES_EXECUTOR env var. Using '%s'",
102
+ external,
103
+ default_executor,
104
+ )
105
+ external = default_executor
106
+ _POOL_EXECUTOR_CLASS = pool_executor_classes[external]
107
+ return _POOL_EXECUTOR_CLASS
108
+
109
+
110
+ @contextlib.contextmanager
111
+ def _patch_environ(new_values: dict[str, str]) -> Iterator[None]:
112
+ """Patch os.environ temporarily using the supplied values.
113
+
114
+ Parameters
115
+ ----------
116
+ new_values : `dict` [ `str`, `str` ]
117
+ New values to be stored in the environment.
118
+ """
119
+ old_values: dict[str, str] = {}
120
+ for k, v in new_values.items():
121
+ if k in os.environ:
122
+ old_values[k] = os.environ[k]
123
+ os.environ[k] = v
124
+
125
+ try:
126
+ yield
127
+ finally:
128
+ for k in new_values:
129
+ del os.environ[k]
130
+ if k in old_values:
131
+ os.environ[k] = old_values[k]
60
132
 
61
133
 
62
134
  class ResourcePath: # numpydoc ignore=PR02
@@ -296,6 +368,10 @@ class ResourcePath: # numpydoc ignore=PR02
296
368
  from .http import HttpResourcePath
297
369
 
298
370
  subclass = HttpResourcePath
371
+ elif parsed.scheme in {"dav", "davs"}:
372
+ from .dav import DavResourcePath
373
+
374
+ subclass = DavResourcePath
299
375
  elif parsed.scheme == "gs":
300
376
  from .gs import GSResourcePath
301
377
 
@@ -474,7 +550,7 @@ class ResourcePath: # numpydoc ignore=PR02
474
550
  return self, ""
475
551
 
476
552
  head, tail = self._pathModule.split(self.path)
477
- headuri = self._uri._replace(path=head)
553
+ headuri = self._uri._replace(path=head, fragment="", query="", params="")
478
554
 
479
555
  # The file part should never include quoted metacharacters
480
556
  tail = urllib.parse.unquote(tail)
@@ -544,7 +620,7 @@ class ResourcePath: # numpydoc ignore=PR02
544
620
  # regardless of the presence of a trailing separator
545
621
  originalPath = self._pathLib(self.path)
546
622
  parentPath = originalPath.parent
547
- return self.replace(path=str(parentPath), forceDirectory=True)
623
+ return self.replace(path=str(parentPath), forceDirectory=True, fragment="", query="", params="")
548
624
 
549
625
  def replace(
550
626
  self, forceDirectory: bool | None = None, isTemporary: bool = False, **kwargs: Any
@@ -574,9 +650,11 @@ class ResourcePath: # numpydoc ignore=PR02
574
650
  # Disallow a change in scheme
575
651
  if "scheme" in kwargs:
576
652
  raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
577
- return self.__class__(
653
+ result = self.__class__(
578
654
  self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
579
655
  )
656
+ result._copy_extra_attributes(self)
657
+ return result
580
658
 
581
659
  def updatedFile(self, newfile: str) -> ResourcePath:
582
660
  """Return new URI with an updated final component of the path.
@@ -789,9 +867,11 @@ class ResourcePath: # numpydoc ignore=PR02
789
867
  forceDirectory=forceDirectory,
790
868
  isTemporary=isTemporary,
791
869
  fragment=path_uri.fragment,
870
+ query=path_uri.query,
871
+ params=path_uri.params,
792
872
  )
793
873
 
794
- def relative_to(self, other: ResourcePath) -> str | None:
874
+ def relative_to(self, other: ResourcePath, walk_up: bool = False) -> str | None:
795
875
  """Return the relative path from this URI to the other URI.
796
876
 
797
877
  Parameters
@@ -799,6 +879,9 @@ class ResourcePath: # numpydoc ignore=PR02
799
879
  other : `ResourcePath`
800
880
  URI to use to calculate the relative path. Must be a parent
801
881
  of this URI.
882
+ walk_up : `bool`, optional
883
+ Control whether "``..``" can be used to resolve a relative path.
884
+ Default is `False`. Cannot be `True` on Python 3.11 or older.
802
885
 
803
886
  Returns
804
887
  -------
@@ -817,11 +900,22 @@ class ResourcePath: # numpydoc ignore=PR02
817
900
  if not {self.netloc, other.netloc}.issubset(local_netlocs):
818
901
  return None
819
902
 
903
+ # Rather than trying to guess a failure reason from the TypeError
904
+ # explicitly check for python 3.11. Doing this will simplify the
905
+ # rediscovery of a useless python version check when we set a new
906
+ # minimum version.
907
+ kwargs = {}
908
+ if walk_up:
909
+ if sys.version_info < (3, 12, 0):
910
+ raise TypeError("walk_up parameter can not be true in python 3.11 and older")
911
+
912
+ kwargs["walk_up"] = True
913
+
820
914
  enclosed_path = self._pathLib(self.relativeToPathRoot)
821
915
  parent_path = other.relativeToPathRoot
822
916
  subpath: str | None
823
917
  try:
824
- subpath = str(enclosed_path.relative_to(parent_path))
918
+ subpath = str(enclosed_path.relative_to(parent_path, **kwargs))
825
919
  except ValueError:
826
920
  subpath = None
827
921
  else:
@@ -839,67 +933,322 @@ class ResourcePath: # numpydoc ignore=PR02
839
933
  raise NotImplementedError()
840
934
 
841
935
  @classmethod
842
- def mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
936
+ def _group_uris(cls, uris: Iterable[ResourcePath]) -> dict[type[ResourcePath], list[ResourcePath]]:
937
+ """Group URIs by class/scheme."""
938
+ grouped: dict[type, list[ResourcePath]] = defaultdict(list)
939
+ for uri in uris:
940
+ grouped[uri.__class__].append(uri)
941
+ return grouped
942
+
943
+ @classmethod
944
+ def mexists(
945
+ cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
946
+ ) -> dict[ResourcePath, bool]:
843
947
  """Check for existence of multiple URIs at once.
844
948
 
845
949
  Parameters
846
950
  ----------
847
951
  uris : iterable of `ResourcePath`
848
952
  The URIs to test.
953
+ num_workers : `int` or `None`, optional
954
+ The number of parallel workers to use when checking for existence.
955
+ If `None`, the default value will be taken from the environment.
956
+ If this number is higher than the default and a thread pool is
957
+ used, there may not be enough cached connections available.
849
958
 
850
959
  Returns
851
960
  -------
852
961
  existence : `dict` of [`ResourcePath`, `bool`]
853
962
  Mapping of original URI to boolean indicating existence.
854
963
  """
855
- # Group by scheme to allow a subclass to be able to use
856
- # specialized implementations.
857
- grouped: dict[type, list[ResourcePath]] = {}
858
- for uri in uris:
859
- uri_class = uri.__class__
860
- if uri_class not in grouped:
861
- grouped[uri_class] = []
862
- grouped[uri_class].append(uri)
863
-
864
964
  existence: dict[ResourcePath, bool] = {}
865
- for uri_class in grouped:
866
- existence.update(uri_class._mexists(grouped[uri_class]))
965
+ for uri_class, group in cls._group_uris(uris).items():
966
+ existence.update(uri_class._mexists(group, num_workers=num_workers))
867
967
 
868
968
  return existence
869
969
 
870
970
  @classmethod
871
- def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
971
+ def _mexists(
972
+ cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
973
+ ) -> dict[ResourcePath, bool]:
872
974
  """Check for existence of multiple URIs at once.
873
975
 
874
976
  Implementation helper method for `mexists`.
875
977
 
978
+
979
+ Parameters
980
+ ----------
981
+ uris : iterable of `ResourcePath`
982
+ The URIs to test.
983
+ num_workers : `int` or `None`, optional
984
+ The number of parallel workers to use when checking for existence.
985
+ If `None`, the default value will be taken from the environment.
986
+
987
+ Returns
988
+ -------
989
+ existence : `dict` of [`ResourcePath`, `bool`]
990
+ Mapping of original URI to boolean indicating existence.
991
+ """
992
+ pool_executor_class = _get_executor_class()
993
+ if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
994
+ # Patch the environment to make it think there is only one worker
995
+ # for each subprocess.
996
+ with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
997
+ return cls._mexists_pool(pool_executor_class, uris)
998
+ else:
999
+ return cls._mexists_pool(pool_executor_class, uris, num_workers=num_workers)
1000
+
1001
+ @classmethod
1002
+ def _mexists_pool(
1003
+ cls,
1004
+ pool_executor_class: _EXECUTOR_TYPE,
1005
+ uris: Iterable[ResourcePath],
1006
+ *,
1007
+ num_workers: int | None = None,
1008
+ ) -> dict[ResourcePath, bool]:
1009
+ """Check for existence of multiple URIs at once using specified pool
1010
+ executor.
1011
+
1012
+ Implementation helper method for `_mexists`.
1013
+
876
1014
  Parameters
877
1015
  ----------
1016
+ pool_executor_class : `type` [ `concurrent.futures.Executor` ]
1017
+ Type of executor pool to use.
878
1018
  uris : iterable of `ResourcePath`
879
1019
  The URIs to test.
1020
+ num_workers : `int` or `None`, optional
1021
+ The number of parallel workers to use when checking for existence.
1022
+ If `None`, the default value will be taken from the environment.
880
1023
 
881
1024
  Returns
882
1025
  -------
883
1026
  existence : `dict` of [`ResourcePath`, `bool`]
884
1027
  Mapping of original URI to boolean indicating existence.
885
1028
  """
886
- exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
887
- future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
888
-
889
- results: dict[ResourcePath, bool] = {}
890
- for future in concurrent.futures.as_completed(future_exists):
891
- uri = future_exists[future]
892
- try:
893
- exists = future.result()
894
- except Exception:
895
- exists = False
896
- results[uri] = exists
1029
+ max_workers = num_workers if num_workers is not None else _get_num_workers()
1030
+ with pool_executor_class(max_workers=max_workers) as exists_executor:
1031
+ future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
1032
+
1033
+ results: dict[ResourcePath, bool] = {}
1034
+ for future in concurrent.futures.as_completed(future_exists):
1035
+ uri = future_exists[future]
1036
+ try:
1037
+ exists = future.result()
1038
+ except Exception:
1039
+ exists = False
1040
+ results[uri] = exists
1041
+ return results
1042
+
1043
+ @classmethod
1044
+ def mtransfer(
1045
+ cls,
1046
+ transfer: str,
1047
+ from_to: Iterable[tuple[ResourcePath, ResourcePath]],
1048
+ overwrite: bool = False,
1049
+ transaction: TransactionProtocol | None = None,
1050
+ do_raise: bool = True,
1051
+ ) -> dict[ResourcePath, MBulkResult]:
1052
+ """Transfer many files in bulk.
1053
+
1054
+ Parameters
1055
+ ----------
1056
+ transfer : `str`
1057
+ Mode to use for transferring the resource. Generically there are
1058
+ many standard options: copy, link, symlink, hardlink, relsymlink.
1059
+ Not all URIs support all modes.
1060
+ from_to : `list` [ `tuple` [ `ResourcePath`, `ResourcePath` ] ]
1061
+ A sequence of the source URIs and the target URIs.
1062
+ overwrite : `bool`, optional
1063
+ Allow an existing file to be overwritten. Defaults to `False`.
1064
+ transaction : `~lsst.resources.utils.TransactionProtocol`, optional
1065
+ A transaction object that can (depending on implementation)
1066
+ rollback transfers on error. Not guaranteed to be implemented.
1067
+ The transaction object must be thread safe.
1068
+ do_raise : `bool`, optional
1069
+ If `True` an `ExceptionGroup` will be raised containing any
1070
+ exceptions raised by the individual transfers. If `False`, or if
1071
+ there were no exceptions, a dict reporting the status of each
1072
+ `ResourcePath` will be returned.
1073
+
1074
+ Returns
1075
+ -------
1076
+ copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
1077
+ A dict of all the transfer attempts with a value indicating
1078
+ whether the transfer succeeded for the target URI. If ``do_raise``
1079
+ is `True`, this will only be returned if there are no errors.
1080
+ """
1081
+ pool_executor_class = _get_executor_class()
1082
+ if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
1083
+ # Patch the environment to make it think there is only one worker
1084
+ # for each subprocess.
1085
+ with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
1086
+ return cls._mtransfer(
1087
+ pool_executor_class,
1088
+ transfer,
1089
+ from_to,
1090
+ overwrite=overwrite,
1091
+ transaction=transaction,
1092
+ do_raise=do_raise,
1093
+ )
1094
+ return cls._mtransfer(
1095
+ pool_executor_class,
1096
+ transfer,
1097
+ from_to,
1098
+ overwrite=overwrite,
1099
+ transaction=transaction,
1100
+ do_raise=do_raise,
1101
+ )
1102
+
1103
+ @classmethod
1104
+ def _mtransfer(
1105
+ cls,
1106
+ pool_executor_class: _EXECUTOR_TYPE,
1107
+ transfer: str,
1108
+ from_to: Iterable[tuple[ResourcePath, ResourcePath]],
1109
+ overwrite: bool = False,
1110
+ transaction: TransactionProtocol | None = None,
1111
+ do_raise: bool = True,
1112
+ ) -> dict[ResourcePath, MBulkResult]:
1113
+ """Transfer many files in bulk.
1114
+
1115
+ Parameters
1116
+ ----------
1117
+ transfer : `str`
1118
+ Mode to use for transferring the resource. Generically there are
1119
+ many standard options: copy, link, symlink, hardlink, relsymlink.
1120
+ Not all URIs support all modes.
1121
+ from_to : `list` [ `tuple` [ `ResourcePath`, `ResourcePath` ] ]
1122
+ A sequence of the source URIs and the target URIs.
1123
+ overwrite : `bool`, optional
1124
+ Allow an existing file to be overwritten. Defaults to `False`.
1125
+ transaction : `~lsst.resources.utils.TransactionProtocol`, optional
1126
+ A transaction object that can (depending on implementation)
1127
+ rollback transfers on error. Not guaranteed to be implemented.
1128
+ The transaction object must be thread safe.
1129
+ do_raise : `bool`, optional
1130
+ If `True` an `ExceptionGroup` will be raised containing any
1131
+ exceptions raised by the individual transfers. Else a dict
1132
+ reporting the status of each `ResourcePath` will be returned.
1133
+
1134
+ Returns
1135
+ -------
1136
+ copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
1137
+ A dict of all the transfer attempts with a value indicating
1138
+ whether the transfer succeeded for the target URI.
1139
+ """
1140
+ with pool_executor_class(max_workers=_get_num_workers()) as transfer_executor:
1141
+ future_transfers = {
1142
+ transfer_executor.submit(
1143
+ to_uri.transfer_from,
1144
+ from_uri,
1145
+ transfer=transfer,
1146
+ overwrite=overwrite,
1147
+ transaction=transaction,
1148
+ multithreaded=False,
1149
+ ): to_uri
1150
+ for from_uri, to_uri in from_to
1151
+ }
1152
+ results: dict[ResourcePath, MBulkResult] = {}
1153
+ failed = False
1154
+ for future in concurrent.futures.as_completed(future_transfers):
1155
+ to_uri = future_transfers[future]
1156
+ try:
1157
+ future.result()
1158
+ except Exception as e:
1159
+ transferred = MBulkResult(False, e)
1160
+ failed = True
1161
+ else:
1162
+ transferred = MBulkResult(True, None)
1163
+ results[to_uri] = transferred
1164
+
1165
+ if do_raise and failed:
1166
+ raise ExceptionGroup(
1167
+ f"Errors transferring {len(results)} artifacts",
1168
+ tuple(res.exception for res in results.values() if res.exception is not None),
1169
+ )
1170
+
897
1171
  return results
898
1172
 
899
1173
  def remove(self) -> None:
900
1174
  """Remove the resource."""
901
1175
  raise NotImplementedError()
902
1176
 
1177
+ @classmethod
1178
+ def mremove(
1179
+ cls, uris: Iterable[ResourcePath], *, do_raise: bool = True
1180
+ ) -> dict[ResourcePath, MBulkResult]:
1181
+ """Remove multiple URIs at once.
1182
+
1183
+ Parameters
1184
+ ----------
1185
+ uris : iterable of `ResourcePath`
1186
+ URIs to remove.
1187
+ do_raise : `bool`, optional
1188
+ If `True` an `ExceptionGroup` will be raised containing any
1189
+ exceptions raised by the individual removals. If `False`, or if
1190
+ there were no exceptions, a dict reporting the status of each
1191
+ `ResourcePath` will be returned.
1192
+
1193
+ Returns
1194
+ -------
1195
+ results : `dict` [ `ResourcePath`, `MBulkResult` ]
1196
+ Dictionary mapping each URI to a result object indicating whether
1197
+ the removal succeeded or resulted in an exception. If ``do_raise``
1198
+ is `True` this will only be returned if everything succeeded.
1199
+ """
1200
+ # Group URIs by scheme since some URI schemes support native bulk
1201
+ # APIs.
1202
+ results: dict[ResourcePath, MBulkResult] = {}
1203
+ for uri_class, group in cls._group_uris(uris).items():
1204
+ results.update(uri_class._mremove(group))
1205
+ if do_raise:
1206
+ failed = any(not r.success for r in results.values())
1207
+ if failed:
1208
+ s = "s" if len(results) != 1 else ""
1209
+ raise ExceptionGroup(
1210
+ f"Error{s} removing {len(results)} artifact{s}",
1211
+ tuple(res.exception for res in results.values() if res.exception is not None),
1212
+ )
1213
+
1214
+ return results
1215
+
1216
+ @classmethod
1217
+ def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
1218
+ """Remove multiple URIs using futures."""
1219
+ pool_executor_class = _get_executor_class()
1220
+ if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
1221
+ # Patch the environment to make it think there is only one worker
1222
+ # for each subprocess.
1223
+ with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
1224
+ return cls._mremove_pool(pool_executor_class, uris)
1225
+ else:
1226
+ return cls._mremove_pool(pool_executor_class, uris)
1227
+
1228
+ @classmethod
1229
+ def _mremove_pool(
1230
+ cls,
1231
+ pool_executor_class: _EXECUTOR_TYPE,
1232
+ uris: Iterable[ResourcePath],
1233
+ *,
1234
+ num_workers: int | None = None,
1235
+ ) -> dict[ResourcePath, MBulkResult]:
1236
+ """Remove URIs using a futures pool."""
1237
+ max_workers = num_workers if num_workers is not None else _get_num_workers()
1238
+ results: dict[ResourcePath, MBulkResult] = {}
1239
+ with pool_executor_class(max_workers=max_workers) as remove_executor:
1240
+ future_remove = {remove_executor.submit(uri.remove): uri for uri in uris}
1241
+ for future in concurrent.futures.as_completed(future_remove):
1242
+ try:
1243
+ future.result()
1244
+ except Exception as e:
1245
+ removed = MBulkResult(False, e)
1246
+ else:
1247
+ removed = MBulkResult(True, None)
1248
+ uri = future_remove[future]
1249
+ results[uri] = removed
1250
+ return results
1251
+
903
1252
  def isabs(self) -> bool:
904
1253
  """Indicate that the resource is fully specified.
905
1254
 
@@ -923,27 +1272,53 @@ class ResourcePath: # numpydoc ignore=PR02
923
1272
  """
924
1273
  return self
925
1274
 
926
- def _as_local(self) -> tuple[str, bool]:
1275
+ @contextlib.contextmanager
1276
+ def _as_local(
1277
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
1278
+ ) -> Iterator[ResourcePath]:
927
1279
  """Return the location of the (possibly remote) resource as local file.
928
1280
 
929
1281
  This is a helper function for `as_local` context manager.
930
1282
 
1283
+ Parameters
1284
+ ----------
1285
+ multithreaded : `bool`, optional
1286
+ If `True` the transfer will be allowed to attempt to improve
1287
+ throughput by using parallel download streams. This may have no
1288
+ effect if the URI scheme does not support parallel streams or
1289
+ if a global override has been applied. If `False` parallel
1290
+ streams will be disabled.
1291
+ tmpdir : `ResourcePath` or `None`, optional
1292
+ Explicit override of the temporary directory to use for remote
1293
+ downloads.
1294
+
931
1295
  Returns
932
1296
  -------
933
- path : `str`
934
- If this is a remote resource, it will be a copy of the resource
935
- on the local file system, probably in a temporary directory.
936
- For a local resource this should be the actual path to the
937
- resource.
938
- is_temporary : `bool`
939
- Indicates if the local path is a temporary file or not.
1297
+ local_uri : `ResourcePath`
1298
+ A URI to a local POSIX file. This can either be the same resource
1299
+ or a local downloaded copy of the resource.
940
1300
  """
941
1301
  raise NotImplementedError()
942
1302
 
943
1303
  @contextlib.contextmanager
944
- def as_local(self) -> Iterator[ResourcePath]:
1304
+ def as_local(
1305
+ self, multithreaded: bool = True, tmpdir: ResourcePathExpression | None = None
1306
+ ) -> Iterator[ResourcePath]:
945
1307
  """Return the location of the (possibly remote) resource as local file.
946
1308
 
1309
+ Parameters
1310
+ ----------
1311
+ multithreaded : `bool`, optional
1312
+ If `True` the transfer will be allowed to attempt to improve
1313
+ throughput by using parallel download streams. This may have no
1314
+ effect if the URI scheme does not support parallel streams or
1315
+ if a global override has been applied. If `False` parallel
1316
+ streams will be disabled.
1317
+ tmpdir : `lsst.resources.ResourcePathExpression` or `None`, optional
1318
+ Explicit override of the temporary directory to use for remote
1319
+ downloads. This directory must be a local POSIX directory and
1320
+ must exist.
1321
+
947
1322
  Yields
948
1323
  ------
949
1324
  local : `ResourcePath`
@@ -968,18 +1343,11 @@ class ResourcePath: # numpydoc ignore=PR02
968
1343
  """
969
1344
  if self.isdir():
970
1345
  raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
971
- local_src, is_temporary = self._as_local()
972
- local_uri = ResourcePath(local_src, isTemporary=is_temporary)
973
-
974
- try:
1346
+ temp_dir = ResourcePath(tmpdir, forceDirectory=True) if tmpdir is not None else None
1347
+ if temp_dir is not None and not temp_dir.isLocal:
1348
+ raise ValueError(f"Temporary directory for as_local must be local resource not {temp_dir}")
1349
+ with self._as_local(multithreaded=multithreaded, tmpdir=temp_dir) as local_uri:
975
1350
  yield local_uri
976
- finally:
977
- # The caller might have relocated the temporary file.
978
- # Do not ever delete if the temporary matches self
979
- # (since it may have been that a temporary file was made local
980
- # but already was local).
981
- if self != local_uri and is_temporary and local_uri.exists():
982
- local_uri.remove()
983
1351
 
984
1352
  @classmethod
985
1353
  @contextlib.contextmanager
@@ -994,36 +1362,25 @@ class ResourcePath: # numpydoc ignore=PR02
994
1362
  Parameters
995
1363
  ----------
996
1364
  prefix : `ResourcePath`, optional
997
- Prefix to use. Without this the path will be formed as a local
998
- file URI in a temporary directory. Ensuring that the prefix
999
- location exists is the responsibility of the caller.
1365
+ Temporary directory to use (can be any scheme). Without this the
1366
+ path will be formed as a local file URI in a temporary directory
1367
+ obtained from `lsst.resources.utils.get_tempdir`. Ensuring that the
1368
+ prefix location exists is the responsibility of the caller.
1000
1369
  suffix : `str`, optional
1001
1370
  A file suffix to be used. The ``.`` should be included in this
1002
1371
  suffix.
1003
1372
  delete : `bool`, optional
1004
1373
  By default the resource will be deleted when the context manager
1005
1374
  is exited. Setting this flag to `False` will leave the resource
1006
- alone. `False` will also retain any directories that may have
1007
- been created.
1375
+ alone.
1008
1376
 
1009
1377
  Yields
1010
1378
  ------
1011
1379
  uri : `ResourcePath`
1012
1380
  The temporary URI. Will be removed when the context is completed.
1013
1381
  """
1014
- use_tempdir = False
1015
1382
  if prefix is None:
1016
- directory = tempfile.mkdtemp()
1017
- # If the user has set a umask that restricts the owner-write bit,
1018
- # the directory returned from mkdtemp may not initially be
1019
- # writeable by us
1020
- ensure_directory_is_writeable(directory)
1021
-
1022
- prefix = ResourcePath(directory, forceDirectory=True, isTemporary=True)
1023
- # Record that we need to delete this directory. Can not rely
1024
- # on isTemporary flag since an external prefix may have that
1025
- # set as well.
1026
- use_tempdir = True
1383
+ prefix = ResourcePath(get_tempdir(), forceDirectory=True)
1027
1384
 
1028
1385
  # Need to create a randomized file name. For consistency do not
1029
1386
  # use mkstemp for local and something else for remote. Additionally
@@ -1042,13 +1399,10 @@ class ResourcePath: # numpydoc ignore=PR02
1042
1399
  yield temporary_uri
1043
1400
  finally:
1044
1401
  if delete:
1045
- if use_tempdir:
1046
- shutil.rmtree(prefix.ospath, ignore_errors=True)
1047
- else:
1048
- with contextlib.suppress(FileNotFoundError):
1049
- # It's okay if this does not work because the user
1050
- # removed the file.
1051
- temporary_uri.remove()
1402
+ with contextlib.suppress(FileNotFoundError):
1403
+ # It's okay if this does not work because the user
1404
+ # removed the file.
1405
+ temporary_uri.remove()
1052
1406
 
1053
1407
  def read(self, size: int = -1) -> bytes:
1054
1408
  """Open the resource and return the contents in bytes.
@@ -1247,6 +1601,7 @@ class ResourcePath: # numpydoc ignore=PR02
1247
1601
  transfer: str,
1248
1602
  overwrite: bool = False,
1249
1603
  transaction: TransactionProtocol | None = None,
1604
+ multithreaded: bool = True,
1250
1605
  ) -> None:
1251
1606
  """Transfer to this URI from another.
1252
1607
 
@@ -1263,6 +1618,12 @@ class ResourcePath: # numpydoc ignore=PR02
1263
1618
  transaction : `~lsst.resources.utils.TransactionProtocol`, optional
1264
1619
  A transaction object that can (depending on implementation)
1265
1620
  rollback transfers on error. Not guaranteed to be implemented.
1621
+ multithreaded : `bool`, optional
1622
+ If `True` the transfer will be allowed to attempt to improve
1623
+ throughput by using parallel download streams. This may have no
1624
+ effect if the URI scheme does not support parallel streams or
1625
+ if a global override has been applied. If `False` parallel
1626
+ streams will be disabled.
1266
1627
 
1267
1628
  Notes
1268
1629
  -----
@@ -1550,6 +1911,12 @@ class ResourcePath: # numpydoc ignore=PR02
1550
1911
  """
1551
1912
  raise NotImplementedError(f"URL signing is not supported for '{self.scheme}'")
1552
1913
 
1914
+ def _copy_extra_attributes(self, original_uri: ResourcePath) -> None:
1915
+ # May be overridden by subclasses to transfer attributes when a
1916
+ # ResourcePath is constructed using the "clone" version of the
1917
+ # ResourcePath constructor by passing in a ResourcePath object.
1918
+ pass
1919
+
1553
1920
 
1554
1921
  ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
1555
1922
  """Type-annotation alias for objects that can be coerced to ResourcePath.