lsst-daf-butler 29.2025.1800__py3-none-any.whl → 29.2025.2000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. lsst/daf/butler/_quantum_backed.py +3 -2
  2. lsst/daf/butler/cli/butler.py +1 -1
  3. lsst/daf/butler/cli/cmd/_remove_runs.py +2 -0
  4. lsst/daf/butler/cli/utils.py +32 -4
  5. lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
  6. lsst/daf/butler/configs/storageClasses.yaml +2 -0
  7. lsst/daf/butler/datastore/_datastore.py +20 -2
  8. lsst/daf/butler/datastore/generic_base.py +2 -2
  9. lsst/daf/butler/datastores/chainedDatastore.py +8 -4
  10. lsst/daf/butler/datastores/fileDatastore.py +169 -61
  11. lsst/daf/butler/datastores/inMemoryDatastore.py +32 -4
  12. lsst/daf/butler/direct_butler/_direct_butler.py +35 -10
  13. lsst/daf/butler/queries/_expression_strings.py +4 -0
  14. lsst/daf/butler/registry/bridge/ephemeral.py +16 -6
  15. lsst/daf/butler/registry/bridge/monolithic.py +21 -6
  16. lsst/daf/butler/registry/collections/_base.py +23 -6
  17. lsst/daf/butler/registry/interfaces/_bridge.py +13 -1
  18. lsst/daf/butler/registry/queries/expressions/_predicate.py +5 -0
  19. lsst/daf/butler/registry/queries/expressions/check.py +5 -0
  20. lsst/daf/butler/registry/queries/expressions/normalForm.py +5 -0
  21. lsst/daf/butler/registry/queries/expressions/parser/exprTree.py +64 -20
  22. lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +26 -9
  23. lsst/daf/butler/registry/queries/expressions/parser/treeVisitor.py +13 -0
  24. lsst/daf/butler/registry/tests/_registry.py +26 -11
  25. lsst/daf/butler/remote_butler/server/_server.py +2 -0
  26. lsst/daf/butler/remote_butler/server/_telemetry.py +105 -0
  27. lsst/daf/butler/remote_butler/server/handlers/_query_streaming.py +7 -3
  28. lsst/daf/butler/script/removeRuns.py +9 -3
  29. lsst/daf/butler/tests/butler_queries.py +51 -0
  30. lsst/daf/butler/tests/cliCmdTestBase.py +1 -1
  31. lsst/daf/butler/version.py +1 -1
  32. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/METADATA +1 -1
  33. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/RECORD +41 -40
  34. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/WHEEL +1 -1
  35. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/entry_points.txt +0 -0
  36. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/licenses/COPYRIGHT +0 -0
  37. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/licenses/LICENSE +0 -0
  38. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/licenses/bsd_license.txt +0 -0
  39. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/licenses/gpl-v3.0.txt +0 -0
  40. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/top_level.txt +0 -0
  41. {lsst_daf_butler-29.2025.1800.dist-info → lsst_daf_butler-29.2025.2000.dist-info}/zip-safe +0 -0
lsst/daf/butler/_quantum_backed.py
@@ -516,8 +516,9 @@ class QuantumBackedButler(LimitedButler):
             self._actual_output_refs.discard(ref)
 
         if unstore:
-            # Point of no return for removing artifacts
-            self._datastore.emptyTrash()
+            # Point of no return for removing artifacts. Only try to remove
+            # refs associated with this pruning.
+            self._datastore.emptyTrash(refs=refs)
 
     def retrieve_artifacts_zip(
         self,
lsst/daf/butler/cli/butler.py
@@ -102,7 +102,7 @@ class PluginCommand:
     """Where the command came from (`str`)."""
 
 
-class LoaderCLI(click.MultiCommand, abc.ABC):
+class LoaderCLI(click.Group, abc.ABC):
     """Extends `click.MultiCommand`, which dispatches to subcommands, to load
     subcommands at runtime.
 
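Rebasing on `click.Group` matters because click 8.2 removed the long-deprecated `click.MultiCommand` base class; its subcommand-dispatch behaviour lives on in `click.Group`, so the subclass keeps working on both older and newer click. A minimal sketch of the pattern, with a hypothetical `greet` subcommand:

    import click

    # Subclassing click.Group works across click 8.x releases;
    # click.MultiCommand is gone as of click 8.2.
    class MyCLI(click.Group):
        pass

    @click.group(cls=MyCLI)
    def cli() -> None:
        """Top-level command that dispatches to subcommands."""

    @cli.command()
    def greet() -> None:
        click.echo("hello")
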
lsst/daf/butler/cli/cmd/_remove_runs.py
@@ -81,7 +81,9 @@ def _print_remove(will: bool, runs: Sequence[script.RemoveRun], datasets: Mapping
         else:
             print(run.name)
     print("\n" + willRemoveDatasetsMsg if will else didRemoveDatasetsMsg)
+    total = sum(datasets.values())
     print(", ".join([f"{i[0]}({i[1]})" for i in datasets.items()]))
+    print("Total number of datasets to remove: ", total)
 
 
 def _print_requires_confirmation(runs: Sequence[script.RemoveRun], datasets: Mapping[str, int]) -> None:
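The added `total` gives users one number to sanity-check before confirming a removal. Illustrative output, assuming two hypothetical dataset types with 10 and 5 datasets (the doubled space comes from print's default separator following the trailing space in the label):

    raw(10), calexp(5)
    Total number of datasets to remove:  15
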
lsst/daf/butler/cli/utils.py
@@ -55,6 +55,7 @@ __all__ = (
 )
 
 
+import importlib.metadata
 import itertools
 import logging
 import os
@@ -76,6 +77,7 @@ import click
 import click.exceptions
 import click.testing
 import yaml
+from packaging.version import Version
 
 from lsst.utils.iteration import ensure_iterable
 
@@ -87,6 +89,12 @@ if TYPE_CHECKING:
 
     from lsst.daf.butler import Dimension
 
+_click_version = Version(importlib.metadata.version("click"))
+if _click_version >= Version("8.2.0"):
+    _click_make_metavar_has_context = True
+else:
+    _click_make_metavar_has_context = False
+
 log = logging.getLogger(__name__)
 
 # This is used as the metavar argument to Options that accept multiple string
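The module-level flag computed above exists because click 8.2.0 changed `Parameter.make_metavar` to take the active `Context` as an argument, where earlier releases accept no arguments. A sketch of the version-gated call, assuming a freshly constructed option and context:

    import importlib.metadata

    import click
    from packaging.version import Version

    opt = click.Option(["--values"], multiple=True)
    ctx = click.Context(click.Command("demo"))

    # click >= 8.2.0 requires the context; older releases take no argument.
    if Version(importlib.metadata.version("click")) >= Version("8.2.0"):
        metavar = opt.make_metavar(ctx)
    else:
        metavar = opt.make_metavar()
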
@@ -741,9 +749,16 @@ class MWPath(click.Path):
 class MWOption(click.Option):
     """Overrides click.Option with desired behaviors."""
 
-    def make_metavar(self) -> str:
+    def make_metavar(self, ctx: click.Context | None = None) -> str:
         """Make the metavar for the help menu.
 
+        Parameters
+        ----------
+        ctx : `click.Context` or `None`
+            Context from the command.
+
+        Notes
+        -----
         Overrides `click.Option.make_metavar`.
         Adds a space and an ellipsis after the metavar name if
         the option accepts multiple inputs, otherwise defers to the base
@@ -758,7 +773,10 @@ class MWOption(click.Option):
         transformation that must apply to all types should be applied in
         get_help_record.
         """
-        metavar = super().make_metavar()
+        if _click_make_metavar_has_context:
+            metavar = super().make_metavar(ctx=ctx)  # type: ignore
+        else:
+            metavar = super().make_metavar()  # type: ignore
         if self.multiple and self.nargs == 1:
             metavar += " ..."
         elif self.nargs != 1:
@@ -769,9 +787,16 @@ class MWOption(click.Option):
 class MWArgument(click.Argument):
     """Overrides click.Argument with desired behaviors."""
 
-    def make_metavar(self) -> str:
+    def make_metavar(self, ctx: click.Context | None = None) -> str:
         """Make the metavar for the help menu.
 
+        Parameters
+        ----------
+        ctx : `click.Context` or `None`
+            Context from the command.
+
+        Notes
+        -----
         Overrides `click.Option.make_metavar`.
         Always adds a space and an ellipsis (' ...') after the
         metavar name if the option accepts multiple inputs.
@@ -784,7 +809,10 @@ class MWArgument(click.Argument):
         metavar : `str`
             The metavar value.
         """
-        metavar = super().make_metavar()
+        if _click_make_metavar_has_context:
+            metavar = super().make_metavar(ctx=ctx)  # type: ignore
+        else:
+            metavar = super().make_metavar()  # type: ignore
         if self.nargs != 1:
             metavar = f"{metavar[:-3]} ..."
         return metavar
lsst/daf/butler/configs/datastores/formatters.yaml
@@ -94,3 +94,4 @@ Timespan: lsst.daf.butler.formatters.json.JsonFormatter
 RegionTimeInfo: lsst.daf.butler.formatters.json.JsonFormatter
 QPEnsemble: lsst.meas.pz.qp_formatter.QPFormatter
 PZModel: lsst.meas.pz.model_formatter.ModelFormatter
+VisitBackgroundModel: lsst.daf.butler.formatters.json.JsonFormatter
lsst/daf/butler/configs/storageClasses.yaml
@@ -426,3 +426,5 @@ storageClasses:
     pytype: qp.Ensemble
   PZModel:
     pytype: rail.core.model.Model
+  VisitBackgroundModel:
+    pytype: lsst.drp.tasks.fit_visit_background.VisitBackgroundModel
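These two configuration entries register the new `VisitBackgroundModel` storage class and route it through the generic JSON formatter. A sketch of how a dataset type using it might be declared and read back; the dataset type name, dimensions, data ID, and collection below are hypothetical:

    from lsst.daf.butler import Butler, DatasetType

    butler = Butler("repo", writeable=True)
    dataset_type = DatasetType(
        "visitBackgroundModel",               # hypothetical dataset type name
        dimensions=["instrument", "visit"],   # hypothetical dimensions
        storageClass="VisitBackgroundModel",  # the storage class added above
        universe=butler.dimensions,
    )
    butler.registry.registerDatasetType(dataset_type)
    model = butler.get("visitBackgroundModel", instrument="X", visit=1, collections="some/run")
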
lsst/daf/butler/datastore/_datastore.py
@@ -1160,13 +1160,29 @@ class Datastore(metaclass=ABCMeta):
         raise NotImplementedError("Must be implemented by subclass")
 
     @abstractmethod
-    def emptyTrash(self, ignore_errors: bool = True) -> None:
+    def emptyTrash(
+        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
+    ) -> set[ResourcePath]:
         """Remove all datasets from the trash.
 
         Parameters
         ----------
         ignore_errors : `bool`, optional
             Determine whether errors should be ignored.
+        refs : `collections.abc.Collection` [ `DatasetRef` ] or `None`
+            Explicit list of datasets that can be removed from trash. If listed
+            datasets are not already stored in the trash table they will be
+            ignored. If `None` every entry in the trash table will be
+            processed.
+        dry_run : `bool`, optional
+            If `True`, the trash table will be queried and results reported
+            but no artifacts will be removed.
+
+        Returns
+        -------
+        removed : `set` [ `lsst.resources.ResourcePath` ]
+            List of artifacts that were removed. Can return nothing if
+            artifacts cannot be represented by URIs.
 
         Notes
         -----
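This widened abstract signature is the thread running through the release: trash emptying can now be scoped to an explicit set of datasets and rehearsed before committing. A minimal usage sketch, assuming `datastore` is any concrete `Datastore` and `refs` are datasets already moved to trash:

    # Preview which file artifacts would be removed for these refs only.
    would_remove = datastore.emptyTrash(refs=refs, dry_run=True)
    for uri in sorted(would_remove, key=str):
        print("would remove", uri)

    # Point of no return: delete just those artifacts and their trash records.
    removed = datastore.emptyTrash(refs=refs, ignore_errors=False)
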
@@ -1512,7 +1528,9 @@ class NullDatastore(Datastore):
     def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
         raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
 
-    def emptyTrash(self, ignore_errors: bool = True) -> None:
+    def emptyTrash(
+        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
+    ) -> set[ResourcePath]:
         raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
 
     def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
lsst/daf/butler/datastore/generic_base.py
@@ -80,7 +80,7 @@ class GenericBaseDatastore(Datastore, Generic[_InfoType]):
         encountered during removal are not ignored.
         """
         self.trash(ref, ignore_errors=False)
-        self.emptyTrash(ignore_errors=False)
+        self.emptyTrash(ignore_errors=False, refs=[ref])
 
     def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
         """Retrieve a dataset from an input `Datastore`,
@@ -89,7 +89,7 @@ class GenericBaseDatastore(Datastore, Generic[_InfoType]):
         Parameters
         ----------
         inputDatastore : `Datastore`
-            The external `Datastore` from which to retreive the Dataset.
+            The external `Datastore` from which to retrieve the Dataset.
         ref : `DatasetRef`
             Reference to the required dataset in the input data store.
         """
lsst/daf/butler/datastores/chainedDatastore.py
@@ -999,7 +999,7 @@ class ChainedDatastore(Datastore):
         """
         log.debug("Removing %s", ref)
         self.trash(ref, ignore_errors=False)
-        self.emptyTrash(ignore_errors=False)
+        self.emptyTrash(ignore_errors=False, refs=[ref])
 
     def forget(self, refs: Iterable[DatasetRef]) -> None:
         for datastore in tuple(self.datastores):
@@ -1028,9 +1028,13 @@ class ChainedDatastore(Datastore):
         else:
             raise FileNotFoundError(err_msg)
 
-    def emptyTrash(self, ignore_errors: bool = True) -> None:
+    def emptyTrash(
+        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
+    ) -> set[ResourcePath]:
+        removed = set()
         for datastore in self.datastores:
-            datastore.emptyTrash(ignore_errors=ignore_errors)
+            removed.update(datastore.emptyTrash(ignore_errors=ignore_errors, refs=refs, dry_run=dry_run))
+        return removed
 
     def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
         """Retrieve a dataset from an input `Datastore`,
@@ -1039,7 +1043,7 @@ class ChainedDatastore(Datastore):
         Parameters
         ----------
         inputDatastore : `Datastore`
-            The external `Datastore` from which to retreive the Dataset.
+            The external `Datastore` from which to retrieve the Dataset.
         ref : `DatasetRef`
             Reference to the required dataset in the input data store.
 
lsst/daf/butler/datastores/fileDatastore.py
@@ -401,27 +401,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         log.debug("Checking if resource exists: %s", location.uri)
         return location.uri.exists()
 
-    def _delete_artifact(self, location: Location) -> None:
-        """Delete the artifact from the datastore.
-
-        Parameters
-        ----------
-        location : `Location`
-            Location of the artifact associated with this datastore.
-        """
-        if location.pathInStore.isabs():
-            raise RuntimeError(f"Cannot delete artifact with absolute uri {location.uri}.")
-
-        try:
-            location.uri.remove()
-        except FileNotFoundError:
-            log.debug("File %s did not exist and so could not be deleted.", location.uri)
-            raise
-        except Exception as e:
-            log.critical("Failed to delete file: %s (%s)", location.uri, e)
-            raise
-        log.debug("Successfully deleted file: %s", location.uri)
-
     def addStoredItemInfo(
         self,
         refs: Iterable[DatasetRef],
@@ -2531,8 +2510,9 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         else:
             raise
 
-    @transactional
-    def emptyTrash(self, ignore_errors: bool = True) -> None:
+    def emptyTrash(
+        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
+    ) -> set[ResourcePath]:
         """Remove all datasets from the trash.
 
         Parameters
@@ -2541,14 +2521,91 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
             If `True` return without error even if something went wrong.
             Problems could occur if another process is simultaneously trying
             to delete.
+        refs : `collections.abc.Collection` [ `DatasetRef` ] or `None`
+            Explicit list of datasets that can be removed from trash. If listed
+            datasets are not already stored in the trash table they will be
+            ignored. If `None` every entry in the trash table will be
+            processed.
+        dry_run : `bool`, optional
+            If `True`, the trash table will be queried and results reported
+            but no artifacts will be removed.
+
+        Returns
+        -------
+        removed : `set` [ `lsst.resources.ResourcePath` ]
+            List of artifacts that were removed.
+
+        Notes
+        -----
+        Will empty the records from the trash tables only if this call finishes
+        without raising.
         """
-        log.debug("Emptying trash in datastore %s", self.name)
+        removed = set()
+        if refs:
+            selected_ids = {ref.id for ref in refs}
+            n_chunks = 0
+            for chunk in chunk_iterable(selected_ids, chunk_size=50_000):
+                n_chunks += 1
+                log.verbose("Emptying trash for chunk %d of size %d", n_chunks, len(chunk))
+                removed.update(
+                    self._empty_trash_subset(ignore_errors=ignore_errors, selected_ids=chunk, dry_run=dry_run)
+                )
+        else:
+            log.verbose("Emptying all trash in datastore %s", self.name)
+            removed = self._empty_trash_subset(ignore_errors=ignore_errors, dry_run=dry_run)
+        log.info(
+            "%sRemoved %d file artifact%s from datastore %s",
+            "Would have " if dry_run else "",
+            len(removed),
+            "s" if len(removed) != 1 else "",
+            self.name,
+        )
+        return removed
+
+    @transactional
+    def _empty_trash_subset(
+        self,
+        *,
+        ignore_errors: bool = True,
+        selected_ids: Collection[DatasetId] | None = None,
+        dry_run: bool = False,
+    ) -> set[ResourcePath]:
+        """Empty trash table in transaction.
+
+        Parameters
+        ----------
+        ignore_errors : `bool`
+            If `True` return without error even if something went wrong.
+            Problems could occur if another process is simultaneously trying
+            to delete.
+        selected_ids : `collections.abc.Collection` [`DatasetId`] or `None`
+            Explicit list of dataset IDs that can be removed from the trash.
+            If listed datasets are not already included in the trash table
+            they will be ignored. If `None` every entry in the trash table
+            will be processed.
+        dry_run : `bool`, optional
+            If `True`, the trash table will be queried and results reported
+            but no artifacts will be removed.
+
+        Returns
+        -------
+        removed : `set` [ `lsst.resources.ResourcePath` ]
+            Artifacts successfully removed.
+
+        Notes
+        -----
+        Will empty the records from the trash tables only if this call finishes
+        without raising.
+        """
         # Context manager will empty trash iff we finish it without raising.
         # It will also automatically delete the relevant rows from the
         # trash table and the records table.
         with self.bridge.emptyTrash(
-            self._table, record_class=StoredFileInfo, record_column="path"
+            self._table,
+            record_class=StoredFileInfo,
+            record_column="path",
+            selected_ids=selected_ids,
+            dry_run=dry_run,
         ) as trash_data:
             # Removing the artifacts themselves requires that the files are
             # not also associated with refs that are not to be trashed.
@@ -2596,6 +2653,8 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
             else:
                 artifacts_to_keep = slow_artifacts_to_keep
 
+            n_direct = 0
+            artifacts_to_delete: set[ResourcePath] = set()
             for ref, info in trashed_list:
                 # Should not happen for this implementation but need
                 # to keep mypy happy.
@@ -2609,42 +2668,91 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
                 # Only trashed refs still known to datastore will be returned.
                 location = info.file_location(self.locationFactory)
 
-                # Point of no return for this artifact
-                log.debug("Removing artifact %s from datastore %s", location.uri, self.name)
-                try:
-                    self._delete_artifact(location)
-                except FileNotFoundError:
-                    # If the file itself has been deleted there is nothing
-                    # we can do about it. It is possible that trash has
-                    # been run in parallel in another process or someone
-                    # decided to delete the file. It is unlikely to come
-                    # back and so we should still continue with the removal
-                    # of the entry from the trash table. It is also possible
-                    # we removed it in a previous iteration if it was
-                    # a multi-dataset artifact. The delete artifact method
-                    # will log a debug message in this scenario.
-                    # Distinguishing file missing before trash started and
-                    # file already removed previously as part of this trash
-                    # is not worth the distinction with regards to potential
-                    # memory cost.
-                    pass
-                except Exception as e:
-                    if ignore_errors:
-                        # Use a debug message here even though it's not
-                        # a good situation. In some cases this can be
-                        # caused by a race between user A and user B
-                        # and neither of them has permissions for the
-                        # other's files. Butler does not know about users
-                        # and trash has no idea what collections these
-                        # files were in (without guessing from a path).
-                        log.debug(
-                            "Encountered error removing artifact %s from datastore %s: %s",
-                            location.uri,
-                            self.name,
-                            e,
-                        )
-                    else:
-                        raise
+                if location.pathInStore.isabs():
+                    n_direct += 1
+                    continue
+
+                # Strip fragment before storing since it is the artifact
+                # we are deleting and we do not want repeats for every member
+                # in a zip.
+                artifacts_to_delete.add(location.uri.replace(fragment=""))
+
+            if n_direct > 0:
+                s = "s" if n_direct != 1 else ""
+                log.verbose("Not deleting %d artifact%s using absolute URI%s", n_direct, s, s)
+
+            if artifacts_to_keep:
+                log.verbose(
+                    "%d artifact%s were not deleted because they are associated with other datasets",
+                    len(artifacts_to_keep),
+                    "s" if len(artifacts_to_keep) != 1 else "",
+                )
+
+            if not artifacts_to_delete:
+                return set()
+
+            # Now do the deleting. Special case the log message for a single
+            # artifact.
+            if len(artifacts_to_delete) == 1:
+                log.verbose(
+                    "%s removing file artifact %s from datastore %s",
+                    "Would be" if dry_run else "Now",
+                    list(artifacts_to_delete)[0],
+                    self.name,
+                )
+            else:
+                log.verbose(
+                    "%s removing %d file artifacts from datastore %s",
+                    "Would be" if dry_run else "Now",
+                    len(artifacts_to_delete),
+                    self.name,
+                )
+
+            # For dry-run mode do not attempt to search the file store for
+            # the artifacts to determine whether they exist or not. Simply
+            # report that an attempt would be made to delete them. Never
+            # report direct imports.
+            if dry_run:
+                return artifacts_to_delete
+
+            # Now remove the actual file artifacts.
+            remove_result = ResourcePath.mremove(artifacts_to_delete, do_raise=False)
+
+            removed: set[ResourcePath] = set()
+            exceptions: list[Exception] = []
+            for uri, result in remove_result.items():
+                if result.exception is None or isinstance(result.exception, FileNotFoundError):
+                    # File not existing is not an error since some other
+                    # process might have been trying to clean it and we do not
+                    # want to raise an error for a situation where the file
+                    # is not there and we do not want it to be there.
+                    removed.add(uri)
+                else:
+                    exceptions.append(result.exception)
+
+            if exceptions:
+                s_err = "s" if len(exceptions) != 1 else ""
+                e = ExceptionGroup(f"Error{s_err} removing {len(exceptions)} artifact{s_err}", exceptions)
+                if ignore_errors:
+                    # Use a debug message here even though it's not
+                    # a good situation. In some cases this can be
+                    # caused by a race between user A and user B
+                    # and neither of them has permissions for the
+                    # other's files. Butler does not know about users
+                    # and trash has no idea what collections these
+                    # files were in (without guessing from a path).
+                    log.debug(
+                        "Encountered %d error%s removing %d artifact%s from datastore %s: %s",
+                        len(exceptions),
+                        s_err,
+                        len(artifacts_to_delete),
+                        "s" if len(artifacts_to_delete) != 1 else "",
+                        self.name,
+                        e,
+                    )
+                else:
+                    raise e
+        return removed
 
     @transactional
     def transfer_from(
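The net effect of these hunks: the deleted per-file `_delete_artifact` loop is replaced by a single bulk `ResourcePath.mremove` call, which lets object-store backends batch deletions and report per-URI outcomes. A sketch of that pattern in isolation, with illustrative URIs:

    from lsst.resources import ResourcePath

    uris = {ResourcePath("s3://bucket/a.fits"), ResourcePath("s3://bucket/b.fits")}
    # do_raise=False collects a per-URI result instead of raising on the first error.
    results = ResourcePath.mremove(uris, do_raise=False)
    removed = {
        uri
        for uri, res in results.items()
        # As in the datastore code above, a missing file counts as already removed.
        if res.exception is None or isinstance(res.exception, FileNotFoundError)
    }
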
lsst/daf/butler/datastores/inMemoryDatastore.py
@@ -33,7 +33,7 @@ __all__ = ("InMemoryDatastore", "StoredMemoryItemInfo")
 
 import logging
 import time
-from collections.abc import Iterable, Mapping
+from collections.abc import Collection, Iterable, Mapping
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 from urllib.parse import urlencode
@@ -627,13 +627,28 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
         with self._transaction.undoWith(f"Trash {len(ref_list)} datasets", _rollbackMoveToTrash, ref_list):
             self._trashedIds.update(ref.id for ref in ref_list)
 
-    def emptyTrash(self, ignore_errors: bool = False) -> None:
+    def emptyTrash(
+        self, ignore_errors: bool = False, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
+    ) -> set[ResourcePath]:
         """Remove all datasets from the trash.
 
         Parameters
         ----------
         ignore_errors : `bool`, optional
             Ignore errors.
+        refs : `collections.abc.Collection` [ `DatasetRef` ] or `None`
+            Explicit list of datasets that can be removed from trash. If listed
+            datasets are not already stored in the trash table they will be
+            ignored. If `None` every entry in the trash table will be
+            processed.
+        dry_run : `bool`, optional
+            If `True`, the trash table will be queried and results reported
+            but no artifacts will be removed.
+
+        Returns
+        -------
+        removed : `set` [ `lsst.resources.ResourcePath` ]
+            List of artifacts that were removed. Empty for this datastore.
 
         Notes
        -----
@@ -647,7 +662,19 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
         """
         log.debug("Emptying trash in datastore %s", self.name)
 
-        for dataset_id in self._trashedIds:
+        trashed_ids = self._trashedIds
+        if refs:
+            selected_ids = {ref.id for ref in refs}
+            if selected_ids is not None:
+                trashed_ids = {tid for tid in trashed_ids if tid in selected_ids}
+
+        if dry_run:
+            log.info(
+                "Would attempt remove %s dataset%s.", len(trashed_ids), "s" if len(trashed_ids) != 1 else ""
+            )
+            return set()
+
+        for dataset_id in trashed_ids:
             try:
                 realID, _ = self._get_dataset_info(dataset_id)
             except FileNotFoundError:
@@ -677,7 +704,8 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
             self._remove_stored_item_info(dataset_id)
 
         # Empty the trash table
-        self._trashedIds = set()
+        self._trashedIds = self._trashedIds - trashed_ids
+        return set()
 
     def validateConfiguration(
         self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
lsst/daf/butler/direct_butler/_direct_butler.py
@@ -54,6 +54,7 @@ from sqlalchemy.exc import IntegrityError
 from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils.introspection import get_class_of
 from lsst.utils.logging import VERBOSE, getLogger
+from lsst.utils.timer import time_this
 
 from .._butler import Butler
 from .._butler_config import ButlerConfig
@@ -1464,14 +1465,25 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
             refs.extend(query.datasets(dt, collections=name))
         with self._datastore.transaction(), self._registry.transaction():
             if unstore:
-                self._datastore.trash(refs)
+                with time_this(
+                    _LOG, msg="Marking %d datasets for removal to clear RUN collections", args=(len(refs),)
+                ):
+                    self._datastore.trash(refs)
             else:
                 self._datastore.forget(refs)
             for name in names:
-                self._registry.removeCollection(name)
+                with time_this(_LOG, msg="Removing registry entries for RUN collection %s", args=(name,)):
+                    self._registry.removeCollection(name)
         if unstore:
-            # Point of no return for removing artifacts
-            self._datastore.emptyTrash()
+            # Point of no return for removing artifacts. Restrict the trash
+            # emptying to the datasets from this specific collection rather
+            # than everything in the trash.
+            with time_this(
+                _LOG,
+                msg="Attempting to remove artifacts for %d datasets associated with RUN collections",
+                args=(len(refs),),
+            ):
+                self._datastore.emptyTrash(refs=refs)
 
     def pruneDatasets(
         self,
@@ -1518,13 +1530,20 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
         # Registry operations.
         with self._datastore.transaction(), self._registry.transaction():
             if unstore:
-                self._datastore.trash(refs)
+                with time_this(
+                    _LOG, msg="Marking %d datasets for removal to during pruning", args=(len(refs),)
+                ):
+                    self._datastore.trash(refs)
             if purge:
-                self._registry.removeDatasets(refs)
+                with time_this(_LOG, msg="Removing %d pruned datasets from registry", args=(len(refs),)):
+                    self._registry.removeDatasets(refs)
             elif disassociate:
                 assert tags, "Guaranteed by earlier logic in this function."
-                for tag in tags:
-                    self._registry.disassociate(tag, refs)
+                with time_this(
+                    _LOG, msg="Disassociating %d datasets from tagged collections", args=(len(refs),)
+                ):
+                    for tag in tags:
+                        self._registry.disassociate(tag, refs)
         # We've exited the Registry transaction, and apparently committed.
         # (if there was an exception, everything rolled back, and it's as if
         # nothing happened - and we never get here).
@@ -1535,8 +1554,14 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
         # deleting everything on disk and in private Datastore tables that is
         # in the dataset_location_trash table.
         if unstore:
-            # Point of no return for removing artifacts
-            self._datastore.emptyTrash()
+            # Point of no return for removing artifacts. Restrict the trash
+            # emptying to the refs that this call trashed.
+            with time_this(
+                _LOG,
+                msg="Attempting to remove artifacts for %d datasets associated with pruning",
+                args=(len(refs),),
+            ):
+                self._datastore.emptyTrash(refs=refs)
 
     @transactional
     def ingest_zip(self, zip_file: ResourcePathExpression, transfer: str = "auto") -> None:
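The timing wrappers added throughout this file use `lsst.utils.timer.time_this`, a context manager that logs how long the enclosed block took, formatting the %-style `msg` with `args`. A minimal sketch:

    import logging

    from lsst.utils.timer import time_this

    _LOG = logging.getLogger(__name__)

    # On exit this logs the message plus the elapsed time of the block.
    with time_this(_LOG, msg="Removing %d datasets", args=(3,)):
        pass  # the timed work goes here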