erasus 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. erasus-0.1.0/PKG-INFO +547 -0
  2. erasus-0.1.0/README.md +507 -0
  3. erasus-0.1.0/erasus/__init__.py +30 -0
  4. erasus-0.1.0/erasus/certification/__init__.py +8 -0
  5. erasus-0.1.0/erasus/certification/bounds.py +197 -0
  6. erasus-0.1.0/erasus/certification/certified_removal.py +129 -0
  7. erasus-0.1.0/erasus/certification/verification.py +243 -0
  8. erasus-0.1.0/erasus/cli/__init__.py +1 -0
  9. erasus-0.1.0/erasus/cli/benchmark.py +133 -0
  10. erasus-0.1.0/erasus/cli/evaluate.py +121 -0
  11. erasus-0.1.0/erasus/cli/main.py +85 -0
  12. erasus-0.1.0/erasus/cli/unlearn.py +208 -0
  13. erasus-0.1.0/erasus/cli/visualize.py +148 -0
  14. erasus-0.1.0/erasus/core/__init__.py +33 -0
  15. erasus-0.1.0/erasus/core/base_metric.py +30 -0
  16. erasus-0.1.0/erasus/core/base_selector.py +118 -0
  17. erasus-0.1.0/erasus/core/base_strategy.py +59 -0
  18. erasus-0.1.0/erasus/core/base_unlearner.py +180 -0
  19. erasus-0.1.0/erasus/core/config.py +48 -0
  20. erasus-0.1.0/erasus/core/exceptions.py +27 -0
  21. erasus-0.1.0/erasus/core/registry.py +72 -0
  22. erasus-0.1.0/erasus/core/types.py +18 -0
  23. erasus-0.1.0/erasus/data/__init__.py +21 -0
  24. erasus-0.1.0/erasus/data/augmentation.py +233 -0
  25. erasus-0.1.0/erasus/data/datasets/__init__.py +21 -0
  26. erasus-0.1.0/erasus/data/datasets/coco.py +114 -0
  27. erasus-0.1.0/erasus/data/datasets/conceptual_captions.py +120 -0
  28. erasus-0.1.0/erasus/data/datasets/i2p.py +91 -0
  29. erasus-0.1.0/erasus/data/datasets/imagenet.py +163 -0
  30. erasus-0.1.0/erasus/data/datasets/muse.py +160 -0
  31. erasus-0.1.0/erasus/data/datasets/tofu.py +121 -0
  32. erasus-0.1.0/erasus/data/datasets/wmdp.py +101 -0
  33. erasus-0.1.0/erasus/data/datasets.py +60 -0
  34. erasus-0.1.0/erasus/data/loaders.py +70 -0
  35. erasus-0.1.0/erasus/data/multimodal.py +55 -0
  36. erasus-0.1.0/erasus/data/partitioning.py +142 -0
  37. erasus-0.1.0/erasus/data/preprocessing.py +131 -0
  38. erasus-0.1.0/erasus/data/samplers.py +129 -0
  39. erasus-0.1.0/erasus/data/splits.py +90 -0
  40. erasus-0.1.0/erasus/data/synthetic/__init__.py +9 -0
  41. erasus-0.1.0/erasus/data/synthetic/backdoor_generator.py +168 -0
  42. erasus-0.1.0/erasus/data/synthetic/bias_generator.py +220 -0
  43. erasus-0.1.0/erasus/data/synthetic/privacy_generator.py +284 -0
  44. erasus-0.1.0/erasus/data/transforms.py +53 -0
  45. erasus-0.1.0/erasus/experiments/__init__.py +9 -0
  46. erasus-0.1.0/erasus/experiments/ablation_studies.py +135 -0
  47. erasus-0.1.0/erasus/experiments/experiment_tracker.py +217 -0
  48. erasus-0.1.0/erasus/experiments/hyperparameter_search.py +140 -0
  49. erasus-0.1.0/erasus/integrations/__init__.py +8 -0
  50. erasus-0.1.0/erasus/integrations/huggingface.py +382 -0
  51. erasus-0.1.0/erasus/losses/__init__.py +7 -0
  52. erasus-0.1.0/erasus/losses/adversarial_loss.py +72 -0
  53. erasus-0.1.0/erasus/losses/contrastive.py +33 -0
  54. erasus-0.1.0/erasus/losses/custom_losses.py +75 -0
  55. erasus-0.1.0/erasus/losses/fisher_regularization.py +81 -0
  56. erasus-0.1.0/erasus/losses/kl_divergence.py +20 -0
  57. erasus-0.1.0/erasus/losses/l2_regularization.py +39 -0
  58. erasus-0.1.0/erasus/losses/mmd.py +62 -0
  59. erasus-0.1.0/erasus/losses/retain_anchor.py +33 -0
  60. erasus-0.1.0/erasus/losses/triplet_loss.py +70 -0
  61. erasus-0.1.0/erasus/metrics/__init__.py +92 -0
  62. erasus-0.1.0/erasus/metrics/accuracy.py +54 -0
  63. erasus-0.1.0/erasus/metrics/benchmarks.py +391 -0
  64. erasus-0.1.0/erasus/metrics/efficiency/__init__.py +3 -0
  65. erasus-0.1.0/erasus/metrics/efficiency/flops.py +62 -0
  66. erasus-0.1.0/erasus/metrics/efficiency/memory_usage.py +97 -0
  67. erasus-0.1.0/erasus/metrics/efficiency/speedup.py +126 -0
  68. erasus-0.1.0/erasus/metrics/efficiency/time_complexity.py +120 -0
  69. erasus-0.1.0/erasus/metrics/fid.py +139 -0
  70. erasus-0.1.0/erasus/metrics/forgetting/__init__.py +3 -0
  71. erasus-0.1.0/erasus/metrics/forgetting/activation_analysis.py +112 -0
  72. erasus-0.1.0/erasus/metrics/forgetting/backdoor_activation.py +95 -0
  73. erasus-0.1.0/erasus/metrics/forgetting/confidence.py +84 -0
  74. erasus-0.1.0/erasus/metrics/forgetting/extraction_attack.py +142 -0
  75. erasus-0.1.0/erasus/metrics/forgetting/feature_distance.py +134 -0
  76. erasus-0.1.0/erasus/metrics/forgetting/mia.py +142 -0
  77. erasus-0.1.0/erasus/metrics/forgetting/mia_variants.py +198 -0
  78. erasus-0.1.0/erasus/metrics/membership_inference.py +100 -0
  79. erasus-0.1.0/erasus/metrics/metric_suite.py +138 -0
  80. erasus-0.1.0/erasus/metrics/perplexity.py +100 -0
  81. erasus-0.1.0/erasus/metrics/privacy/__init__.py +3 -0
  82. erasus-0.1.0/erasus/metrics/privacy/differential_privacy.py +187 -0
  83. erasus-0.1.0/erasus/metrics/privacy/epsilon_delta.py +168 -0
  84. erasus-0.1.0/erasus/metrics/privacy/privacy_audit.py +234 -0
  85. erasus-0.1.0/erasus/metrics/retrieval.py +126 -0
  86. erasus-0.1.0/erasus/metrics/retrieval_metrics.py +86 -0
  87. erasus-0.1.0/erasus/metrics/utility/__init__.py +17 -0
  88. erasus-0.1.0/erasus/metrics/utility/bleu.py +153 -0
  89. erasus-0.1.0/erasus/metrics/utility/clip_score.py +109 -0
  90. erasus-0.1.0/erasus/metrics/utility/downstream_tasks.py +98 -0
  91. erasus-0.1.0/erasus/metrics/utility/inception_score.py +128 -0
  92. erasus-0.1.0/erasus/metrics/utility/rouge.py +139 -0
  93. erasus-0.1.0/erasus/models/__init__.py +5 -0
  94. erasus-0.1.0/erasus/models/audio/__init__.py +9 -0
  95. erasus-0.1.0/erasus/models/audio/clap.py +216 -0
  96. erasus-0.1.0/erasus/models/audio/wav2vec.py +206 -0
  97. erasus-0.1.0/erasus/models/audio/whisper.py +93 -0
  98. erasus-0.1.0/erasus/models/diffusion/__init__.py +9 -0
  99. erasus-0.1.0/erasus/models/diffusion/dalle.py +205 -0
  100. erasus-0.1.0/erasus/models/diffusion/diffusion_utils.py +367 -0
  101. erasus-0.1.0/erasus/models/diffusion/imagen.py +208 -0
  102. erasus-0.1.0/erasus/models/diffusion/stable_diffusion.py +76 -0
  103. erasus-0.1.0/erasus/models/llm/__init__.py +9 -0
  104. erasus-0.1.0/erasus/models/llm/bert.py +87 -0
  105. erasus-0.1.0/erasus/models/llm/gpt.py +54 -0
  106. erasus-0.1.0/erasus/models/llm/llama.py +80 -0
  107. erasus-0.1.0/erasus/models/llm/mistral.py +53 -0
  108. erasus-0.1.0/erasus/models/llm/t5.py +248 -0
  109. erasus-0.1.0/erasus/models/model_wrapper.py +117 -0
  110. erasus-0.1.0/erasus/models/registry.py +8 -0
  111. erasus-0.1.0/erasus/models/video/__init__.py +8 -0
  112. erasus-0.1.0/erasus/models/video/video_clip.py +222 -0
  113. erasus-0.1.0/erasus/models/video/videomae.py +90 -0
  114. erasus-0.1.0/erasus/models/vlm/__init__.py +11 -0
  115. erasus-0.1.0/erasus/models/vlm/blip.py +56 -0
  116. erasus-0.1.0/erasus/models/vlm/clip.py +217 -0
  117. erasus-0.1.0/erasus/models/vlm/flamingo.py +218 -0
  118. erasus-0.1.0/erasus/models/vlm/llava.py +123 -0
  119. erasus-0.1.0/erasus/models/vlm/vision_transformer.py +285 -0
  120. erasus-0.1.0/erasus/privacy/__init__.py +16 -0
  121. erasus-0.1.0/erasus/privacy/accountant.py +79 -0
  122. erasus-0.1.0/erasus/privacy/certificates.py +52 -0
  123. erasus-0.1.0/erasus/privacy/dp_mechanisms.py +29 -0
  124. erasus-0.1.0/erasus/privacy/gradient_clipping.py +302 -0
  125. erasus-0.1.0/erasus/privacy/influence_bounds.py +54 -0
  126. erasus-0.1.0/erasus/privacy/secure_aggregation.py +329 -0
  127. erasus-0.1.0/erasus/selectors/__init__.py +66 -0
  128. erasus-0.1.0/erasus/selectors/auto_selector.py +56 -0
  129. erasus-0.1.0/erasus/selectors/ensemble/__init__.py +6 -0
  130. erasus-0.1.0/erasus/selectors/ensemble/voting.py +70 -0
  131. erasus-0.1.0/erasus/selectors/ensemble/weighted_fusion.py +121 -0
  132. erasus-0.1.0/erasus/selectors/full_selector.py +21 -0
  133. erasus-0.1.0/erasus/selectors/geometry_based/__init__.py +1 -0
  134. erasus-0.1.0/erasus/selectors/geometry_based/craig.py +15 -0
  135. erasus-0.1.0/erasus/selectors/geometry_based/glister.py +75 -0
  136. erasus-0.1.0/erasus/selectors/geometry_based/herding.py +84 -0
  137. erasus-0.1.0/erasus/selectors/geometry_based/k_center.py +15 -0
  138. erasus-0.1.0/erasus/selectors/geometry_based/kcenter.py +42 -0
  139. erasus-0.1.0/erasus/selectors/geometry_based/kmeans_coreset.py +68 -0
  140. erasus-0.1.0/erasus/selectors/geometry_based/submodular.py +92 -0
  141. erasus-0.1.0/erasus/selectors/gradient_based/__init__.py +1 -0
  142. erasus-0.1.0/erasus/selectors/gradient_based/el2n.py +77 -0
  143. erasus-0.1.0/erasus/selectors/gradient_based/forgetting_score.py +21 -0
  144. erasus-0.1.0/erasus/selectors/gradient_based/grad_match.py +99 -0
  145. erasus-0.1.0/erasus/selectors/gradient_based/gradient_norm.py +133 -0
  146. erasus-0.1.0/erasus/selectors/gradient_based/influence.py +179 -0
  147. erasus-0.1.0/erasus/selectors/gradient_based/representer.py +93 -0
  148. erasus-0.1.0/erasus/selectors/gradient_based/tracin.py +155 -0
  149. erasus-0.1.0/erasus/selectors/learning_based/__init__.py +1 -0
  150. erasus-0.1.0/erasus/selectors/learning_based/active_learning.py +184 -0
  151. erasus-0.1.0/erasus/selectors/learning_based/data_shapley.py +45 -0
  152. erasus-0.1.0/erasus/selectors/learning_based/forgetting_events.py +52 -0
  153. erasus-0.1.0/erasus/selectors/learning_based/loss_accum.py +67 -0
  154. erasus-0.1.0/erasus/selectors/learning_based/valuation_network.py +71 -0
  155. erasus-0.1.0/erasus/selectors/quality_metrics.py +240 -0
  156. erasus-0.1.0/erasus/selectors/random_selector.py +38 -0
  157. erasus-0.1.0/erasus/strategies/__init__.py +90 -0
  158. erasus-0.1.0/erasus/strategies/data_methods/__init__.py +1 -0
  159. erasus-0.1.0/erasus/strategies/data_methods/amnesiac.py +119 -0
  160. erasus-0.1.0/erasus/strategies/data_methods/certified_removal.py +209 -0
  161. erasus-0.1.0/erasus/strategies/data_methods/knowledge_distillation.py +150 -0
  162. erasus-0.1.0/erasus/strategies/data_methods/sisa.py +66 -0
  163. erasus-0.1.0/erasus/strategies/diffusion_specific/__init__.py +1 -0
  164. erasus-0.1.0/erasus/strategies/diffusion_specific/concept_erasure.py +109 -0
  165. erasus-0.1.0/erasus/strategies/diffusion_specific/noise_injection.py +111 -0
  166. erasus-0.1.0/erasus/strategies/diffusion_specific/safe_latents.py +151 -0
  167. erasus-0.1.0/erasus/strategies/diffusion_specific/timestep_masking.py +144 -0
  168. erasus-0.1.0/erasus/strategies/diffusion_specific/unet_surgery.py +100 -0
  169. erasus-0.1.0/erasus/strategies/ensemble_strategy.py +127 -0
  170. erasus-0.1.0/erasus/strategies/gradient_methods/__init__.py +6 -0
  171. erasus-0.1.0/erasus/strategies/gradient_methods/fisher_forgetting.py +154 -0
  172. erasus-0.1.0/erasus/strategies/gradient_methods/gradient_ascent.py +100 -0
  173. erasus-0.1.0/erasus/strategies/gradient_methods/modality_decoupling.py +215 -0
  174. erasus-0.1.0/erasus/strategies/gradient_methods/negative_gradient.py +76 -0
  175. erasus-0.1.0/erasus/strategies/gradient_methods/saliency_unlearning.py +177 -0
  176. erasus-0.1.0/erasus/strategies/gradient_methods/scrub.py +116 -0
  177. erasus-0.1.0/erasus/strategies/llm_specific/__init__.py +8 -0
  178. erasus-0.1.0/erasus/strategies/llm_specific/attention_surgery.py +139 -0
  179. erasus-0.1.0/erasus/strategies/llm_specific/causal_tracing.py +97 -0
  180. erasus-0.1.0/erasus/strategies/llm_specific/embedding_alignment.py +102 -0
  181. erasus-0.1.0/erasus/strategies/llm_specific/ssd.py +110 -0
  182. erasus-0.1.0/erasus/strategies/llm_specific/token_masking.py +80 -0
  183. erasus-0.1.0/erasus/strategies/parameter_methods/__init__.py +1 -0
  184. erasus-0.1.0/erasus/strategies/parameter_methods/layer_freezing.py +162 -0
  185. erasus-0.1.0/erasus/strategies/parameter_methods/lora_unlearning.py +150 -0
  186. erasus-0.1.0/erasus/strategies/parameter_methods/mask_based.py +156 -0
  187. erasus-0.1.0/erasus/strategies/parameter_methods/neuron_pruning.py +129 -0
  188. erasus-0.1.0/erasus/strategies/parameter_methods/sparse_aware.py +92 -0
  189. erasus-0.1.0/erasus/strategies/vlm_specific/__init__.py +5 -0
  190. erasus-0.1.0/erasus/strategies/vlm_specific/attention_unlearning.py +113 -0
  191. erasus-0.1.0/erasus/strategies/vlm_specific/contrastive_unlearning.py +122 -0
  192. erasus-0.1.0/erasus/strategies/vlm_specific/cross_modal_decoupling.py +5 -0
  193. erasus-0.1.0/erasus/strategies/vlm_specific/vision_text_split.py +237 -0
  194. erasus-0.1.0/erasus/unlearners/__init__.py +25 -0
  195. erasus-0.1.0/erasus/unlearners/audio_unlearner.py +101 -0
  196. erasus-0.1.0/erasus/unlearners/diffusion_unlearner.py +123 -0
  197. erasus-0.1.0/erasus/unlearners/erasus_unlearner.py +83 -0
  198. erasus-0.1.0/erasus/unlearners/federated_unlearner.py +253 -0
  199. erasus-0.1.0/erasus/unlearners/llm_unlearner.py +128 -0
  200. erasus-0.1.0/erasus/unlearners/multimodal_unlearner.py +118 -0
  201. erasus-0.1.0/erasus/unlearners/video_unlearner.py +101 -0
  202. erasus-0.1.0/erasus/unlearners/vlm_unlearner.py +138 -0
  203. erasus-0.1.0/erasus/utils/__init__.py +64 -0
  204. erasus-0.1.0/erasus/utils/callbacks.py +154 -0
  205. erasus-0.1.0/erasus/utils/checkpointing.py +38 -0
  206. erasus-0.1.0/erasus/utils/distributed.py +102 -0
  207. erasus-0.1.0/erasus/utils/early_stopping.py +83 -0
  208. erasus-0.1.0/erasus/utils/helpers.py +130 -0
  209. erasus-0.1.0/erasus/utils/logging.py +36 -0
  210. erasus-0.1.0/erasus/utils/profiling.py +347 -0
  211. erasus-0.1.0/erasus/utils/reproducibility.py +317 -0
  212. erasus-0.1.0/erasus/utils/seed.py +21 -0
  213. erasus-0.1.0/erasus/version.py +4 -0
  214. erasus-0.1.0/erasus/visualization/__init__.py +39 -0
  215. erasus-0.1.0/erasus/visualization/activation.py +201 -0
  216. erasus-0.1.0/erasus/visualization/attention.py +158 -0
  217. erasus-0.1.0/erasus/visualization/comparisons.py +165 -0
  218. erasus-0.1.0/erasus/visualization/cross_modal.py +327 -0
  219. erasus-0.1.0/erasus/visualization/embeddings.py +184 -0
  220. erasus-0.1.0/erasus/visualization/feature_plots.py +79 -0
  221. erasus-0.1.0/erasus/visualization/gradients.py +108 -0
  222. erasus-0.1.0/erasus/visualization/influence_maps.py +201 -0
  223. erasus-0.1.0/erasus/visualization/interactive.py +88 -0
  224. erasus-0.1.0/erasus/visualization/loss_curves.py +42 -0
  225. erasus-0.1.0/erasus/visualization/mia_plots.py +92 -0
  226. erasus-0.1.0/erasus/visualization/reports.py +110 -0
  227. erasus-0.1.0/erasus/visualization/surfaces.py +202 -0
  228. erasus-0.1.0/erasus.egg-info/PKG-INFO +547 -0
  229. erasus-0.1.0/erasus.egg-info/SOURCES.txt +240 -0
  230. erasus-0.1.0/erasus.egg-info/dependency_links.txt +1 -0
  231. erasus-0.1.0/erasus.egg-info/entry_points.txt +2 -0
  232. erasus-0.1.0/erasus.egg-info/requires.txt +28 -0
  233. erasus-0.1.0/erasus.egg-info/top_level.txt +1 -0
  234. erasus-0.1.0/pyproject.toml +70 -0
  235. erasus-0.1.0/setup.cfg +4 -0
  236. erasus-0.1.0/tests/test_advanced.py +40 -0
  237. erasus-0.1.0/tests/test_components.py +56 -0
  238. erasus-0.1.0/tests/test_core.py +124 -0
  239. erasus-0.1.0/tests/test_end_to_end.py +222 -0
  240. erasus-0.1.0/tests/test_imports.py +60 -0
  241. erasus-0.1.0/tests/test_integration.py +231 -0
  242. erasus-0.1.0/tests/test_selectors.py +45 -0
erasus-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,547 @@
1
+ Metadata-Version: 2.4
2
+ Name: erasus
3
+ Version: 0.1.0
4
+ Summary: Efficient Representative And Surgical Unlearning Selection — Universal Machine Unlearning via Coreset Selection
5
+ Author-email: Avaya Aggarwal <aggarwal.avaya27@gmail.com>
6
+ License: MIT
7
+ Keywords: machine-unlearning,coreset,foundation-models,privacy,pytorch
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.9
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: torch>=2.0
16
+ Requires-Dist: numpy>=1.24
17
+ Requires-Dist: Pillow>=9.0
18
+ Requires-Dist: tqdm>=4.60
19
+ Requires-Dist: pyyaml>=6.0
20
+ Requires-Dist: transformers>=4.30
21
+ Provides-Extra: full
22
+ Requires-Dist: diffusers>=0.20; extra == "full"
23
+ Requires-Dist: opacus>=1.3; extra == "full"
24
+ Requires-Dist: datasets>=2.14; extra == "full"
25
+ Requires-Dist: scikit-learn>=1.2; extra == "full"
26
+ Requires-Dist: matplotlib>=3.7; extra == "full"
27
+ Requires-Dist: seaborn>=0.12; extra == "full"
28
+ Requires-Dist: wandb>=0.15; extra == "full"
29
+ Requires-Dist: peft>=0.5; extra == "full"
30
+ Requires-Dist: huggingface_hub>=0.20; extra == "full"
31
+ Provides-Extra: hub
32
+ Requires-Dist: huggingface_hub>=0.20; extra == "hub"
33
+ Requires-Dist: datasets>=2.14; extra == "hub"
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.4; extra == "dev"
36
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
37
+ Requires-Dist: ruff>=0.1; extra == "dev"
38
+ Requires-Dist: mypy>=1.5; extra == "dev"
39
+ Requires-Dist: pre-commit>=3.4; extra == "dev"
40
+
41
+ <p align="center">
42
+ <h1 align="center">👻 Erasus</h1>
43
+ <p align="center">
44
+ <strong>Efficient Representative And Surgical Unlearning Selection</strong><br>
45
+ Universal Machine Unlearning via Coreset Selection
46
+ </p>
47
+ <p align="center">
48
+ <a href="#-quick-start"><img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+"></a>
49
+ <a href="#-installation"><img src="https://img.shields.io/badge/pytorch-2.0+-ee4c2c.svg" alt="PyTorch 2.0+"></a>
50
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License: MIT"></a>
51
+ <a href="#-test-status"><img src="https://img.shields.io/badge/tests-87%20passed-brightgreen.svg" alt="Tests"></a>
52
+ <a href="#-supported-models"><img src="https://img.shields.io/badge/models-10%20architectures-purple.svg" alt="Models"></a>
53
+ <a href="#-strategies--selectors"><img src="https://img.shields.io/badge/strategies-27%20methods-orange.svg" alt="Strategies"></a>
54
+ </p>
55
+ </p>
56
+
57
+ ---
58
+
59
+ **Erasus** is a research-grade Python framework for **Machine Unlearning** across all major foundation model types. It surgically removes specific data, concepts, or behaviors from trained models — without the computational cost of full retraining.
60
+
61
+ It supports **Vision-Language Models**, **Large Language Models**, **Diffusion Models**, **Audio Models**, and **Video Models** through a unified API backed by 27 unlearning strategies, 19 coreset selectors, 8 loss functions, and a comprehensive evaluation suite with 15+ metrics.
62
+
63
+ ---
64
+
65
+ ## 🧠 How It Works
66
+
67
+ Erasus operates in a three-stage pipeline:
68
+
69
+ ```
70
+ ┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐
71
+ │ 1. CORESET SELECTION │────▶│ 2. TARGETED │────▶│ 3. EVALUATION & │
72
+ │ │ │ UNLEARNING │ │ CERTIFICATION │
73
+ │ Pick the minimal │ │ │ │ │
74
+ │ set of samples that │ │ Apply gradient ascent,│ │ MIA, accuracy, │
75
+ │ define forgetting │ │ Fisher, SCRUB, LoRA, │ │ perplexity, FID, │
76
+ │ "support vectors" │ │ or 20+ other methods │ │ certified removal │
77
+ └──────────────────────┘ └──────────────────────┘ └──────────────────────┘
78
+ ```
79
+
80
+ **Key Innovation:** Geometry-aware coreset selection identifies the *"support vectors of forgetting"*. The premise is that unlearning only the k% most influential forget samples approximates unlearning the full forget set (100%) with bounded utility loss.
81
+
82
+ ---
83
+
84
+ ## ⚡ Key Features
85
+
86
+ | Feature | Description |
87
+ |---------|-------------|
88
+ | 🎯 **Coreset-Driven Forgetting** | 19 coreset selectors (influence functions, CRAIG, herding, k-center, EL2N, TracIn, Data Shapley) reduce compute by up to 90% |
89
+ | 🧩 **Ensemble Unlearning** | Combine strategies sequentially or via weight averaging for robust forgetting |
90
+ | 📷📝 **Multimodal Decoupling** | Unlearn image-text associations without breaking visual or textual generalization |
91
+ | 🛡️ **Utility Preservation** | Retain-Anchor loss + Fisher regularization constrain model drift on safe data |
92
+ | 🔐 **Certified Removal** | Formal (ε, δ)-removal verification with PAC-style guarantees |
93
+ | 📊 **Integrated Evaluation** | MIA, confidence, feature distance, perplexity, FID, activation analysis, backdoor detection — 15+ metrics in total |
94
+ | 📈 **Visualization Suite** | Loss landscapes, embedding plots, gradient flow, interactive Plotly dashboards, HTML reports |
95
+ | 🔌 **Model Agnostic** | Works with any PyTorch model + HuggingFace Transformers |
96
+ | 🖥️ **CLI + Python API** | `erasus unlearn`, `erasus evaluate`, `erasus benchmark`, `erasus visualize`, or the full Python API |
97
+ | 🧪 **Experiment Tracking** | Built-in W&B, MLflow, local JSON tracking + HPO with Optuna |
98
+ | 📐 **Theoretical Bounds** | PAC-learning utility bounds, influence bounds, certified unlearning radius |
99
+
100
+ ---
101
+
102
+ ## 🏗️ Supported Models
103
+
104
+ | Modality | Models | Unlearner |
105
+ |----------|--------|-----------|
106
+ | **Vision-Language** | CLIP, LLaVA, BLIP-2 | `VLMUnlearner` |
107
+ | **Language** | LLaMA, Mistral, GPT-2/J, BERT | `LLMUnlearner` |
108
+ | **Diffusion** | Stable Diffusion 1.x/2.x/XL | `DiffusionUnlearner` |
109
+ | **Audio** | Whisper | `AudioUnlearner` |
110
+ | **Video** | VideoMAE | `VideoUnlearner` |
111
+ | **Any** | Auto-detect | `MultimodalUnlearner` |
112
+
113
+ ---
114
+
115
+ ## 📦 Installation
116
+
117
+ ```bash
118
+ # From PyPI (once published)
119
+ pip install erasus
120
+ pip install erasus[full] # with diffusers, datasets, wandb, etc.
121
+ pip install erasus[hub] # Hugging Face Hub push/pull
122
+
123
+ # From source (development)
124
+ git clone https://github.com/OnePunchMonk/erasus.git
125
+ cd erasus
126
+ pip install -e .
127
+
128
+ # With all optional dependencies
129
+ pip install -e ".[full]"
130
+
131
+ # Hugging Face Hub (push/pull unlearned models)
132
+ pip install -e ".[hub]"
133
+
134
+ # Development
135
+ pip install -e ".[dev]"
136
+ ```
137
+
138
+ ### Quick Setup Script
139
+ ```bash
140
+ bash scripts/setup_env.sh # CPU
141
+ bash scripts/setup_env.sh --gpu # CUDA 12.1
142
+ ```
143
+
144
+ ### Docker
145
+ ```bash
146
+ docker compose -f docker/docker-compose.yml up test # Run tests
147
+ docker compose -f docker/docker-compose.yml run dev # Dev shell
148
+ docker compose -f docker/docker-compose.yml up benchmark # GPU benchmarks
149
+ ```
150
+
151
+ ---
152
+
153
+ ## 🚀 Quick Start
154
+
155
+ ### Python API
156
+
157
+ ```python
158
+ from erasus.unlearners import ErasusUnlearner
159
+
160
+ # 1. Load your model
161
+ model = ... # Any PyTorch model
162
+
163
+ # 2. Create unlearner
164
+ unlearner = ErasusUnlearner(
165
+ model=model,
166
+ strategy="gradient_ascent", # 27 strategies available
167
+ selector="influence", # 19 selectors available
168
+ device="cuda",
169
+ )
170
+
171
+ # 3. Unlearn
172
+ result = unlearner.fit(
173
+ forget_data=forget_loader, # Data to remove
174
+ retain_data=retain_loader, # Data to preserve
175
+ prune_ratio=0.1, # Use top 10% coreset
176
+ epochs=5,
177
+ )
178
+
179
+ # 4. Evaluate
180
+ metrics = unlearner.evaluate(
181
+ forget_data=forget_loader,
182
+ retain_data=retain_loader,
183
+ )
184
+ print(f"MIA AUC: {metrics['mia_auc']:.4f}") # Should → 0.5
185
+ ```
186
+
187
+ ### Modality-Specific Unlearners
188
+
189
+ ```python
190
+ from erasus.unlearners import VLMUnlearner, LLMUnlearner, DiffusionUnlearner
191
+
192
+ # CLIP: Remove NSFW concepts
193
+ vlm = VLMUnlearner(model=clip_model, strategy="modality_decoupling")
194
+ vlm.fit(forget_data=nsfw_loader, retain_data=safe_loader)
195
+
196
+ # LLaMA: Remove hazardous knowledge
197
+ llm = LLMUnlearner(model=llama_model, strategy="gradient_ascent")
198
+ llm.fit(forget_data=harmful_loader, retain_data=benign_loader)
199
+
200
+ # Stable Diffusion: Remove artist styles
201
+ diff = DiffusionUnlearner(model=sd_model, strategy="concept_erasure")
202
+ diff.fit(forget_data=artist_loader, retain_data=general_loader)
203
+ ```
204
+
205
+ ### Auto-Detect Model Type
206
+
207
+ ```python
208
+ from erasus.unlearners import MultimodalUnlearner
209
+
210
+ # Automatically picks the right unlearner
211
+ unlearner = MultimodalUnlearner.from_model(your_model)
212
+ ```
213
+
214
+ ### CLI
215
+
216
+ ```bash
217
+ # Run unlearning
218
+ erasus unlearn --config configs/default.yaml
219
+
220
+ # Evaluate results
221
+ erasus evaluate --config configs/default.yaml --checkpoint model.pt
222
+
223
+ # Run benchmarks
224
+ erasus benchmark --strategies gradient_ascent,scrub --selectors random,influence
225
+
226
+ # Generate visualizations
227
+ erasus visualize --type embeddings --method tsne --output embeddings.png
228
+ erasus visualize --type comparison --output comparison.png
229
+ erasus visualize --type report --output report.html
230
+ ```
231
+
232
+ ---
233
+
234
+ ## 🔧 Strategies & Selectors
235
+
236
+ ### Unlearning Strategies (27)
237
+
238
+ | Category | Strategies |
239
+ |----------|------------|
240
+ | **Gradient Methods** | Gradient Ascent, SCRUB (NeurIPS 2023), Fisher Forgetting, Negative Gradient, Modality Decoupling, **Saliency Unlearning** |
241
+ | **Parameter Methods** | LoRA Unlearning, Sparse-Aware, Mask-Based, Neuron Pruning, **Layer Freezing** |
242
+ | **Data Methods** | Amnesiac ML, SISA, Certified Removal, **Knowledge Distillation** |
243
+ | **LLM-Specific** | SSD (AAAI 2024), Token Masking, Embedding Alignment, Causal Tracing, **Attention Surgery** |
244
+ | **Diffusion-Specific** | Concept Erasure (ICCV 2023), Noise Injection, U-Net Surgery, **Timestep Masking**, **Safe Latents** |
245
+ | **VLM-Specific** | Contrastive Unlearning, Cross-Modal Decoupling, **Attention Unlearning** |
246
+ | **Ensemble** | Sequential / Averaged multi-strategy combination |
247
+
248
+ ### Coreset Selectors (19)
249
+
250
+ | Category | Selectors |
251
+ |----------|-----------|
252
+ | **Gradient-Based** | Influence Functions, TracIn, Gradient Norm, GradMatch/CRAIG, EL2N, Representer |
253
+ | **Geometry-Based** | k-Center, Herding, GLISTER, Submodular, k-Means++ |
254
+ | **Learning-Based** | Forgetting Events, Data Shapley, Valuation Network |
255
+ | **Ensemble** | Voting Selector, Auto-Selector |
256
+
257
+ ---
258
+
259
+ ## 📊 Evaluation & Metrics
260
+
261
+ ```python
262
+ from erasus.metrics import MetricSuite
263
+
264
+ suite = MetricSuite(["accuracy", "mia", "perplexity"])
265
+ results = suite.run(model, forget_loader, retain_loader)
266
+ ```
267
+
268
+ | Category | Metrics |
269
+ |----------|---------|
270
+ | **Forgetting** | MIA (+ LiRA, LOSS variants), Confidence, Feature Distance, **Activation Analysis**, **Backdoor ASR** |
271
+ | **Utility** | Accuracy, Perplexity, Retrieval (R@1/5/10), FID |
272
+ | **Efficiency** | Time Complexity, Memory Usage, **Speedup Ratio**, **FLOPs Estimation** |
273
+ | **Privacy** | Differential Privacy (ε, δ) |
274
+
275
+ ---
276
+
277
+ ## 📈 Visualization
278
+
279
+ ```python
280
+ from erasus.visualization import (
281
+ EmbeddingVisualizer,
282
+ LossLandscapeVisualizer,
283
+ GradientVisualizer,
284
+ ReportGenerator,
285
+ )
286
+ from erasus.visualization.attention import AttentionVisualizer
287
+ from erasus.visualization.comparisons import ComparisonVisualizer
288
+
289
+ # t-SNE / PCA embeddings
290
+ viz = EmbeddingVisualizer(model)
291
+ viz.plot(data_loader, method="tsne")
292
+
293
+ # Loss landscape
294
+ landscape = LossLandscapeVisualizer(model)
295
+ landscape.plot_2d_contour(data_loader)
296
+
297
+ # Attention heatmaps (before vs. after)
298
+ attn_viz = AttentionVisualizer(model_after)
299
+ attn_viz.plot_attention_comparison(inputs, model_before)
300
+
301
+ # Before/after comparisons
302
+ comp = ComparisonVisualizer()
303
+ comp.plot_prediction_shift(model_before, model_after, forget_loader)
304
+ comp.plot_metric_comparison(metrics_before, metrics_after)
305
+
306
+ # HTML report
307
+ report = ReportGenerator("Unlearning Report")
308
+ report.add_metrics(metrics)
309
+ report.save("report.html")
310
+ ```
311
+
312
+ ---
313
+
314
+ ## 🔐 Certification & Privacy
315
+
316
+ ```python
317
+ from erasus.certification import CertifiedRemovalVerifier, UnlearningVerifier
318
+
319
+ # Formal (ε, δ)-removal verification
320
+ verifier = CertifiedRemovalVerifier(epsilon=1.0, delta=1e-5)
321
+ result = verifier.verify(unlearned_model, retrained_model, n_total=10000, n_forget=500)
322
+ print(f"Certified: {result['certified']}")
323
+
324
+ # Statistical verification
325
+ stat_verifier = UnlearningVerifier(significance=0.05)
326
+ tests = stat_verifier.verify_all(model, forget_loader, retain_loader)
327
+ ```
328
+
329
+ ### Theoretical Bounds
330
+
331
+ ```python
332
+ from erasus.certification.bounds import TheoreticalBounds
333
+
334
+ # PAC-learning utility bound
335
+ bounds = TheoreticalBounds.pac_utility_bound(
336
+ n_total=50000, n_forget=500, n_retain=49500, delta=0.05, model=model,
337
+ )
338
+ print(f"Utility drop bound: {bounds['pac_utility_drop_bound']:.4f}")
339
+
340
+ # Certified unlearning radius
341
+ radius = TheoreticalBounds.unlearning_radius(
342
+ epsilon=1.0, delta=1e-5, n_forget=500,
343
+ )
344
+ print(f"Certified radius: {radius['certified_radius']:.4f}")
345
+ ```
346
+
347
+ ---
348
+
349
+ ## 📉 Loss Functions
350
+
351
+ | Loss | Description |
352
+ |------|-------------|
353
+ | **Retain Anchor** | Cross-entropy on retain data to preserve utility |
354
+ | **Contrastive** | CLIP-style contrastive loss for VLM alignment |
355
+ | **KL Divergence** | Distribution matching between models |
356
+ | **MMD** | Maximum Mean Discrepancy for distribution comparison |
357
+ | **Fisher Regularization** | Fisher information-weighted parameter penalty |
358
+ | **Adversarial** | GAN-style loss for indistinguishable forget/retain outputs |
359
+ | **Triplet** | Push forget embeddings away from retain-set anchors |
360
+ | **L2 Regularization** | Simple weight-drift penalty |
361
+
362
+ ---
363
+
364
+ ## 🧪 Experiment Tracking
365
+
366
+ ```python
367
+ from erasus.experiments import ExperimentTracker, HyperparameterSearch, AblationStudy
368
+
369
+ # Supports: "local", "wandb", "mlflow"
370
+ with ExperimentTracker("clip_unlearning", backend="wandb") as tracker:
371
+ tracker.log_config({"strategy": "gradient_ascent", "lr": 1e-4})
372
+ result = unlearner.fit(...)
373
+ tracker.log_metrics({"mia_auc": 0.52, "accuracy": 0.94})
374
+ tracker.log_model(model)
375
+
376
+ # Hyperparameter search (Optuna or random fallback)
377
+ search = HyperparameterSearch(
378
+ objective_fn=my_objective,
379
+ param_space={"lr": {"type": "float", "low": 1e-5, "high": 1e-2, "log": True}},
380
+ n_trials=50,
381
+ )
382
+ best = search.run()
383
+
384
+ # Ablation studies
385
+ ablation = AblationStudy(base_config={...}, run_fn=run_trial)
386
+ ablation.run_full_ablation({"lr": [1e-3, 1e-4, 1e-5], "strategy": ["ga", "scrub"]})
387
+ print(ablation.summary())
388
+ ```
389
+
390
+ ---
391
+
392
+ ## 📁 Project Structure
393
+
394
+ ```
395
+ erasus/
396
+ ├── core/ # Base classes, registry, config, types
397
+ ├── unlearners/ # High-level API (7 modality-specific unlearners)
398
+ ├── strategies/ # 27 unlearning algorithms (gradient, parameter, data, LLM, diffusion, VLM, ensemble)
399
+ ├── selectors/ # 19 coreset selection methods (gradient, geometry, learning, ensemble)
400
+ ├── metrics/ # 15+ evaluation metrics (forgetting, utility, efficiency, privacy)
401
+ ├── losses/ # 8 loss functions (retain-anchor, contrastive, KL, MMD, Fisher, adversarial, triplet, L2)
402
+ ├── visualization/ # Embeddings, loss surfaces, gradients, attention heatmaps, comparisons, reports
403
+ ├── data/ # Dataset loaders (TOFU, WMDP, COCO, I2P, CC), preprocessing, partitioning
404
+ ├── models/ # 10 model wrappers (VLM, LLM, diffusion, audio, video)
405
+ ├── privacy/ # DP mechanisms, privacy accountant, certificates
406
+ ├── certification/ # Certified removal, statistical verification, theoretical bounds
407
+ ├── experiments/ # W&B / MLflow / local tracking, HPO, ablation studies
408
+ ├── cli/ # Command-line interface (unlearn, evaluate, benchmark, visualize)
409
+ └── utils/ # Checkpointing, distributed, helpers, logging, callbacks, early stopping
410
+ ```
411
+
412
+ ---
413
+
414
+ ## 🏆 Benchmarks
415
+
416
+ Run standardized benchmarks:
417
+
418
+ ```bash
419
+ # TOFU Benchmark (LLM unlearning)
420
+ python benchmarks/tofu/run.py --strategies gradient_ascent,scrub --epochs 5
421
+
422
+ # WMDP Benchmark (hazardous knowledge)
423
+ python benchmarks/wmdp/run.py --subsets bio,cyber
424
+
425
+ # Full suite
426
+ bash scripts/run_benchmarks.sh
427
+ ```
428
+
429
+ ---
430
+
431
+ ## 🧑‍💻 Examples
432
+
433
+ | Example | Description |
434
+ |---------|-------------|
435
+ | [CLIP Coreset Comparison](examples/vision_language/clip_coreset_comparison.py) | Compare random vs. gradient_norm selectors |
436
+ | [LLaVA Unlearning](examples/vision_language/llava_unlearning.py) | VLM unlearning with gradient ascent |
437
+ | [LLaMA Concept Removal](examples/language_models/llama_concept_removal.py) | Remove concepts from LLaMA |
438
+ | [GPT-2 Strategy Comparison](examples/language_models/gpt2_unlearning.py) | Compare gradient_ascent vs. negative_gradient |
439
+ | [LoRA Efficient Unlearning](examples/language_models/lora_efficient_unlearning.py) | Parameter-efficient unlearning |
440
+ | [SD NSFW Removal](examples/diffusion_models/stable_diffusion_nsfw.py) | Remove NSFW concepts |
441
+ | [SD Artist Removal](examples/diffusion_models/stable_diffusion_artist.py) | Remove artist styles |
442
+ | [TOFU Benchmark](examples/benchmarks/run_tofu_benchmark.py) | End-to-end benchmark |
443
+
444
+ ---
445
+
446
+ ## ✅ Test Status
447
+
448
+ ```
449
+ 87 tests passed ✅ | 0 failed | 26s
450
+ ```
451
+
452
+ ```bash
453
+ python -m pytest tests/ -v --tb=short
454
+ ```
455
+
456
+ | Test Suite | Tests | Status |
457
+ |-----------|:-----:|:------:|
458
+ | Integration (pipelines) | 6 | ✅ |
459
+ | End-to-end | 15 | ✅ |
460
+ | Unit (selectors) | 9 | ✅ |
461
+ | Unit (strategies) | 7 | ✅ |
462
+ | Unit (metrics) | 8 | ✅ |
463
+ | Core / imports / components | 42 | ✅ |
464
+
465
+ ---
466
+
467
+ ## 📚 Research References
468
+
469
+ Erasus integrates and builds upon these key works:
470
+
471
+ | Method | Paper | Venue |
472
+ |--------|-------|-------|
473
+ | SCRUB | Kurmanji et al. | NeurIPS 2023 |
474
+ | Selective Synaptic Dampening | Foster et al. | AAAI 2024 |
475
+ | Concept Erasure (ESD) | Gandikota et al. | ICCV 2023 |
476
+ | Gradient Ascent | Golatkar et al. | NeurIPS 2020 |
477
+ | Fisher Forgetting | Golatkar et al. | CVPR 2020 |
478
+ | CRAIG | Mirzasoleiman et al. | ICML 2020 |
479
+ | GLISTER | Killamsetty et al. | AAAI 2021 |
480
+ | Influence Functions | Koh & Liang | ICML 2017 |
481
+ | TracIn | Pruthi et al. | NeurIPS 2020 |
482
+ | Data Shapley | Ghorbani & Zou | ICML 2019 |
483
+ | Forgetting Events | Toneva et al. | ICLR 2019 |
484
+ | EL2N | Paul et al. | NeurIPS 2021 |
485
+ | Amnesiac ML | Graves et al. | AAAI 2021 |
486
+
487
+ ---
488
+
489
+ ## 🗺️ Roadmap
490
+
491
+ - [x] Core framework (base classes, registry, config)
492
+ - [x] 10 model architectures
493
+ - [x] 27 unlearning strategies (gradient, parameter, data, LLM, diffusion, VLM, ensemble)
494
+ - [x] 19 coreset selectors
495
+ - [x] 15+ evaluation metrics (forgetting, utility, efficiency, privacy)
496
+ - [x] 8 loss functions (Fisher, adversarial, triplet, L2, retain-anchor, KL, MMD, contrastive)
497
+ - [x] Visualization suite (embeddings, landscapes, gradients, attention, comparisons, reports)
498
+ - [x] CLI (`erasus unlearn`, `erasus evaluate`, `erasus benchmark`, `erasus visualize`)
499
+ - [x] Certification & privacy modules + theoretical bounds (PAC, influence, certified radius)
500
+ - [x] Experiment tracking (W&B, MLflow, local) + HPO + ablation studies
501
+ - [x] Benchmark runners (TOFU, WMDP)
502
+ - [x] Callbacks & early stopping
503
+ - [x] 87 passing tests
504
+ - [ ] Additional model architectures (Flamingo, T5, DALL-E, Wav2Vec)
505
+ - [ ] HuggingFace Hub integration
506
+ - [ ] Interactive Gradio/Streamlit dashboard
507
+ - [ ] Tutorial notebooks
508
+ - [ ] PyPI release
509
+
510
+ ---
511
+
512
+ ## 🤝 Contributing
513
+
514
+ Contributions are welcome — whether it's new unlearning strategies, coreset selectors, model support, or documentation.
515
+
516
+ ```bash
517
+ # Setup development environment
518
+ git clone https://github.com/OnePunchMonk/erasus.git
519
+ cd erasus
520
+ pip install -e ".[dev]"
521
+ python -m pytest tests/ -v
522
+ ```
523
+
524
+ ---
525
+
526
+ ## 📜 License
527
+
528
+ MIT License — see [LICENSE](LICENSE) for details.
529
+
530
+ ---
531
+
532
+ ## 📖 Citation
533
+
534
+ ```bibtex
535
+ @software{erasus2026,
536
+ title={Erasus: Universal Machine Unlearning via Coreset Selection},
537
+ author={Aggarwal, Avaya},
538
+ year={2026},
539
+ url={https://github.com/OnePunchMonk/erasus}
540
+ }
541
+ ```
542
+
543
+ ---
544
+
545
+ <p align="center">
546
+ <b>Built with ❤️ for the machine unlearning research community</b>
547
+ </p>