nonconform 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. nonconform-0.9.0/.gitattributes +21 -0
  2. nonconform-0.9.0/.gitignore +32 -0
  3. nonconform-0.9.0/.pre-commit-config.yaml +66 -0
  4. nonconform-0.9.0/LICENSE +28 -0
  5. nonconform-0.9.0/PKG-INFO +300 -0
  6. nonconform-0.9.0/README.md +221 -0
  7. nonconform-0.9.0/docs/Makefile +20 -0
  8. nonconform-0.9.0/docs/img/banner_dark.png +0 -0
  9. nonconform-0.9.0/docs/img/banner_light.png +0 -0
  10. nonconform-0.9.0/docs/make.bat +35 -0
  11. nonconform-0.9.0/docs/paper/paper.bib +174 -0
  12. nonconform-0.9.0/docs/paper/paper.md +83 -0
  13. nonconform-0.9.0/docs/source/_templates/autosummary/class.rst +33 -0
  14. nonconform-0.9.0/docs/source/_templates/autosummary/function.rst +6 -0
  15. nonconform-0.9.0/docs/source/_templates/autosummary/module.rst +12 -0
  16. nonconform-0.9.0/docs/source/conf.py +173 -0
  17. nonconform-0.9.0/docs/source/contributing.md +152 -0
  18. nonconform-0.9.0/docs/source/examples/bootstrap_conformal.md +207 -0
  19. nonconform-0.9.0/docs/source/examples/classical_conformal.md +144 -0
  20. nonconform-0.9.0/docs/source/examples/cross_val_conformal.md +221 -0
  21. nonconform-0.9.0/docs/source/examples/extreme_conformal.md +320 -0
  22. nonconform-0.9.0/docs/source/examples/fdr_control.md +307 -0
  23. nonconform-0.9.0/docs/source/examples/index.rst +14 -0
  24. nonconform-0.9.0/docs/source/examples/jackknife_conformal.md +255 -0
  25. nonconform-0.9.0/docs/source/examples/weighted_conformal.md +259 -0
  26. nonconform-0.9.0/docs/source/index.rst +20 -0
  27. nonconform-0.9.0/docs/source/installation.md +51 -0
  28. nonconform-0.9.0/docs/source/quickstart.md +241 -0
  29. nonconform-0.9.0/docs/source/user_guide/batch_evaluation.md +354 -0
  30. nonconform-0.9.0/docs/source/user_guide/best_practices.md +609 -0
  31. nonconform-0.9.0/docs/source/user_guide/conformal_inference.md +433 -0
  32. nonconform-0.9.0/docs/source/user_guide/conformalization_strategies.md +148 -0
  33. nonconform-0.9.0/docs/source/user_guide/extreme_conformal.md +184 -0
  34. nonconform-0.9.0/docs/source/user_guide/fdr_control.md +343 -0
  35. nonconform-0.9.0/docs/source/user_guide/index.rst +16 -0
  36. nonconform-0.9.0/docs/source/user_guide/statistical_concepts.md +77 -0
  37. nonconform-0.9.0/docs/source/user_guide/streaming_evaluation.md +553 -0
  38. nonconform-0.9.0/docs/source/user_guide/troubleshooting.md +446 -0
  39. nonconform-0.9.0/docs/source/user_guide/weighted_conformal.md +407 -0
  40. nonconform-0.9.0/examples/data_generators.py +179 -0
  41. nonconform-0.9.0/examples/extreme_conformal.py +54 -0
  42. nonconform-0.9.0/examples/pyod/abod.py +19 -0
  43. nonconform-0.9.0/examples/pyod/autoencoder.py +22 -0
  44. nonconform-0.9.0/examples/pyod/cd.py +22 -0
  45. nonconform-0.9.0/examples/pyod/copod.py +22 -0
  46. nonconform-0.9.0/examples/pyod/dif.py +22 -0
  47. nonconform-0.9.0/examples/pyod/ecod.py +19 -0
  48. nonconform-0.9.0/examples/pyod/gmm.py +19 -0
  49. nonconform-0.9.0/examples/pyod/hbos.py +22 -0
  50. nonconform-0.9.0/examples/pyod/iforest.py +22 -0
  51. nonconform-0.9.0/examples/pyod/inne.py +22 -0
  52. nonconform-0.9.0/examples/pyod/kde.py +19 -0
  53. nonconform-0.9.0/examples/pyod/knn.py +19 -0
  54. nonconform-0.9.0/examples/pyod/knn_mahalanobis.py +27 -0
  55. nonconform-0.9.0/examples/pyod/kpca.py +19 -0
  56. nonconform-0.9.0/examples/pyod/lmdd.py +19 -0
  57. nonconform-0.9.0/examples/pyod/loci.py +19 -0
  58. nonconform-0.9.0/examples/pyod/loda.py +19 -0
  59. nonconform-0.9.0/examples/pyod/lof.py +19 -0
  60. nonconform-0.9.0/examples/pyod/lscp.py +29 -0
  61. nonconform-0.9.0/examples/pyod/lunar.py +19 -0
  62. nonconform-0.9.0/examples/pyod/mad.py +19 -0
  63. nonconform-0.9.0/examples/pyod/mcd.py +21 -0
  64. nonconform-0.9.0/examples/pyod/ocsvm.py +19 -0
  65. nonconform-0.9.0/examples/pyod/pca.py +21 -0
  66. nonconform-0.9.0/examples/pyod/qmcd.py +19 -0
  67. nonconform-0.9.0/examples/pyod/rod.py +21 -0
  68. nonconform-0.9.0/examples/pyod/sod.py +19 -0
  69. nonconform-0.9.0/examples/standard_conformal.py +29 -0
  70. nonconform-0.9.0/examples/weighted_conformal.py +28 -0
  71. nonconform-0.9.0/nonconform/__init__.py +15 -0
  72. nonconform-0.9.0/nonconform/estimation/__init__.py +17 -0
  73. nonconform-0.9.0/nonconform/estimation/base.py +53 -0
  74. nonconform-0.9.0/nonconform/estimation/extreme_conformal.py +230 -0
  75. nonconform-0.9.0/nonconform/estimation/standard_conformal.py +162 -0
  76. nonconform-0.9.0/nonconform/estimation/weighted_conformal.py +262 -0
  77. nonconform-0.9.0/nonconform/strategy/__init__.py +19 -0
  78. nonconform-0.9.0/nonconform/strategy/base.py +87 -0
  79. nonconform-0.9.0/nonconform/strategy/bootstrap.py +307 -0
  80. nonconform-0.9.0/nonconform/strategy/cross_val.py +144 -0
  81. nonconform-0.9.0/nonconform/strategy/jackknife.py +111 -0
  82. nonconform-0.9.0/nonconform/strategy/split.py +105 -0
  83. nonconform-0.9.0/nonconform/utils/__init__.py +0 -0
  84. nonconform-0.9.0/nonconform/utils/data/__init__.py +31 -0
  85. nonconform-0.9.0/nonconform/utils/data/generator/__init__.py +15 -0
  86. nonconform-0.9.0/nonconform/utils/data/generator/base.py +241 -0
  87. nonconform-0.9.0/nonconform/utils/data/generator/batch.py +165 -0
  88. nonconform-0.9.0/nonconform/utils/data/generator/online.py +110 -0
  89. nonconform-0.9.0/nonconform/utils/data/load.py +633 -0
  90. nonconform-0.9.0/nonconform/utils/func/__init__.py +15 -0
  91. nonconform-0.9.0/nonconform/utils/func/decorator.py +115 -0
  92. nonconform-0.9.0/nonconform/utils/func/enums.py +25 -0
  93. nonconform-0.9.0/nonconform/utils/func/params.py +114 -0
  94. nonconform-0.9.0/nonconform/utils/stat/__init__.py +25 -0
  95. nonconform-0.9.0/nonconform/utils/stat/aggregation.py +47 -0
  96. nonconform-0.9.0/nonconform/utils/stat/extreme.py +175 -0
  97. nonconform-0.9.0/nonconform/utils/stat/metrics.py +84 -0
  98. nonconform-0.9.0/nonconform/utils/stat/statistical.py +155 -0
  99. nonconform-0.9.0/pyproject.toml +183 -0
  100. nonconform-0.9.0/requirements.txt +23 -0
  101. nonconform-0.9.0/tests/functional/__init__.py +0 -0
  102. nonconform-0.9.0/tests/functional/test_data_generator.py +316 -0
  103. nonconform-0.9.0/tests/functional/test_extreme_split.py +171 -0
  104. nonconform-0.9.0/tests/functional/test_standard_bootstrap.py +75 -0
  105. nonconform-0.9.0/tests/functional/test_standard_cross_val.py +52 -0
  106. nonconform-0.9.0/tests/functional/test_standard_jackknife.py +33 -0
  107. nonconform-0.9.0/tests/functional/test_standard_split.py +51 -0
  108. nonconform-0.9.0/tests/functional/test_weighted_bootstrap.py +72 -0
  109. nonconform-0.9.0/tests/functional/test_weighted_cross_val.py +50 -0
  110. nonconform-0.9.0/tests/functional/test_weighted_jackknife.py +32 -0
  111. nonconform-0.9.0/tests/functional/test_weighted_split.py +49 -0
  112. nonconform-0.9.0/tests/unit/__init__.py +0 -0
  113. nonconform-0.9.0/tests/unit/test_dataset_download.py +406 -0
  114. nonconform-0.9.0/tests/unit/test_dataset_setup.py +52 -0
  115. nonconform-0.9.0/tests/unit/test_unsupported_detector.py +15 -0
  116. nonconform-0.9.0/tests/unit/test_utils_aggregation.py +34 -0
  117. nonconform-0.9.0/tests/unit/test_utils_statistical.py +33 -0
@@ -0,0 +1,21 @@
1
+ # Auto detect text files and ensure they use LF line endings in the repo
2
+ * text=auto eol=lf
3
+
4
+ # Explicitly declare file types that should always have LF
5
+ *.py text eol=lf
6
+ *.js text eol=lf
7
+ *.css text eol=lf
8
+ *.html text eol=lf
9
+ *.sh text eol=lf
10
+ Dockerfile text eol=lf
11
+ *.md text eol=lf
12
+ *.json text eol=lf
13
+ *.yaml text eol=lf
14
+ *.yml text eol=lf
15
+
16
+ # Declare files that should always have CRLF (less common)
17
+ # *.bat text eol=crlf
18
+
19
+ # Declare files that should be treated as binary (Git won't change line endings)
20
+ *.png binary
21
+ *.jpg binary
@@ -0,0 +1,32 @@
1
+ # Operating System
2
+ .DS_STORE
3
+
4
+ # Global directories
5
+ __pycache__/
6
+
7
+ # Local
8
+ __pycache__
9
+ *.py[cdo]
10
+ *.pyc
11
+
12
+ # Root directories
13
+ /.env/
14
+ /.venv/
15
+ /.venv-wsl/
16
+ /.idea/
17
+ /venv/
18
+ /dist/
19
+
20
+ # Auto-generated during builds
21
+ /src/hatch/_version.py
22
+
23
+ # Sphinx documentation
24
+ docs/build/
25
+ docs/source/api/ # If using sphinx-apidoc and generating into source
26
+ docs/source/_autosummary_generated/ # If using autosummary
27
+
28
+ # Claude Code
29
+ CLAUDE.md
30
+ CLAUDE.local.md
31
+ .claude
32
+ .claudeignore
@@ -0,0 +1,66 @@
1
+ # This file configures the pre-commit hooks for the project.
2
+ # For more information, see https://pre-commit.com
3
+
4
+ repos:
5
+ - repo: https://github.com/pre-commit/pre-commit-hooks
6
+ rev: v4.6.0
7
+ hooks:
8
+ - id: trailing-whitespace # Trims trailing whitespace.
9
+ - id: end-of-file-fixer # Ensures files end with a single newline.
10
+ - id: check-yaml # Checks YAML files for parseable syntax.
11
+ - id: check-toml # Checks TOML files for parseable syntax.
12
+ - id: check-json # Checks JSON files for parseable syntax.
13
+ - id: check-added-large-files # Prevents committing large files.
14
+ - id: check-case-conflict # Checks for files that would conflict on case-insensitive filesystems.
15
+ - id: check-merge-conflict # Checks for files that contain merge conflict strings.
16
+ - id: debug-statements # Checks for leftover debugger imports and calls (e.g., pdb, breakpoint()).
17
+
18
+ # Black for code formatting
19
+ - repo: https://github.com/psf/black
20
+ rev: 24.4.2
21
+ hooks:
22
+ - id: black
23
+
24
+ # Ruff for linting
25
+ - repo: https://github.com/astral-sh/ruff-pre-commit
26
+ # Check for updates: https://github.com/astral-sh/ruff-pre-commit/releases
27
+ rev: v0.4.4 # Make sure to use the latest version compatible with your setup
28
+ hooks:
29
+ # Run Ruff's linter.
30
+ - id: ruff
31
+ name: ruff (linter)
32
+ # Apply fixes for fixable violations and exit with a non-zero code if fixes were made.
33
+ # This ensures that if Ruff modifies files, the commit will be stopped,
34
+ # prompting you to `git add` the changes and re-commit.
35
+ args: [--fix, --exit-non-zero-on-fix]
36
+ # Ruff will automatically find and use your pyproject.toml configuration
37
+ # for selected rules, exclusions, target version, etc.
38
+
39
+ # Local hook for building Sphinx documentation
40
+ - repo: local
41
+ hooks:
42
+ - id: build-sphinx-docs
43
+ name: Build Sphinx HTML documentation
44
+ # Use -W to treat warnings as errors, making the check stricter.
45
+ # --keep-going allows Sphinx to attempt to process the rest of the docs even if errors are found early.
46
+ entry: sphinx-build
47
+ args: ["-W", "--keep-going", "-b", "html", "docs/source", "docs/build/html"]
48
+ language: python
49
+ # These dependencies are taken from your pyproject.toml [project.optional-dependencies].docs
50
+ # and are needed for Sphinx to build your documentation correctly.
51
+ additional_dependencies: [
52
+ "sphinx",
53
+ "furo", # Your HTML theme
54
+ "sphinx-autoapi", # For auto-generating API docs
55
+ "myst-parser", # For parsing Markdown files
56
+ # Your project 'nonconform' itself. AutoAPI needs to inspect its source code.
57
+ # Adding "." installs the current project in the pre-commit environment.
58
+ "."
59
+ ]
60
+ pass_filenames: false # The command operates on directories
61
+ # This regex triggers the hook if:
62
+ # 1. Any .rst, .py, or .md file in docs/source/ changes.
63
+ # 2. Your docs/source/conf.py changes.
64
+ # 3. Any .py file in your main 'nonconform' package changes (because autoapi reads these).
65
+ files: ^docs/source/.*\.(rst|py|md)$|^(docs/source/conf\.py)$|^nonconform/.*\.py$
66
+ verbose: true
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2024, Oliver Hennhöfer
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,300 @@
1
+ Metadata-Version: 2.4
2
+ Name: nonconform
3
+ Version: 0.9.0
4
+ Summary: Conformal Anomaly Detection
5
+ Project-URL: Homepage, https://github.com/OliverHennhoefer/nonconform
6
+ Project-URL: Bugs, https://github.com/OliverHennhoefer/nonconform/issues
7
+ Author-email: Oliver Hennhoefer <oliver.hennhoefer@mail.de>
8
+ Maintainer-email: Oliver Hennhoefer <oliver.hennhoefer@mail.de>
9
+ License: BSD 3-Clause License
10
+
11
+ Copyright (c) 2024, Oliver Hennhöfer
12
+
13
+ Redistribution and use in source and binary forms, with or without
14
+ modification, are permitted provided that the following conditions are met:
15
+
16
+ 1. Redistributions of source code must retain the above copyright notice, this
17
+ list of conditions and the following disclaimer.
18
+
19
+ 2. Redistributions in binary form must reproduce the above copyright notice,
20
+ this list of conditions and the following disclaimer in the documentation
21
+ and/or other materials provided with the distribution.
22
+
23
+ 3. Neither the name of the copyright holder nor the names of its
24
+ contributors may be used to endorse or promote products derived from
25
+ this software without specific prior written permission.
26
+
27
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
31
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
34
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
35
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
+ License-File: LICENSE
38
+ Keywords: anomaly detection,conformal anomaly detection,conformal inference,false discovery rate,uncertainty quantification
39
+ Classifier: Development Status :: 4 - Beta
40
+ Classifier: License :: OSI Approved :: BSD License
41
+ Classifier: Operating System :: OS Independent
42
+ Classifier: Programming Language :: Python :: 3.12
43
+ Requires-Python: >=3.12
44
+ Requires-Dist: numpy~=1.26.0
45
+ Requires-Dist: pandas>=2.2.1
46
+ Requires-Dist: pyod~=2.0.3
47
+ Requires-Dist: scikit-learn>=1.6.1
48
+ Requires-Dist: scipy>=1.13.0
49
+ Requires-Dist: tqdm>=4.66.2
50
+ Provides-Extra: all
51
+ Requires-Dist: black; extra == 'all'
52
+ Requires-Dist: furo; extra == 'all'
53
+ Requires-Dist: myst-parser; extra == 'all'
54
+ Requires-Dist: online-fdr>=0.0.3; extra == 'all'
55
+ Requires-Dist: pre-commit; extra == 'all'
56
+ Requires-Dist: pyarrow>=16.1.0; extra == 'all'
57
+ Requires-Dist: ruff; extra == 'all'
58
+ Requires-Dist: sphinx; extra == 'all'
59
+ Requires-Dist: sphinx-autoapi; extra == 'all'
60
+ Requires-Dist: tensorflow>=2.16.1; extra == 'all'
61
+ Requires-Dist: torch>=2.7.0; extra == 'all'
62
+ Provides-Extra: data
63
+ Requires-Dist: pyarrow>=16.1.0; extra == 'data'
64
+ Provides-Extra: dev
65
+ Requires-Dist: black; extra == 'dev'
66
+ Requires-Dist: pre-commit; extra == 'dev'
67
+ Requires-Dist: ruff; extra == 'dev'
68
+ Provides-Extra: dl
69
+ Requires-Dist: tensorflow>=2.16.1; extra == 'dl'
70
+ Requires-Dist: torch>=2.7.0; extra == 'dl'
71
+ Provides-Extra: docs
72
+ Requires-Dist: furo; extra == 'docs'
73
+ Requires-Dist: myst-parser; extra == 'docs'
74
+ Requires-Dist: sphinx; extra == 'docs'
75
+ Requires-Dist: sphinx-autoapi; extra == 'docs'
76
+ Provides-Extra: fdr
77
+ Requires-Dist: online-fdr>=0.0.3; extra == 'fdr'
78
+ Description-Content-Type: text/markdown
79
+
80
+ ![Logo](./docs/img/banner_dark.png#gh-dark-mode-only)
81
+ ![Logo](./docs/img/banner_light.png#gh-light-mode-only)
82
+
83
+ [![PyPI Downloads](https://static.pepy.tech/badge/nonconform)](https://pepy.tech/projects/nonconform) [![PyPI Downloads](https://static.pepy.tech/badge/nonconform/month)](https://pepy.tech/projects/nonconform) [![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/nonconform) [![start with why](https://img.shields.io/badge/start%20with-why%3F-brightgreen.svg?style=flat)](https://www.diva-portal.org/smash/get/diva2:690997/FULLTEXT02.pdf)
84
+
85
+ **nonconform** is a Python library that enhances anomaly detection by providing uncertainty quantification. It acts as a wrapper around most detectors from the popular [*PyOD*](https://pyod.readthedocs.io/en/latest/) library (see [Supported Estimators](#supported-estimators)). By leveraging one-class classification principles and **conformal inference**, **nonconform** enables **statistically rigorous anomaly detection**.
86
+
87
+ # Key Features
88
+
89
+ * **Uncertainty Quantification:** Go beyond simple anomaly scores; get statistically valid _p_-values.
90
+ * **Error Control:** Reliably control metrics like the False Discovery Rate (FDR).
91
+ * **Broad PyOD Compatibility:** Works with a wide range of PyOD estimators (see [Supported Estimators](#supported-estimators)).
92
+ * **Flexible Strategies:** Implements various conformal strategies like Split-Conformal and Bootstrap-after-Jackknife+ (JaB+).
93
+
94
+ # :hatching_chick: Getting Started
95
+
96
+ ```sh
97
+ pip install nonconform
98
+ ```
99
+
100
+ _For additional features, you might need optional dependencies:_
101
+ - `pip install nonconform[data]` - Includes pyarrow for loading example data (via remote download)
102
+ - `pip install nonconform[dl]` - Includes deep learning dependencies (TensorFlow, PyTorch)
103
+ - `pip install nonconform[all]` - Includes all optional dependencies
104
+
105
+ _Please refer to the [pyproject.toml](https://github.com/OliverHennhoefer/nonconform/blob/main/pyproject.toml) for details._
106
+
107
+ ## Split-Conformal (also _Inductive_) Approach
108
+
109
+ Using a _Gaussian Mixture Model_ on the _Shuttle_ dataset:
110
+
111
+ > **Note:** The examples below use the built-in datasets. Install with `pip install nonconform[data]` to run these examples.
112
+
113
+ ```python
114
+ from pyod.models.gmm import GMM
115
+ from scipy.stats import false_discovery_control
116
+
117
+ from nonconform.strategy import Split
118
+ from nonconform.estimation import StandardConformalDetector
119
+ from nonconform.utils.data import load_shuttle
120
+ from nonconform.utils.stat import false_discovery_rate, statistical_power
121
+
122
+ x_train, x_test, y_test = load_shuttle(setup=True)
123
+
124
+ ce = StandardConformalDetector(
125
+ detector=GMM(),
126
+ strategy=Split(calib_size=1_000)
127
+ )
128
+
129
+ ce.fit(x_train)
130
+ estimates = ce.predict(x_test)
131
+
132
+ decisions = false_discovery_control(estimates, method='bh') <= 0.2
133
+
134
+ print(f"Empirical FDR: {false_discovery_rate(y=y_test, y_hat=decisions)}")
135
+ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=decisions)}")
136
+ ```
137
+
138
+ Output:
139
+ ```text
140
+ Empirical FDR: 0.108
141
+ Empirical Power: 0.99
142
+ ```
143
+
144
+ # :hatched_chick: Advanced Usage
145
+
146
+ ## Bootstrap-after-Jackknife+ (JaB+)
147
+
148
+ The `Bootstrap()` strategy lets you set 2 of the 3 parameters `resampling_ratio`, `n_bootstraps` and `n_calib`.
149
+ For either combination, the remaining parameter will be filled automatically. This allows exact control of the
150
+ calibration procedure when using a bootstrap strategy.
151
+
152
+ ```python
153
+ from pyod.models.iforest import IForest
154
+ from scipy.stats import false_discovery_control
155
+
156
+ from nonconform.estimation import StandardConformalDetector
157
+ from nonconform.strategy import Bootstrap
158
+ from nonconform.utils.data import load_shuttle
159
+ from nonconform.utils.stat import false_discovery_rate, statistical_power
160
+
161
+ x_train, x_test, y_test = load_shuttle(setup=True)
162
+
163
+ ce = StandardConformalDetector(
164
+ detector=IForest(behaviour="new"),
165
+ strategy=Bootstrap(resampling_ratio=0.99, n_bootstraps=20, plus=True)
166
+ )
167
+
168
+ ce.fit(x_train)
169
+ estimates = ce.predict(x_test)
170
+
171
+ decisions = false_discovery_control(estimates, method='bh') <= 0.1
172
+
173
+ print(f"Empirical FDR: {false_discovery_rate(y=y_test, y_hat=decisions)}")
174
+ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=decisions)}")
175
+ ```
176
+
177
+ Output:
178
+ ```text
179
+ Empirical FDR: 0.067
180
+ Empirical Power: 0.98
181
+ ```
182
+
183
+ ## Weighted Conformal Anomaly Detection
184
+
185
+ The statistical validity of conformal anomaly detection depends on data *exchangeability* (weaker than i.i.d.). This assumption can be slightly relaxed by computing weighted conformal _p_-values.
186
+
187
+ ```python
188
+ from pyod.models.iforest import IForest
189
+ from scipy.stats import false_discovery_control
190
+
191
+ from nonconform.utils.data import load_shuttle
192
+ from nonconform.estimation import WeightedConformalDetector
193
+ from nonconform.strategy import Split
194
+ from nonconform.utils.stat import false_discovery_rate, statistical_power
195
+
196
+ x_train, x_test, y_test = load_shuttle(setup=True)
197
+
198
+ model = IForest(behaviour="new")
199
+ strategy = Split(calib_size=1_000)
200
+
201
+ ce = WeightedConformalDetector(detector=model, strategy=strategy)
202
+ ce.fit(x_train)
203
+ estimates = ce.predict(x_test)
204
+
205
+ decisions = false_discovery_control(estimates, method='bh') <= 0.1
206
+
207
+ print(f"Empirical FDR: {false_discovery_rate(y=y_test, y_hat=decisions)}")
208
+ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=decisions)}")
209
+ ```
210
+
211
+ Output:
212
+ ```text
213
+ Empirical FDR: 0.077
214
+ Empirical Power: 0.96
215
+ ```
216
+
217
+ # Citation
218
+
219
+ If you find this repository useful for your research, please cite the following papers:
220
+
221
+ ##### Leave-One-Out-, Bootstrap- and Cross-Conformal Anomaly Detectors
222
+ ```text
223
+ @inproceedings{Hennhofer2024,
224
+ title = {{ Leave-One-Out-, Bootstrap- and Cross-Conformal Anomaly Detectors }},
225
+ author = {Hennhofer, Oliver and Preisach, Christine},
226
+ year = 2024,
227
+ month = {Dec},
228
+ booktitle = {2024 IEEE International Conference on Knowledge Graph (ICKG)},
229
+ publisher = {IEEE Computer Society},
230
+ address = {Los Alamitos, CA, USA},
231
+ pages = {110--119},
232
+ doi = {10.1109/ICKG63256.2024.00022},
233
+ url = {https://doi.ieeecomputersociety.org/10.1109/ICKG63256.2024.00022}
234
+ }
235
+ ```
236
+
237
+ ##### Testing for outliers with conformal p-values
238
+ ```text
239
+ @article{Bates2023,
240
+ title = {Testing for outliers with conformal p-values},
241
+ author = {Bates, Stephen and Candès, Emmanuel and Lei, Lihua and Romano, Yaniv and Sesia, Matteo},
242
+ year = 2023,
243
+ month = feb,
244
+ journal = {The Annals of Statistics},
245
+ publisher = {Institute of Mathematical Statistics},
246
+ volume = 51,
247
+ number = 1,
248
+ doi = {10.1214/22-aos2244},
249
+ issn = {0090-5364},
250
+ url = {http://dx.doi.org/10.1214/22-AOS2244}
251
+ }
252
+ ```
253
+ ##### Model-free selective inference under covariate shift via weighted conformal p-values
254
+ ```text
255
+ @inproceedings{Jin2023,
256
+ title = {Model-free selective inference under covariate shift via weighted conformal p-values},
257
+ author = {Ying Jin and Emmanuel J. Cand{\`e}s},
258
+ year = 2023,
259
+ url = {https://api.semanticscholar.org/CorpusID:259950903}
260
+ }
261
+ ```
262
+
263
+ # Supported Estimators
264
+
265
+ The package only supports anomaly estimators that are suitable for unsupervised one-class classification. Because the respective
266
+ detectors are exclusively fitted on *normal* (or *non-anomalous*) data, parameters like *threshold* are internally
267
+ set to the smallest possible values.
268
+
269
+ Models that are **currently supported** include:
270
+
271
+ * Angle-Based Outlier Detection (**ABOD**)
272
+ * Autoencoder (**AE**)
273
+ * Cook's Distance (**CD**)
274
+ * Copula-based Outlier Detector (**COPOD**)
275
+ * Deep Isolation Forest (**DIF**)
276
+ * Empirical-Cumulative-distribution-based Outlier Detection (**ECOD**)
277
+ * Gaussian Mixture Model (**GMM**)
278
+ * Histogram-based Outlier Detection (**HBOS**)
279
+ * Isolation-based Anomaly Detection using Nearest-Neighbor Ensembles (**INNE**)
280
+ * Isolation Forest (**IForest**)
281
+ * Kernel Density Estimation (**KDE**)
282
+ * *k*-Nearest Neighbor (***k*NN**)
283
+ * Kernel Principal Component Analysis (**KPCA**)
284
+ * Linear Model Deviation-based Outlier Detection (**LMDD**)
285
+ * Local Outlier Factor (**LOF**)
286
+ * Local Correlation Integral (**LOCI**)
287
+ * Lightweight Online Detector of Anomalies (**LODA**)
288
+ * Locally Selective Combination of Parallel Outlier Ensembles (**LSCP**)
289
+ * GNN-based Anomaly Detection Method (**LUNAR**)
290
+ * Median Absolute Deviation (**MAD**)
291
+ * Minimum Covariance Determinant (**MCD**)
292
+ * One-Class SVM (**OCSVM**)
293
+ * Principal Component Analysis (**PCA**)
294
+ * Quasi-Monte Carlo Discrepancy Outlier Detection (**QMCD**)
295
+ * Rotation-based Outlier Detection (**ROD**)
296
+ * Subspace Outlier Detection (**SOD**)
297
+ * Scalable Unsupervised Outlier Detection (**SUOD**)
298
+
299
+ # Contact
300
+ **Bug reporting:** [https://github.com/OliverHennhoefer/nonconform/issues](https://github.com/OliverHennhoefer/nonconform/issues)
@@ -0,0 +1,221 @@
1
+ ![Logo](./docs/img/banner_dark.png#gh-dark-mode-only)
2
+ ![Logo](./docs/img/banner_light.png#gh-light-mode-only)
3
+
4
+ [![PyPI Downloads](https://static.pepy.tech/badge/nonconform)](https://pepy.tech/projects/nonconform) [![PyPI Downloads](https://static.pepy.tech/badge/nonconform/month)](https://pepy.tech/projects/nonconform) [![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/nonconform) [![start with why](https://img.shields.io/badge/start%20with-why%3F-brightgreen.svg?style=flat)](https://www.diva-portal.org/smash/get/diva2:690997/FULLTEXT02.pdf)
5
+
6
+ **nonconform** is a Python library that enhances anomaly detection by providing uncertainty quantification. It acts as a wrapper around most detectors from the popular [*PyOD*](https://pyod.readthedocs.io/en/latest/) library (see [Supported Estimators](#supported-estimators)). By leveraging one-class classification principles and **conformal inference**, **nonconform** enables **statistically rigorous anomaly detection**.
7
+
8
+ # Key Features
9
+
10
+ * **Uncertainty Quantification:** Go beyond simple anomaly scores; get statistically valid _p_-values.
11
+ * **Error Control:** Reliably control metrics like the False Discovery Rate (FDR).
12
+ * **Broad PyOD Compatibility:** Works with a wide range of PyOD estimators (see [Supported Estimators](#supported-estimators)).
13
+ * **Flexible Strategies:** Implements various conformal strategies like Split-Conformal and Jackknife+-after-Bootstrap (JaB+).
14
+
15
+ # :hatching_chick: Getting Started
16
+
17
+ ```sh
18
+ pip install nonconform
19
+ ```
20
+
21
+ _For additional features, you might need optional dependencies:_
22
+ - `pip install nonconform[data]` - Includes pyarrow for loading example data (via remote download)
23
+ - `pip install nonconform[dl]` - Includes deep learning dependencies (TensorFlow, PyTorch)
24
+ - `pip install nonconform[all]` - Includes all optional dependencies
25
+
26
+ _Please refer to the [pyproject.toml](https://github.com/OliverHennhoefer/nonconform/blob/main/pyproject.toml) for details._
27
+
28
+ ## Split-Conformal (also _Inductive_) Approach
29
+
30
+ Using a _Gaussian Mixture Model_ on the _Shuttle_ dataset:
31
+
32
+ > **Note:** The examples below use the built-in datasets. Install with `pip install nonconform[data]` to run these examples.
33
+
34
+ ```python
35
+ from pyod.models.gmm import GMM
36
+ from scipy.stats import false_discovery_control
37
+
38
+ from nonconform.strategy import Split
39
+ from nonconform.estimation import StandardConformalDetector
40
+ from nonconform.utils.data import load_shuttle
41
+ from nonconform.utils.stat import false_discovery_rate, statistical_power
42
+
43
+ x_train, x_test, y_test = load_shuttle(setup=True)
44
+
45
+ ce = StandardConformalDetector(
46
+ detector=GMM(),
47
+ strategy=Split(calib_size=1_000)
48
+ )
49
+
50
+ ce.fit(x_train)
51
+ estimates = ce.predict(x_test)
52
+
53
+ decisions = false_discovery_control(estimates, method='bh') <= 0.2
54
+
55
+ print(f"Empirical FDR: {false_discovery_rate(y=y_test, y_hat=decisions)}")
56
+ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=decisions)}")
57
+ ```
58
+
59
+ Output:
60
+ ```text
61
+ Empirical FDR: 0.108
62
+ Empirical Power: 0.99
63
+ ```
64
+
65
+ # :hatched_chick: Advanced Usage
66
+
67
+ ## Jackknife+-after-Bootstrap (JaB+)
68
+
69
+ The `Bootstrap()` strategy allows setting 2 of the 3 parameters `resampling_ratio`, `n_bootstraps` and `n_calib`.
70
+ For any combination of two, the remaining parameter will be filled automatically. This allows exact control of the
71
+ calibration procedure when using a bootstrap strategy.
72
+
73
+ ```python
74
+ from pyod.models.iforest import IForest
75
+ from scipy.stats import false_discovery_control
76
+
77
+ from nonconform.estimation import StandardConformalDetector
78
+ from nonconform.strategy import Bootstrap
79
+ from nonconform.utils.data import load_shuttle
80
+ from nonconform.utils.stat import false_discovery_rate, statistical_power
81
+
82
+ x_train, x_test, y_test = load_shuttle(setup=True)
83
+
84
+ ce = StandardConformalDetector(
85
+ detector=IForest(behaviour="new"),
86
+ strategy=Bootstrap(resampling_ratio=0.99, n_bootstraps=20, plus=True)
87
+ )
88
+
89
+ ce.fit(x_train)
90
+ estimates = ce.predict(x_test)
91
+
92
+ decisions = false_discovery_control(estimates, method='bh') <= 0.1
93
+
94
+ print(f"Empirical FDR: {false_discovery_rate(y=y_test, y_hat=decisions)}")
95
+ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=decisions)}")
96
+ ```
97
+
98
+ Output:
99
+ ```text
100
+ Empirical FDR: 0.067
101
+ Empirical Power: 0.98
102
+ ```
103
+
104
+ ## Weighted Conformal Anomaly Detection
105
+
106
+ The statistical validity of conformal anomaly detection depends on data *exchangeability* (weaker than i.i.d.). This assumption can be slightly relaxed by computing weighted conformal _p_-values.
107
+
108
+ ```python
109
+ from pyod.models.iforest import IForest
110
+ from scipy.stats import false_discovery_control
111
+
112
+ from nonconform.utils.data import load_shuttle
113
+ from nonconform.estimation import WeightedConformalDetector
114
+ from nonconform.strategy import Split
115
+ from nonconform.utils.stat import false_discovery_rate, statistical_power
116
+
117
+ x_train, x_test, y_test = load_shuttle(setup=True)
118
+
119
+ model = IForest(behaviour="new")
120
+ strategy = Split(calib_size=1_000)
121
+
122
+ ce = WeightedConformalDetector(detector=model, strategy=strategy)
123
+ ce.fit(x_train)
124
+ estimates = ce.predict(x_test)
125
+
126
+ decisions = false_discovery_control(estimates, method='bh') <= 0.1
127
+
128
+ print(f"Empirical FDR: {false_discovery_rate(y=y_test, y_hat=decisions)}")
129
+ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=decisions)}")
130
+ ```
131
+
132
+ Output:
133
+ ```text
134
+ Empirical FDR: 0.077
135
+ Empirical Power: 0.96
136
+ ```
137
+
138
+ # Citation
139
+
140
+ If you find this repository useful for your research, please cite the following papers:
141
+
142
+ ##### Leave-One-Out-, Bootstrap- and Cross-Conformal Anomaly Detectors
143
+ ```text
144
+ @inproceedings{Hennhofer2024,
145
+ title = {{ Leave-One-Out-, Bootstrap- and Cross-Conformal Anomaly Detectors }},
146
+ author = {Hennhofer, Oliver and Preisach, Christine},
147
+ year = 2024,
148
+ month = {Dec},
149
+ booktitle = {2024 IEEE International Conference on Knowledge Graph (ICKG)},
150
+ publisher = {IEEE Computer Society},
151
+ address = {Los Alamitos, CA, USA},
152
+ pages = {110--119},
153
+ doi = {10.1109/ICKG63256.2024.00022},
154
+ url = {https://doi.ieeecomputersociety.org/10.1109/ICKG63256.2024.00022}
155
+ }
156
+ ```
157
+
158
+ ##### Testing for outliers with conformal p-values
159
+ ```text
160
+ @article{Bates2023,
161
+ title = {Testing for outliers with conformal p-values},
162
+ author = {Bates, Stephen and Candès, Emmanuel and Lei, Lihua and Romano, Yaniv and Sesia, Matteo},
163
+ year = 2023,
164
+ month = feb,
165
+ journal = {The Annals of Statistics},
166
+ publisher = {Institute of Mathematical Statistics},
167
+ volume = 51,
168
+ number = 1,
169
+ doi = {10.1214/22-aos2244},
170
+ issn = {0090-5364},
171
+ url = {http://dx.doi.org/10.1214/22-AOS2244}
172
+ }
173
+ ```
174
+ ##### Model-free selective inference under covariate shift via weighted conformal p-values
175
+ ```text
176
+ @inproceedings{Jin2023,
177
+ title = {Model-free selective inference under covariate shift via weighted conformal p-values},
178
+ author = {Ying Jin and Emmanuel J. Cand{\`e}s},
179
+ year = 2023,
180
+ url = {https://api.semanticscholar.org/CorpusID:259950903}
181
+ }
182
+ ```
183
+
184
+ # Supported Estimators
185
+
186
+ The package only supports anomaly estimators that are suitable for unsupervised one-class classification. As respective
187
+ detectors are therefore exclusively fitted on *normal* (or *non-anomalous*) data, parameters like *threshold* are internally
188
+ set to the smallest possible values.
189
+
190
+ Models that are **currently supported** include:
191
+
192
+ * Angle-Based Outlier Detection (**ABOD**)
193
+ * Autoencoder (**AE**)
194
+ * Cook's Distance (**CD**)
195
+ * Copula-based Outlier Detector (**COPOD**)
196
+ * Deep Isolation Forest (**DIF**)
197
+ * Empirical-Cumulative-distribution-based Outlier Detection (**ECOD**)
198
+ * Gaussian Mixture Model (**GMM**)
199
+ * Histogram-based Outlier Detection (**HBOS**)
200
+ * Isolation-based Anomaly Detection using Nearest-Neighbor Ensembles (**INNE**)
201
+ * Isolation Forest (**IForest**)
202
+ * Kernel Density Estimation (**KDE**)
203
+ * *k*-Nearest Neighbor (***k*NN**)
204
+ * Kernel Principal Component Analysis (**KPCA**)
205
+ * Linear Model Deviation-based Outlier Detection (**LMDD**)
206
+ * Local Outlier Factor (**LOF**)
207
+ * Local Correlation Integral (**LOCI**)
208
+ * Lightweight Online Detector of Anomalies (**LODA**)
209
+ * Locally Selective Combination of Parallel Outlier Ensembles (**LSCP**)
210
+ * GNN-based Anomaly Detection Method (**LUNAR**)
211
+ * Median Absolute Deviation (**MAD**)
212
+ * Minimum Covariance Determinant (**MCD**)
213
+ * One-Class SVM (**OCSVM**)
214
+ * Principal Component Analysis (**PCA**)
215
+ * Quasi-Monte Carlo Discrepancy Outlier Detection (**QMCD**)
216
+ * Rotation-based Outlier Detection (**ROD**)
217
+ * Subspace Outlier Detection (**SOD**)
218
+ * Scalable Unsupervised Outlier Detection (**SUOD**)
219
+
220
+ # Contact
221
+ **Bug reporting:** [https://github.com/OliverHennhoefer/nonconform/issues](https://github.com/OliverHennhoefer/nonconform/issues)