overlapindex 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2026 Niklas M. Melton
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,158 @@
1
+ Metadata-Version: 2.4
2
+ Name: overlapindex
3
+ Version: 0.1.0
4
+ Summary: OverlapIndex (OI), an Incremental Cluster Validity index for identifying the degree of overlap of data classes.
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: incremental cluster validity,cluster validity,ART,machine learning,transfer learning,clustering
8
+ Author: Niklas M. Melton
9
+ Author-email: niklasmelton@gmail.com
10
+ Requires-Python: >=3.9
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Requires-Dist: artlib (>=0.1.7,<0.2.0)
20
+ Requires-Dist: numpy (>=2.4.1,<3.0.0)
21
+ Project-URL: Documentation, https://github.com/NiklasMelton/OverlapIndex
22
+ Project-URL: Homepage, https://github.com/NiklasMelton/OverlapIndex
23
+ Project-URL: Repository, https://github.com/NiklasMelton/OverlapIndex
24
+ Description-Content-Type: text/markdown
25
+
26
+ # OverlapIndex (OI)
27
+
28
+ This package provides an implementation of the **Overlap Index (OI)**, an *incremental cluster validity index (iCVI)* designed to quantify the degree of overlap between data classes or clusters. The OI is updated online, sample by sample or in batches, and is particularly suited for streaming, continual learning, and representation analysis.
29
+
30
+ The implementation is built on **ARTMAP-based clustering** (Fuzzy ART or Hypersphere
31
+ ART), leveraging the dynamic clustering properties of Adaptive Resonance Theory to
32
+ track class overlap as new data (and classes) arrive.
33
+
34
+ ---
35
+
36
+ ## Overview
37
+
38
+ The Overlap Index is bounded in the interval **[0, 1]** and has the following interpretation:
39
+
40
+ - **OI = 1.0**
41
+ Indicates perfect class separation (no overlap).
42
+
43
+ - **OI = 0.5**
44
+ Indicates complete overlap between classes.
45
+
46
+ - **OI < 0.5**
47
+ Indicates a degenerate or pathological case in the data distribution.
48
+
49
+ The index is computed incrementally by tracking shared cluster activations between pairs of classes and aggregating class-wise overlap into a global measure.
50
+
51
+ ---
52
+
53
+ ## Key Properties
54
+
55
+ - **Incremental / Online**
56
+ Supports streaming updates via `add_sample` and mini-batch updates via `add_batch`.
57
+ New classes can be introduced at any time, enabling analysis of incremental
58
+ learning scenarios.
59
+
60
+ - **Label-Aware**
61
+ Can be applied both to labeled raw data and to intermediate representations (e.g., neural network activations).
62
+
63
+ - **Geometry-Agnostic**
64
+ Works well on arbitrary geometric structures of data. No geometric constraints are
65
+ assumed.
66
+
67
+ ---
68
+
69
+ ## Typical Use Cases
70
+
71
+ The Overlap Index can be used in several settings:
72
+
73
+ - **Unsupervised clustering evaluation**
74
+ As an iCVI, OI provides insight into the quality of a clustering partition as it evolves over time.
75
+
76
+ - **Class separability analysis**
77
+ Measures the degree of overlap in labeled datasets without requiring a classifier.
78
+
79
+ - **Representation monitoring in deep learning**
80
+ Tracks how class separation changes across layers or training epochs.
81
+
82
+ - **Backbone evaluation for transfer learning**
83
+ Compares feature extractors, where higher OI values indicate better class separation in the learned representation.
84
+
85
+ ---
86
+
87
+ ## Implementation Notes
88
+
89
+ - ART-based clustering is performed using `artlib`’s `FuzzyARTMAP` or `HypersphereARTMAP`.
90
+ - Inputs are **complement coded**, following standard ART practice.
91
+ - Overlap is estimated by monitoring shared best-matching units (BMUs) between class pairs.
92
+ - The global OI is computed as the mean of per-class minimum pairwise overlap scores.
93
+
94
+ ---
95
+
96
+ ## Basic Usage
97
+
98
+ from overlapindex import OverlapIndex
99
+
100
+ oi = OverlapIndex(
101
+ rho=0.9,
102
+ ART="Hypersphere",
103
+ match_tracking="MT+"
104
+ )
105
+
106
+ # Incremental update
107
+ for x, y in stream:
108
+ score = oi.add_sample(x, y)
109
+
110
+ # Or batch update
111
+ score = oi.add_batch(X, Y)
112
+
113
+ The returned value is the current Overlap Index after the update.
114
+
115
+ ---
116
+
117
+ ## Parameters
118
+
119
+ - `rho` *(float)*
120
+ Vigilance parameter controlling cluster granularity.
121
+
122
+ - `r_hat` *(float, Hypersphere ART only)*
123
+ Maximum cluster radius.
124
+
125
+ - `ART` *("Fuzzy" | "Hypersphere")*
126
+ Choice of ART module.
127
+
128
+ - `match_tracking` *(str)*
129
+ Match-tracking strategy used during ARTMAP learning.
130
+
131
+ The default parameters are likely to satisfy most use cases. For very large datasets,
132
+ it may be necessary to use smaller `rho` values (0.5-0.7) to improve run-time
133
+ performance.
134
+
135
+ ---
136
+
137
+ ## Output
138
+
139
+ - **`index`**
140
+ Global Overlap Index across all observed classes.
141
+
142
+ - **`singleton_index[y]`**
143
+ Minimum pairwise overlap score for class `y`.
144
+
145
+ - **`pairwise_index[(y, b)]`**
146
+ Pairwise overlap score between classes `y` and `b`.
147
+
148
+ ---
149
+
150
+ ## Intended Audience
151
+
152
+ This package is intended for researchers and practitioners working on:
153
+
154
+ - incremental and continual learning,
155
+ - clustering validation,
156
+ - representation learning,
157
+ - transfer learning
158
+
@@ -0,0 +1,132 @@
1
+ # OverlapIndex (OI)
2
+
3
+ This package provides an implementation of the **Overlap Index (OI)**, an *incremental cluster validity index (iCVI)* designed to quantify the degree of overlap between data classes or clusters. The OI is updated online, sample by sample or in batches, and is particularly suited for streaming, continual learning, and representation analysis.
4
+
5
+ The implementation is built on **ARTMAP-based clustering** (Fuzzy ART or Hypersphere
6
+ ART), leveraging the dynamic clustering properties of Adaptive Resonance Theory to
7
+ track class overlap as new data (and classes) arrive.
8
+
9
+ ---
10
+
11
+ ## Overview
12
+
13
+ The Overlap Index is bounded in the interval **[0, 1]** and has the following interpretation:
14
+
15
+ - **OI = 1.0**
16
+ Indicates perfect class separation (no overlap).
17
+
18
+ - **OI = 0.5**
19
+ Indicates complete overlap between classes.
20
+
21
+ - **OI < 0.5**
22
+ Indicates a degenerate or pathological case in the data distribution.
23
+
24
+ The index is computed incrementally by tracking shared cluster activations between pairs of classes and aggregating class-wise overlap into a global measure.
25
+
26
+ ---
27
+
28
+ ## Key Properties
29
+
30
+ - **Incremental / Online**
31
+ Supports streaming updates via `add_sample` and mini-batch updates via `add_batch`.
32
+ New classes can be introduced at any time, enabling analysis of incremental
33
+ learning scenarios.
34
+
35
+ - **Label-Aware**
36
+ Can be applied both to labeled raw data and to intermediate representations (e.g., neural network activations).
37
+
38
+ - **Geometry-Agnostic**
39
+ Works well on arbitrary geometric structures of data. No geometric constraints are
40
+ assumed.
41
+
42
+ ---
43
+
44
+ ## Typical Use Cases
45
+
46
+ The Overlap Index can be used in several settings:
47
+
48
+ - **Unsupervised clustering evaluation**
49
+ As an iCVI, OI provides insight into the quality of a clustering partition as it evolves over time.
50
+
51
+ - **Class separability analysis**
52
+ Measures the degree of overlap in labeled datasets without requiring a classifier.
53
+
54
+ - **Representation monitoring in deep learning**
55
+ Tracks how class separation changes across layers or training epochs.
56
+
57
+ - **Backbone evaluation for transfer learning**
58
+ Compares feature extractors, where higher OI values indicate better class separation in the learned representation.
59
+
60
+ ---
61
+
62
+ ## Implementation Notes
63
+
64
+ - ART-based clustering is performed using `artlib`’s `FuzzyARTMAP` or `HypersphereARTMAP`.
65
+ - Inputs are **complement coded**, following standard ART practice.
66
+ - Overlap is estimated by monitoring shared best-matching units (BMUs) between class pairs.
67
+ - The global OI is computed as the mean of per-class minimum pairwise overlap scores.
68
+
69
+ ---
70
+
71
+ ## Basic Usage
72
+
73
+ from overlapindex import OverlapIndex
74
+
75
+ oi = OverlapIndex(
76
+ rho=0.9,
77
+ ART="Hypersphere",
78
+ match_tracking="MT+"
79
+ )
80
+
81
+ # Incremental update
82
+ for x, y in stream:
83
+ score = oi.add_sample(x, y)
84
+
85
+ # Or batch update
86
+ score = oi.add_batch(X, Y)
87
+
88
+ The returned value is the current Overlap Index after the update.
89
+
90
+ ---
91
+
92
+ ## Parameters
93
+
94
+ - `rho` *(float)*
95
+ Vigilance parameter controlling cluster granularity.
96
+
97
+ - `r_hat` *(float, Hypersphere ART only)*
98
+ Maximum cluster radius.
99
+
100
+ - `ART` *("Fuzzy" | "Hypersphere")*
101
+ Choice of ART module.
102
+
103
+ - `match_tracking` *(str)*
104
+ Match-tracking strategy used during ARTMAP learning.
105
+
106
+ The default parameters are likely to satisfy most use cases. For very large datasets,
107
+ it may be necessary to use smaller `rho` values (0.5-0.7) to improve run-time
108
+ performance.
109
+
110
+ ---
111
+
112
+ ## Output
113
+
114
+ - **`index`**
115
+ Global Overlap Index across all observed classes.
116
+
117
+ - **`singleton_index[y]`**
118
+ Minimum pairwise overlap score for class `y`.
119
+
120
+ - **`pairwise_index[(y, b)]`**
121
+ Pairwise overlap score between classes `y` and `b`.
122
+
123
+ ---
124
+
125
+ ## Intended Audience
126
+
127
+ This package is intended for researchers and practitioners working on:
128
+
129
+ - incremental and continual learning,
130
+ - clustering validation,
131
+ - representation learning,
132
+ - transfer learning
@@ -0,0 +1,185 @@
1
+ import numpy as np
2
+ from artlib import HypersphereARTMAP, FuzzyARTMAP, complement_code
3
+ from typing import Literal
4
+ from collections import defaultdict
5
+
6
+
7
class GrowingArray1D:
    """One-dimensional NumPy array that zero-extends itself on demand.

    Any access through ``[]`` (read or write) or ``+=`` with an index at or
    beyond the current length first pads the backing array with zeros so that
    the index becomes valid. Reading an unseen index therefore returns zero
    *and* lengthens the array.
    """

    def __init__(self, dtype=int):
        # Start empty; the dtype chosen here is kept for every later resize.
        self.array = np.zeros(0, dtype=dtype)

    def _ensure_size(self, i):
        """Pad the backing array with zeros until index ``i`` is in range."""
        shortfall = i + 1 - self.array.size
        if shortfall > 0:
            padding = np.zeros(shortfall, dtype=self.array.dtype)
            self.array = np.concatenate((self.array, padding))

    def __getitem__(self, i):
        """Return element ``i``, growing the array first if necessary."""
        self._ensure_size(i)
        return self.array[i]

    def __setitem__(self, i, value):
        """Store ``value`` at index ``i``, growing the array first if necessary."""
        self._ensure_size(i)
        self.array[i] = value

    def __iadd__(self, idx_value):
        """Support ``arr += (index, amount)`` as an in-place increment."""
        position, amount = idx_value
        self._ensure_size(position)
        self.array[position] += amount
        return self

    def __len__(self):
        return len(self.array)

    def __repr__(self):
        return repr(self.array)

    def asarray(self):
        """Return a copy of the current contents as a NumPy array."""
        return self.array.copy()

    def __iter__(self):
        # Only the elements present when iteration starts are produced.
        yield from self.array
45
+
46
+
47
def top_two_indices_against_others(T, classes, class_to_clusters, a):
    """Find the two strongest clusters for class ``a`` paired with each other class.

    For every class ``b`` in ``classes`` other than ``a``, the clusters of
    ``a`` and ``b`` are pooled and the (up to) two pooled cluster indices with
    the largest values in ``T`` are selected.

    Parameters
    ----------
    T : array-like
        Values indexed by cluster index; converted with ``np.asarray``.
    classes : iterable
        Class labels to compare against ``a``.
    class_to_clusters : mapping
        Maps a class label to a set of cluster indices. Missing labels are
        treated as having no clusters.
    a : hashable
        The reference class label.

    Returns
    -------
    dict
        ``{b: indices}`` where ``indices`` is a tuple of 0, 1 or 2 cluster
        indices ordered from highest to lowest value in ``T``.
    """
    activations = np.asarray(T)
    own_clusters = class_to_clusters.get(a, set())

    def _best_two(pool):
        # Fewer than two candidates: nothing to rank, return them as they are.
        if len(pool) < 2:
            return tuple(pool)
        scores = activations[pool]
        # Partition to isolate the two largest, then order that pair descending.
        leaders = np.argpartition(scores, -2)[-2:]
        ranked = leaders[np.argsort(scores[leaders])[::-1]]
        return tuple(pool[k] for k in ranked)

    return {
        other: _best_two(list(own_clusters | class_to_clusters.get(other, set())))
        for other in classes
        if other != a
    }
73
+
74
+
75
class OverlapIndex:
    """Incrementally maintained overlap index between labelled classes.

    Samples are fed to an ARTMAP model one at a time or in batches. For each
    sample of class ``y`` and every other known class ``b``, the class is
    looked up for the strongest-responding cluster (other than the sample's
    own cluster) among the clusters of ``y`` and ``b``. Whenever that cluster
    belongs to ``b``, an overlap count for the ordered pair ``(y, b)`` is
    incremented.

    Parameters
    ----------
    rho : float
        Passed to the ARTMAP constructor as ``rho``.
    r_hat : float
        Passed to ``HypersphereARTMAP`` as ``r_hat``; unused for ``"Fuzzy"``.
    ART : {"Fuzzy", "Hypersphere"}
        Selects ``FuzzyARTMAP`` or ``HypersphereARTMAP``.
    match_tracking : str
        Forwarded to ``partial_fit`` as ``match_tracking``.

    Attributes
    ----------
    index
        Mean of all values in ``singleton_index``.
    singleton_index
        ``singleton_index[y]`` is the minimum of ``pairwise_index[(y, b)]``
        over every other known class ``b``.
    pairwise_index
        ``pairwise_index[(y, b)] = 1 - sparse_adj[(y, b)] / cluster_cardinality[y]``.
    sparse_adj
        Overlap counts keyed by the ordered pair ``(y, b)``.
    cluster_cardinality
        Number of samples seen per class label. Labels are used directly as
        array indices, so they are presumably expected to be non-negative
        integers -- confirm against callers.
    rev_map
        Maps each class label to the set of cluster labels assigned to it.

    Raises
    ------
    ValueError
        If ``ART`` is not one of the supported names.
    """

    def __init__(
        self,
        rho: float = 0.9,
        r_hat: float = np.inf,
        ART: Literal["Fuzzy", "Hypersphere"] = "Fuzzy",
        match_tracking="MT+",
    ):
        # Raise rather than assert: asserts are stripped under ``python -O``,
        # which would let an unknown name silently select Hypersphere ART.
        if ART not in ("Fuzzy", "Hypersphere"):
            raise ValueError(
                f"ART must be 'Fuzzy' or 'Hypersphere', got {ART!r}"
            )
        if ART == "Fuzzy":
            self.ARTMAP = FuzzyARTMAP(rho=rho, alpha=1e-10, beta=1.0)
        else:
            self.ARTMAP = HypersphereARTMAP(
                rho=rho, alpha=1e-10, beta=1.0, r_hat=r_hat
            )
        self.ART = ART
        self.sparse_adj = defaultdict(lambda: 0)
        self.cluster_cardinality = GrowingArray1D()
        self.rev_map = defaultdict(set)
        self.pairwise_index = defaultdict(lambda: 1.0)
        self.singleton_index = defaultdict(lambda: 1.0)
        self.index = 1.0
        self.match_tracking = match_tracking

    @property
    def module_a(self):
        """The ``module_a`` attribute of the wrapped ARTMAP model."""
        return self.ARTMAP.module_a

    @property
    def map(self):
        """The ``map`` attribute of the wrapped ARTMAP model."""
        return self.ARTMAP.map

    def predict_subset_pairs(self, x, y):
        """Return the top-two clusters for ``x`` against every other class.

        Parameters
        ----------
        x : array-like
            Prepared sample handed unchanged to ``module_a.category_choice``.
        y : hashable
            Class label of ``x``.

        Returns
        -------
        dict
            Result of ``top_two_indices_against_others`` for class ``y``.

        Raises
        ------
        ValueError
            If the ART module has no weights yet.
        """
        module = self.module_a
        # The previous ``assert len(W) >= 0`` could never fail; an empty weight
        # list then surfaced as an opaque unpacking ValueError. Fail explicitly.
        if len(module.W) == 0:
            raise ValueError("ART module is not fit.")
        # category_choice returns a pair; only its first element is needed.
        T = [
            module.category_choice(x, w, params=module.params)[0]
            for w in module.W
        ]
        return top_two_indices_against_others(
            T, list(self.rev_map), self.rev_map, y
        )

    def _update_pairwise(self, x, y, bmu1):
        """Record one fitted sample and refresh ``pairwise_index[(y, *)]``."""
        self.rev_map[y].add(bmu1)
        if y not in self.singleton_index:
            self.singleton_index[y] = 1.0

        self.cluster_cardinality[y] += 1
        top2bmu = self.predict_subset_pairs(x, y)  # eq 1 & 2
        cardinality = float(self.cluster_cardinality[y])

        for b in self.rev_map:
            if b == y:
                continue
            # Default to the sample's own cluster when fewer than two
            # candidates exist for this class pair.
            runner_up = int(bmu1)
            candidates = top2bmu[b]
            if len(candidates) > 1:
                first, second = candidates
                # Skip the sample's own cluster to get the next-best one.
                runner_up = second if first == bmu1 else first
            if runner_up in self.rev_map[b]:
                self.sparse_adj[(y, b)] += 1  # eq 3
            self.pairwise_index[(y, b)] = 1.0 - (
                float(self.sparse_adj[(y, b)]) / cardinality
            )  # eq 4

    def _refresh_singleton(self, y):
        """Set ``singleton_index[y]`` to the minimum pairwise score of ``y``."""
        self.singleton_index[y] = min(
            self.pairwise_index[(y, b)] for b in self.rev_map if b != y
        )  # eq 5

    def add_sample(self, x, y):
        """Fit a single sample and return the updated global index.

        Parameters
        ----------
        x : array-like
            One raw sample; it is complement coded before fitting.
        y : hashable
            Class label of ``x``.

        Returns
        -------
        float
            The updated value of ``self.index``.
        """
        x_prep = complement_code([x])
        self.ARTMAP = self.ARTMAP.partial_fit(
            x_prep, [y], match_tracking=self.match_tracking
        )
        bmu1 = self.ARTMAP.module_a.labels_[-1]
        self._update_pairwise(x_prep, y, bmu1)

        # A minimum over other classes needs at least one other class.
        if len(self.rev_map) > 1:
            self._refresh_singleton(y)
        self.index = np.mean(list(self.singleton_index.values()))  # eq 6
        return self.index

    def add_batch(self, X, Y):
        """Fit a batch of samples and return the updated global index.

        The whole batch is fitted first; the overlap statistics are then
        updated sample by sample in batch order.

        Parameters
        ----------
        X : array-like
            Raw samples; complement coded before fitting.
        Y : sequence
            Class labels, one per row of ``X``.

        Returns
        -------
        float
            The updated value of ``self.index`` (unchanged for an empty batch).
        """
        # ``labels_[-0:]`` would select every label, so handle the empty
        # batch explicitly instead of fitting nothing.
        if len(Y) == 0:
            return self.index

        X_prep = complement_code(X)
        self.ARTMAP = self.ARTMAP.partial_fit(
            X_prep, Y, match_tracking=self.match_tracking
        )
        BMU1 = self.ARTMAP.module_a.labels_[-len(Y):]
        for x, y, bmu1 in zip(X_prep, Y, BMU1):
            self._update_pairwise(x, y, bmu1)

        if len(self.rev_map) > 1:
            for y in np.unique(Y):
                self._refresh_singleton(y)
        self.index = np.mean(list(self.singleton_index.values()))  # eq 6
        return self.index
@@ -0,0 +1,3 @@
1
+ from .OverlapIndex import OverlapIndex
2
+
3
+ __all__ = ["OverlapIndex"]
@@ -0,0 +1,22 @@
1
+ [tool.poetry]
2
+ name = "overlapindex"
3
+ version = "0.1.0"
4
+ description = "OverlapIndex (OI), an Incremental Cluster Validity index for identifying the degree of overlap of data classes."
5
+ authors = ["Niklas M. Melton <niklasmelton@gmail.com>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ homepage = "https://github.com/NiklasMelton/OverlapIndex"
9
+ repository = "https://github.com/NiklasMelton/OverlapIndex"
10
+ documentation = "https://github.com/NiklasMelton/OverlapIndex"
11
+ keywords = ["incremental cluster validity", "cluster validity", "ART", "machine learning", "transfer learning", "clustering"]
12
+
13
+ packages = [{ include = "overlapindex" }]
14
+
15
+ [tool.poetry.dependencies]
16
+ python = ">=3.9"
17
+ artlib = ">=0.1.7,<0.2.0"
18
+ numpy = ">=2.4.1,<3.0.0"
19
+
20
+ [build-system]
21
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
22
+ build-backend = "poetry.core.masonry.api"