ndscape 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ndscape/README.md ADDED
@@ -0,0 +1,86 @@
1
+ # ndscape
2
+
3
+ Fit, score, and embed nested-dichotomy (ND) trees for multi-class classification.
4
+
5
+ A nested dichotomy reduces a C-class problem to a tree of binary splits
6
+ (e.g. {0,1,2} vs {3,4}, then {0} vs {1,2}, ...). ndscape lets you fit one,
7
+ score it, or place a whole population of candidate trees in a 2-D
8
+ "tree-space" to see how a property (accuracy, variance, ...) varies across
9
+ tree structures.
10
+
11
+ ## Install
12
+
13
+ ```
14
+ pip install ndscape
15
+ pip install ndscape[spatial] # adds Moran's I support (esda, libpysal)
16
+ ```
17
+
18
+ ## Quickstart
19
+
20
+ ```python
21
+ from sklearn.datasets import load_iris
22
+ from sklearn.model_selection import train_test_split
23
+ import ndscape as nds
24
+
25
+ X, y = load_iris(return_X_y=True)
26
+ classes = sorted(set(y))
27
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
28
+
29
+ nd = nds.fit(X_train, y_train, classes=classes, base="lr")
30
+ nd.predict(X_test)
31
+ nd.score(X_test, y_test) # {"accuracy": ..., "logloss": ...}
32
+ ```
33
+
34
+ `classes` is the list of class labels in your `y` (`sorted(set(y))` works for
35
+ integer or string labels). `nds.fit` picks one balanced ND tree automatically; pass
36
+ `tree=...` to use a specific one (see "A `tree` is..." below).
37
+
38
+ ## Use cases
39
+
40
+ **You have a dataset and a binary classifier.**
41
+
42
+ `base` can be the string `"lr"` or `"decisiontree"`, or your own unfitted
43
+ scikit-learn estimator — a fresh clone of it is fit at every split.
44
+
45
+ ```python
46
+ from sklearn.svm import SVC
47
+
48
+ nd = nds.fit(X_train, y_train, classes=classes, base=SVC(probability=True, kernel="linear"))
49
+ ```
50
+
51
+ **You have a train/test split and want a score.**
52
+
53
+ ```python
54
+ nd = nds.ND(tree, classes).fit(X_train, y_train, base="lr")
55
+ nd.score(X_test, y_test) # {"accuracy": ..., "logloss": ...}
56
+ ```
57
+
58
+ **You already trained the per-split models yourself.**
59
+
60
+ ```python
61
+ # models in the same order as tree, or a {(left, right): model} dict — either works
62
+ nd = nds.ND.from_trained(tree, classes, models=[fitted_model_1, fitted_model_2, ...])
63
+ nd.predict_proba(X_test)
64
+ ```
65
+
66
+ **You already scored a set of trees and want to see where they sit in tree-space.**
67
+
68
+ ```python
69
+ trees, coords = nds.embed_trees(classes)
70
+ nds.spatial_autocorrelation(my_scores, coords) # {"I": ..., "p_sim": ...}
71
+ ```
72
+
73
+ **You just want the whole picture: fit, score, and embed every candidate tree.**
74
+
75
+ ```python
76
+ rows = nds.analyze(X_train, y_train, classes, X_test=X_test, y_test=y_test, base="lr")
77
+ # [{"tree": ..., "accuracy": ..., "logloss": ..., "coord": array([...])}, ...]
78
+ ```
79
+
80
+ A `tree` is a list of `(left, right)` tuples of class labels, e.g.
81
+ `[((0, 1), (2, 3)), ((0,), (1,)), ((2,), (3,))]`. Use `nds.all_trees(classes)`
82
+ (exhaustive, for small C) or `nds.sample_trees(classes, N)` (for larger C)
83
+ to generate candidates.
84
+
85
+ `base` accepts `"lr"`, `"decisiontree"`, or any unfitted scikit-learn
86
+ estimator with `fit`/`predict_proba`.
ndscape/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """ndscape: fit, score, and embed nested-dichotomy (ND) trees for multi-class classification."""
2
+
3
+ from .api import (
4
+ ND,
5
+ all_trees,
6
+ analyze,
7
+ embed_trees,
8
+ fit,
9
+ sample_trees,
10
+ spatial_autocorrelation,
11
+ )
12
+
13
+ __all__ = [
14
+ "ND",
15
+ "all_trees",
16
+ "analyze",
17
+ "embed_trees",
18
+ "fit",
19
+ "sample_trees",
20
+ "spatial_autocorrelation",
21
+ ]
ndscape/api.py ADDED
@@ -0,0 +1,367 @@
1
+ """Fit, score, and embed nested-dichotomy (ND) trees on real datasets.
2
+
3
+ Self-contained: this module does not import core.py, config.py, or anything
4
+ else from the rest of the repo, so it can be packaged and installed on its
5
+ own via pip.
6
+
7
+ A tree is a list of (left, right) tuples of class labels, e.g.
8
+ [((0, 1), (2, 3)), ((0,), (1,)), ((2,), (3,))]
9
+ read top-down: first split {0,1} vs {2,3}, then split each side further.
10
+ """
11
+
12
+ from itertools import combinations
13
+ from math import comb, sqrt
14
+
15
+ import numpy as np
16
+ from sklearn.base import clone
17
+ from sklearn.feature_extraction.text import TfidfVectorizer
18
+ from sklearn.linear_model import LogisticRegression
19
+ from sklearn.manifold import MDS
20
+ from sklearn.metrics.pairwise import cosine_similarity
21
+ from sklearn.tree import DecisionTreeClassifier
22
+
23
+
24
+ # ---------------------------------------------------------------- models
25
+
26
+ def _get_model(base):
27
+ """Resolve `base` to a fresh, unfitted binary classifier.
28
+
29
+ `base` is either a string shorthand ("lr", "decisiontree") or an actual
30
+ unfitted scikit-learn estimator, e.g. SVC(probability=True) or your own
31
+ model implementing fit/predict_proba. A fresh clone is made per split.
32
+ """
33
+ if isinstance(base, str):
34
+ if base == "lr":
35
+ return LogisticRegression(
36
+ penalty="l2", solver="newton-cholesky", C=0.1, max_iter=2000
37
+ )
38
+ if base in ("decisiontree", "dt"):
39
+ return DecisionTreeClassifier(
40
+ criterion="entropy", min_samples_leaf=5, random_state=0
41
+ )
42
+ raise ValueError(
43
+ f"Unknown base model '{base}'. Pass 'lr', 'decisiontree', "
44
+ "or an unfitted scikit-learn estimator."
45
+ )
46
+ return clone(base)
47
+
48
+
49
+ # ---------------------------------------------------------------- tree enumeration
50
+
51
+ def _gen(labels):
52
+ labels = tuple(sorted(labels))
53
+ if len(labels) <= 1:
54
+ yield ()
55
+ return
56
+ s = labels[0]
57
+ for r in range(1, len(labels)):
58
+ for rest in combinations(labels[1:], r - 1):
59
+ left = tuple(sorted((s,) + rest))
60
+ right = tuple(x for x in labels if x not in left)
61
+ pair = tuple(sorted((left, right), key=lambda t: (-len(t), t)))
62
+ L = ((),) if len(left) == 1 else tuple(_gen(left))
63
+ R = ((),) if len(right) == 1 else tuple(_gen(right))
64
+ for lt in L:
65
+ for rt in R:
66
+ yield (pair,) + lt + rt
67
+
68
+
69
def all_trees(classes):
    """Return the full list of ND trees over `classes`.

    The enumeration is exhaustive, so the result grows very quickly with the
    number of classes — prefer sample_trees when there are many.
    """
    labels = tuple(classes)
    return list(_gen(labels))
72
+
73
+
74
def _sample_tree_indices(n, N):
    """Sample up to N ND trees over positions 0..n-1, proportionally balanced.

    Trees are expressed over the integer positions 0..n-1 (relabelling to real
    class labels happens elsewhere). No random number generator is involved:
    the same (n, N) always produces the same list. The requested count is
    capped at the total number of trees over n positions.
    """
    # tree_counts[m] = number of ND trees over m labels, built with the
    # recurrence T(m) = (2m - 3) * T(m - 1), T(1) = T(2) = 1.
    tree_counts = [0, 1, 1] + [0] * max(0, n - 2)
    for m in range(3, n + 1):
        tree_counts[m] = (2 * m - 3) * tree_counts[m - 1]

    def _draw(classes, budget):
        # Return a list of trees over `classes`, aiming for `budget` of them.
        m = len(classes)
        if m <= 1:
            return [()]
        if m == 2:
            return [((tuple(classes[:1]), tuple(classes[1:])),)]
        if budget == 1:
            # One tree only: cut at the midpoint and recurse with budget 1.
            k = m // 2
            right, left = tuple(classes[k:]), tuple(classes[:k])
            return [((right, left),) + _draw(list(right), 1)[0] + _draw(list(left), 1)[0]]
        # k is the size of the smaller side of the root split.
        left_sizes = range(1, m // 2 + 1)
        # weights[k]: how many trees have a root split with a side of size k.
        # An even split (2k == m) would otherwise be counted twice, hence the halving.
        weights = [
            comb(m, k) // (2 if 2 * k == m else 1) * tree_counts[k] * tree_counts[m - k]
            for k in left_sizes
        ]
        total = sum(weights)
        # Share the budget among the sizes in proportion to their weights;
        # the units lost to flooring go to the sizes with the largest remainders.
        alloc = [budget * w // total for w in weights]
        for i in sorted(range(len(weights)), key=lambda i: -(budget * weights[i] % total))[
            : budget - sum(alloc)
        ]:
            alloc[i] += 1
        out = []
        for k, k_budget in zip(left_sizes, alloc):
            if k_budget == 0:
                continue
            # Spread this size's budget evenly over its root splits; the first
            # `remainder` splits visited receive one extra tree.
            n_combos = comb(m, k) // (2 if 2 * k == m else 1)
            per_combo, remainder = divmod(k_budget, n_combos)
            seen = 0
            for left in combinations(classes, k):
                # For even splits keep only the half containing the first
                # label, so each unordered split is visited once.
                if 2 * k == m and classes[0] not in left:
                    continue
                combo_budget = per_combo + (seen < remainder)
                seen += 1
                if combo_budget == 0:
                    # No later split of this size has any budget either.
                    if per_combo == 0 and seen >= remainder:
                        break
                    continue
                left = tuple(left)
                right = tuple(x for x in classes if x not in left)
                # Factor combo_budget into left_budget * right_budget >= combo_budget.
                # left_budget starts near sqrt(combo_budget * T(k) / T(m - k)), is raised
                # to at least ceil(combo_budget / T(m - k)) and capped at T(k).
                left_budget = int(sqrt(combo_budget * tree_counts[k] / tree_counts[m - k]) + 0.5)
                left_budget = max(left_budget, (combo_budget + tree_counts[m - k] - 1) // tree_counts[m - k])
                left_budget = min(left_budget, tree_counts[k])
                right_budget = (combo_budget + left_budget - 1) // left_budget
                # Pair right and left subtrees, stopping once combo_budget trees are emitted.
                drawn = 0
                for right_sub in _draw(list(right), right_budget):
                    for left_sub in _draw(list(left), left_budget):
                        out.append(((right, left),) + right_sub + left_sub)
                        drawn += 1
                        if drawn == combo_budget:
                            break
                    if drawn == combo_budget:
                        break
        return out

    return _draw(list(range(n)), min(N, tree_counts[n]))
135
+
136
+
137
+ def _relabel_tree(tree, classes):
138
+ m = dict(enumerate(classes))
139
+ return [(tuple(m[i] for i in left), tuple(m[i] for i in right)) for left, right in tree]
140
+
141
+
142
def sample_trees(classes, N):
    """Return up to N ND trees over `classes`, proportionally balanced by subtree size.

    Trees are first selected over positions 0..len(classes)-1 and then
    rewritten in terms of the actual class labels.
    """
    labels = list(classes)
    relabelled = []
    for index_tree in _sample_tree_indices(len(labels), N):
        relabelled.append(_relabel_tree(index_tree, labels))
    return relabelled
147
+
148
+
149
+ # ---------------------------------------------------------------- ND
150
+
151
class ND:
    """A single nested-dichotomy tree fitted to a dataset.

    Parameters
    ----------
    tree : list of (left, right) tuples of class labels.
    classes : the full set of class labels, in the order predict_proba columns follow.
    models : optional, already-fitted binary classifiers — skips .fit(). Either
        a list of fitted models in the same order as `tree`, or a dict mapping
        (left, right) -> fitted model. Each model's predict_proba must return
        the probability of the `left` group in column 0 and of the `right`
        group in column 1. A missing or None model makes the split
        deterministic (see predict_proba).
    """

    def __init__(self, tree, classes, models=None):
        # Normalise every split to a pair of tuples so it can key a dict.
        self.tree = [(tuple(left), tuple(right)) for left, right in tree]
        self.classes = list(classes)
        # For splits where fit() saw training rows from one side only:
        # (left, right) -> "left" or "right", naming the side that was ABSENT.
        self._absent_side = {}
        if models is None:
            self.models = {}
        elif isinstance(models, dict):
            self.models = dict(models)
        else:
            self.models = dict(zip(self.tree, models))

    @classmethod
    def from_trained(cls, tree, classes, models):
        """Wrap a tree whose per-split binary models are already fitted.

        `models` is a list of fitted models in the same order as `tree`,
        or a dict mapping (left, right) -> fitted model.
        """
        return cls(tree, classes, models=models)

    def fit(self, X, y, base="lr"):
        """Fit one binary classifier per split on (X, y). Returns self.

        `base` is "lr", "decisiontree", or any unfitted scikit-learn
        estimator, e.g. base=SVC(probability=True, kernel="linear").

        Each split is trained only on the rows whose label belongs to one of
        its two groups, with target 0 for `left` and 1 for `right`. When the
        training rows cover only one group no classifier can be trained: the
        model is stored as None and the missing group is remembered so that
        predict_proba assigns it probability zero.
        """
        X, y = np.asarray(X), np.asarray(y)
        for left, right in self.tree:
            key = (left, right)
            # Drop any marker left behind by an earlier fit on other data.
            self._absent_side.pop(key, None)
            mask = np.isin(y, left + right)
            y_node = np.where(np.isin(y[mask], left), 0, 1)
            present = np.unique(y_node)
            if len(present) < 2:
                self.models[key] = None
                if len(present) == 1:
                    # Target 0 means only `left` rows were seen, so `right` is absent.
                    self._absent_side[key] = "right" if present[0] == 0 else "left"
                continue
            model = _get_model(base)
            model.fit(X[mask], y_node)
            self.models[key] = model
        return self

    def predict_proba(self, X):
        """Class-probability matrix, columns ordered as `self.classes`.

        A class's probability is the product, over every split containing it,
        of the probability that split gives to the class's side; rows are then
        normalised to sum to one (all-zero rows are left as zeros).

        A split without a model is deterministic: the side that was absent
        from the training data gets probability zero. If that is unknown
        (the model was supplied as None, or the split had no training rows
        at all) the `right` side is zeroed.
        """
        X = np.asarray(X)
        idx = {c: i for i, c in enumerate(self.classes)}
        proba = np.ones((X.shape[0], len(self.classes)))
        for left, right in self.tree:
            model = self.models.get((left, right))
            li = [idx[c] for c in left]
            ri = [idx[c] for c in right]
            if model is None:
                absent = self._absent_side.get((left, right), "right")
                proba[:, li if absent == "left" else ri] = 0.0
                continue
            p = model.predict_proba(X)
            proba[:, li] *= p[:, 0:1]
            proba[:, ri] *= p[:, 1:2]
        row_sums = proba.sum(axis=1, keepdims=True)
        # Avoid 0/0 on rows where every class ended up with probability zero.
        return proba / np.where(row_sums == 0, 1.0, row_sums)

    def predict(self, X):
        """Predicted class label per row."""
        proba = self.predict_proba(X)
        return np.asarray(self.classes)[proba.argmax(axis=1)]

    def score(self, X, y):
        """Accuracy and mean log-loss of this ND on (X, y).

        Returns a dict with keys "accuracy" and "logloss". Probabilities are
        clipped to [1e-15, 1] before taking the log. Raises KeyError if `y`
        contains a label that is not in `self.classes`.
        """
        y = np.asarray(y)
        proba = self.predict_proba(X)
        pred = np.asarray(self.classes)[proba.argmax(axis=1)]
        idx = {c: i for i, c in enumerate(self.classes)}
        y_idx = np.array([idx[v] for v in y])
        logloss = -np.log(np.clip(proba[np.arange(len(y)), y_idx], 1e-15, 1.0)).mean()
        return {"accuracy": float((pred == y).mean()), "logloss": float(logloss)}
232
+
233
+
234
def fit(X, y, classes, tree=None, base="lr"):
    """Build and fit one ND on (X, y), returning the fitted ND.

    When `tree` is not given, the first tree returned by
    sample_trees(classes, 1) is used. `base` is "lr", "decisiontree", or any
    unfitted scikit-learn estimator to use at each split,
    e.g. base=SVC(probability=True, kernel="linear").
    """
    labels = list(classes)
    chosen = sample_trees(labels, 1)[0] if tree is None else tree
    dichotomy = ND(chosen, labels)
    return dichotomy.fit(X, y, base=base)
245
+
246
+
247
+ # ---------------------------------------------------------------- embedding + spatial stats
248
+
249
def _split_analyzer(n_classes):
    """TF-IDF token-analyser function for ND trees (same scheme used to build the paper's artifacts).

    Returns a callable that turns one tree (an iterable of (left, right)
    splits) into a list of string tokens. `n_classes` fixes the depth cut-off
    and the weighting of the small-group tokens.
    """
    def _analyze(doc):
        """Tokenise a single tree; tokens are repeated to weight them."""
        splits = doc
        # Largest union first, so a parent split is always seen before its children.
        splits = sorted(splits, key=lambda lr: -len(set(lr[0]) | set(lr[1])))

        max_depth = min(8, n_classes - 2)
        small_group = 3
        all_classes = frozenset(x for L, R in splits for x in tuple(L) + tuple(R))
        # depth maps a group of classes to the depth of the split that divides it.
        depth = {all_classes: 0}
        extra_depth_w = max(0, n_classes - 6)
        toks = []

        for L, R in splits:
            U = frozenset(L) | frozenset(R)
            d = depth[U]
            # Record child depths before the cut-off check so deeper splits can still look them up.
            if len(L) > 1:
                depth[frozenset(L)] = d + 1
            if len(R) > 1:
                depth[frozenset(R)] = d + 1
            if d > max_depth:
                continue

            # Canonical form: sorted labels, larger side first (ties by labels).
            L, R = tuple(sorted(L)), tuple(sorted(R))
            A, B = sorted([L, R], key=lambda x: (-len(x), x))
            dbin = min(d, 5)
            # Shallower splits get a larger repetition weight.
            w = max(1, max_depth + 1 - d)

            # Size signature of the split; the root gets extra dedicated tokens.
            toks.extend([f"sz:{len(A)}|{len(B)}"] * max(1, w // 2))
            if d == 0:
                toks.extend([f"root:{A}|{B}"] * 2)
                toks.extend([f"root_sz:{len(A)}|{len(B)}"] * 4)

            # One group of tokens per pair of classes this split separates.
            for i in A:
                for j in B:
                    a, b = sorted((i, j))
                    toks.extend([f"sep:{a}-{b}:sz:{len(A)}|{len(B)}"] * max(1, w // 2))
                    toks.append(f"sep:{a}-{b}:d{dbin}")
                    if d == 0:
                        toks.extend([f"root_sep:{a}-{b}"] * 2)
                        toks.extend([f"root_sep:{a}-{b}:sz:{len(A)}|{len(B)}"] * 2)

            # One token per pair of classes that stay together on a side.
            for side in (A, B):
                for i, j in combinations(side, 2):
                    toks.append(f"same:{i}-{j}")
                # NOTE(review): placed at `for side` level (once per side) — the source's
                # indentation was lost; confirm against the published wheel.
                if n_classes >= 7 and 1 < len(side) <= small_group:
                    toks.extend([f"small_group_size:{len(side)}:d{dbin}"] * extra_depth_w)

        return toks
    return _analyze
299
+
300
+
301
def embed_trees(classes, trees=None, N=2000, seed=0, dim=2):
    """Embed a set of ND trees in `dim`-dimensional tree-space.

    Each tree is tokenised, turned into an L2-normalised term-frequency
    vector, compared by cosine similarity, and the resulting distance matrix
    is laid out with MDS (seeded by `seed`).

    When `trees` is None, every tree is enumerated for at most 7 classes and
    sample_trees(classes, N) is used otherwise. Returns (trees, coords) where
    coords[i] is the embedding of trees[i].
    """
    labels = list(classes)
    n_labels = len(labels)
    if trees is None:
        if n_labels <= 7:
            trees = all_trees(labels)
        else:
            trees = sample_trees(labels, N)

    vectorizer = TfidfVectorizer(
        analyzer=_split_analyzer(n_labels), use_idf=False, norm="l2"
    )
    features = vectorizer.fit_transform(trees)
    similarity = cosine_similarity(features)

    # Euclidean distance between unit vectors; clamp at zero in case rounding
    # pushes a similarity slightly above 1.
    distances = np.sqrt(np.maximum(0, 2.0 * (1.0 - similarity)))
    np.fill_diagonal(distances, 0.0)

    embedder = MDS(
        n_components=dim,
        dissimilarity="precomputed",
        random_state=seed,
        n_init=1,
        max_iter=1000,
        eps=1e-6,
        n_jobs=-1,
    )
    return trees, embedder.fit_transform(distances)
325
+
326
+
327
def spatial_autocorrelation(values, coords, k=50, permutations=999):
    """Compute global Moran's I for `values` over a tree-space embedding.

    Neighbourhoods are the `k` nearest neighbours in `coords` (k is reduced to
    len(coords) - 1 if larger), with row-standardised weights. Returns a dict
    with the statistic under "I" and its permutation p-value under "p_sim".

    Needs the optional 'spatial' extra: pip install ndscape[spatial]

    Raises
    ------
    ImportError
        If esda or libpysal is not installed.
    """
    try:
        from esda import Moran
        from libpysal.weights import KNN
    except ImportError as missing:
        raise ImportError(
            "spatial_autocorrelation needs esda and libpysal. "
            "Install with: pip install ndscape[spatial]"
        ) from missing

    import warnings

    n_neighbours = min(k, len(coords) - 1)
    # Warnings raised while building the KNN weights are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        weights = KNN.from_array(coords, k=n_neighbours)
    weights.transform = "r"
    moran = Moran(np.asarray(values), weights, permutations=permutations)
    return {"I": moran.I, "p_sim": moran.p_sim}
349
+
350
+
351
def analyze(X, y, classes, X_test=None, y_test=None, base="lr", N=2000, seed=0):
    """Fit, score, and embed every candidate ND tree for `classes`.

    Candidate trees and their coordinates come from embed_trees(classes, N=N,
    seed=seed). Each tree is fitted on (X, y) with `base` and scored on
    (X_test, y_test); whichever of those is omitted falls back to X or y
    respectively.

    Returns a list with one dict per tree, holding "tree", "accuracy",
    "logloss" and "coord".
    """
    labels = list(classes)
    eval_X = X_test if X_test is not None else X
    eval_y = y_test if y_test is not None else y

    trees, coords = embed_trees(labels, N=N, seed=seed)

    results = []
    for tree, coord in zip(trees, coords):
        fitted = ND(tree, labels).fit(X, y, base=base)
        metrics = fitted.score(eval_X, eval_y)
        results.append(
            {
                "tree": tree,
                "accuracy": metrics["accuracy"],
                "logloss": metrics["logloss"],
                "coord": coord,
            }
        )
    return results
@@ -0,0 +1,105 @@
1
+ Metadata-Version: 2.4
2
+ Name: ndscape
3
+ Version: 0.1.0
4
+ Summary: Fit, score, and embed nested-dichotomy (ND) trees for multi-class classification.
5
+ Author: Maxwell Dix-Matthews
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/res-lucid/ndscape
8
+ Keywords: nested-dichotomy,multi-class,classification,scikit-learn
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Topic :: Scientific/Engineering
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: numpy>=1.24
15
+ Requires-Dist: scikit-learn>=1.3
16
+ Provides-Extra: spatial
17
+ Requires-Dist: esda>=2.6; extra == "spatial"
18
+ Requires-Dist: libpysal>=4.10; extra == "spatial"
19
+
20
+ # ndscape
21
+
22
+ Fit, score, and embed nested-dichotomy (ND) trees for multi-class classification.
23
+
24
+ A nested dichotomy reduces a C-class problem to a tree of binary splits
25
+ (e.g. {0,1,2} vs {3,4}, then {0} vs {1,2}, ...). ndscape lets you fit one,
26
+ score it, or place a whole population of candidate trees in a 2-D
27
+ "tree-space" to see how a property (accuracy, variance, ...) varies across
28
+ tree structures.
29
+
30
+ ## Install
31
+
32
+ ```
33
+ pip install ndscape
34
+ pip install ndscape[spatial] # adds Moran's I support (esda, libpysal)
35
+ ```
36
+
37
+ ## Quickstart
38
+
39
+ ```python
40
+ from sklearn.datasets import load_iris
41
+ from sklearn.model_selection import train_test_split
42
+ import ndscape as nds
43
+
44
+ X, y = load_iris(return_X_y=True)
45
+ classes = sorted(set(y))
46
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
47
+
48
+ nd = nds.fit(X_train, y_train, classes=classes, base="lr")
49
+ nd.predict(X_test)
50
+ nd.score(X_test, y_test) # {"accuracy": ..., "logloss": ...}
51
+ ```
52
+
53
+ `classes` is the list of class labels in your `y` (`sorted(set(y))` works for
54
+ integer or string labels). `nds.fit` picks one balanced ND tree automatically; pass
55
+ `tree=...` to use a specific one (see "A `tree` is..." below).
56
+
57
+ ## Use cases
58
+
59
+ **You have a dataset and a binary classifier.**
60
+
61
+ `base` can be the string `"lr"` or `"decisiontree"`, or your own unfitted
62
+ scikit-learn estimator — a fresh clone of it is fit at every split.
63
+
64
+ ```python
65
+ from sklearn.svm import SVC
66
+
67
+ nd = nds.fit(X_train, y_train, classes=classes, base=SVC(probability=True, kernel="linear"))
68
+ ```
69
+
70
+ **You have a train/test split and want a score.**
71
+
72
+ ```python
73
+ nd = nds.ND(tree, classes).fit(X_train, y_train, base="lr")
74
+ nd.score(X_test, y_test) # {"accuracy": ..., "logloss": ...}
75
+ ```
76
+
77
+ **You already trained the per-split models yourself.**
78
+
79
+ ```python
80
+ # models in the same order as tree, or a {(left, right): model} dict — either works
81
+ nd = nds.ND.from_trained(tree, classes, models=[fitted_model_1, fitted_model_2, ...])
82
+ nd.predict_proba(X_test)
83
+ ```
84
+
85
+ **You already scored a set of trees and want to see where they sit in tree-space.**
86
+
87
+ ```python
88
+ trees, coords = nds.embed_trees(classes)
89
+ nds.spatial_autocorrelation(my_scores, coords) # {"I": ..., "p_sim": ...}
90
+ ```
91
+
92
+ **You just want the whole picture: fit, score, and embed every candidate tree.**
93
+
94
+ ```python
95
+ rows = nds.analyze(X_train, y_train, classes, X_test=X_test, y_test=y_test, base="lr")
96
+ # [{"tree": ..., "accuracy": ..., "logloss": ..., "coord": array([...])}, ...]
97
+ ```
98
+
99
+ A `tree` is a list of `(left, right)` tuples of class labels, e.g.
100
+ `[((0, 1), (2, 3)), ((0,), (1,)), ((2,), (3,))]`. Use `nds.all_trees(classes)`
101
+ (exhaustive, for small C) or `nds.sample_trees(classes, N)` (for larger C)
102
+ to generate candidates.
103
+
104
+ `base` accepts `"lr"`, `"decisiontree"`, or any unfitted scikit-learn
105
+ estimator with `fit`/`predict_proba`.
@@ -0,0 +1,7 @@
1
+ ndscape/README.md,sha256=U1lcEA4PwlA4n1qMZtGgihcYTG87uGisv5izJMAbTU8,2804
2
+ ndscape/__init__.py,sha256=WOl8A_jepMOzgOBcRQKVjpYNypItDUg-pbeH8bhiSsc,366
3
+ ndscape/api.py,sha256=b_a1Ua3bPjJXwxuEP2wf_k6P3CxS8gkGp6BRKg-Nw1Q,13990
4
+ ndscape-0.1.0.dist-info/METADATA,sha256=ugL8cvnr07nx-na8EgyR1r7D2EBeCwKhZUA7YTarZ8s,3513
5
+ ndscape-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ ndscape-0.1.0.dist-info/top_level.txt,sha256=hHP_1QS6GvgT6GH80LoAsLP6sUbxhWlEYJHFXsrKwZw,8
7
+ ndscape-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ ndscape