robust-mixed-dist 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,666 @@
1
+ import polars as pl
2
+ import numpy as np
3
+ import pandas as pd
4
+ from itertools import product
5
+ from scipy.spatial import distance
6
+ from scipy.spatial.distance import pdist, squareform, cdist
7
+ from scipy import sparse
8
+
9
+ ################################################################################
10
+
11
def euclidean_dist_matrix(X):
    """
    Compute the pairwise Euclidean distance matrix of a data matrix.

    Parameters (inputs)
    ----------
    X: a Pandas or Polars DataFrame or a NumPy array. It represents a data matrix.

    Returns (outputs)
    -------
    The square symmetric matrix of Euclidean distances between the rows of `X`.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    # pdist returns the condensed (upper-triangular) form; expand it to a
    # full square matrix before returning.
    return squareform(pdist(X, metric='euclidean'))
34
+
35
+ ################################################################################
36
+
37
def euclidean_dist(xi, xr) :
    """
    Compute the Euclidean distance between two observation vectors.

    Parameters (inputs)
    ----------
    xi, xr: a pair of Pandas or Polars Series or DataFrames.
    They represent a couple of statistical observations of quantitative variables.

    Returns (outputs)
    -------
    The Euclidean distance between the observations `xi` and `xr`.
    """

    def _to_vector(v):
        # DataFrames are flattened to 1-D; Series are converted directly.
        if isinstance(v, (pl.DataFrame, pd.DataFrame)):
            return v.to_numpy().flatten()
        if isinstance(v, (pd.Series, pl.Series)):
            return v.to_numpy()
        return v

    return distance.euclidean(_to_vector(xi), _to_vector(xr))
61
+
62
+ ################################################################################
63
+
64
def minkowski_dist_matrix(X, q):
    """
    Compute the pairwise Minkowski distance matrix of a data matrix.

    Parameters (inputs)
    ----------
    X: a Pandas or Polars DataFrame or a NumPy array. It represents a data matrix.
    q: the parameter that defines the Minkowski form. Particular cases: q=1 := Manhattan, q=2 := Euclidean.

    Returns (outputs)
    -------
    The square symmetric Minkowski(`q`) distance matrix between the rows of `X`.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    # Condensed pairwise distances expanded to a full square matrix.
    return squareform(pdist(X, metric='minkowski', p=q))
87
+
88
+ ################################################################################
89
+
90
def minkowski_dist(xi, xr, q) :
    """
    Compute the Minkowski distance between two observation vectors.

    Parameters (inputs)
    ----------
    xi, xr: a pair of quantitative vectors. They represent a couple of statistical observations.
    q: the parameter that defines the Minkowski form. Particular cases: q=1 := Manhattan, q=2 := Euclidean.

    Returns (outputs)
    -------
    The Minkowski(`q`) distance between the observations `xi` and `xr`.
    """

    def _to_vector(v):
        # DataFrames are flattened to 1-D; Series are converted directly.
        if isinstance(v, (pl.DataFrame, pd.DataFrame)):
            return v.to_numpy().flatten()
        if isinstance(v, (pd.Series, pl.Series)):
            return v.to_numpy()
        return v

    return distance.minkowski(_to_vector(xi), _to_vector(xr), q)
114
+
115
+ ################################################################################
116
+
117
def canberra_dist_matrix(X):
    """
    Compute the pairwise Canberra distance matrix of a data matrix.

    Parameters (inputs)
    ----------
    X: a pandas/polars DataFrame or a NumPy array. It represents a data matrix.

    Returns (outputs)
    -------
    The square symmetric Canberra distance matrix between the rows of `X`.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    # Condensed pairwise distances expanded to a full square matrix.
    return squareform(pdist(X, metric='canberra'))
139
+
140
+ ################################################################################
141
+
142
def canberra_dist(xi, xr) :
    """
    Compute the Canberra distance between two observation vectors.

    Parameters (inputs)
    ----------
    xi, xr: a pair of quantitative vectors. They represent a couple of statistical observations.

    Returns (outputs)
    -------
    The Canberra distance between the observations `xi` and `xr`.
    """

    def _to_vector(v):
        # DataFrames are flattened to 1-D; Series are converted directly.
        if isinstance(v, (pl.DataFrame, pd.DataFrame)):
            return v.to_numpy().flatten()
        if isinstance(v, (pd.Series, pl.Series)):
            return v.to_numpy()
        return v

    return distance.canberra(_to_vector(xi), _to_vector(xr))
165
+
166
+ ################################################################################
167
+
168
def pearson_dist_matrix(X):
    """
    Compute the pairwise Pearson distance matrix of a data matrix.

    The Pearson distance is computed here as the standardized Euclidean
    distance (SciPy's 'seuclidean' metric), i.e. each coordinate difference
    is weighted by the inverse variance of the corresponding column.

    Parameters (inputs)
    ----------
    X: a pandas/polars DataFrame or a NumPy array. It represents a data matrix.

    Returns (outputs)
    -------
    The square symmetric Pearson distance matrix between the rows of `X`.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    # Condensed pairwise distances expanded to a full square matrix.
    return squareform(pdist(X, metric='seuclidean'))
190
+
191
+ ################################################################################
192
+
193
def mahalanobis_dist_matrix(X):
    """
    Compute the classical Mahalanobis distance matrix of a data matrix.

    Parameters
    ----------
    X : pandas.DataFrame, polars.DataFrame, or np.ndarray
        Data matrix of shape (n_samples, n_features).

    Returns
    -------
    np.ndarray
        Symmetric matrix (n_samples x n_samples) of Mahalanobis distances.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    # Center each column on its mean.
    centered = X - X.mean(axis=0)

    # Classical sample covariance of the centered data.
    cov = np.cov(centered, rowvar=False)

    # Pseudo-inverse for numerical stability, symmetrized to undo any
    # asymmetry introduced by floating-point error.
    inv_cov = np.linalg.pinv(cov)
    inv_cov = (inv_cov + inv_cov.T) / 2

    return cdist(centered, centered, metric='mahalanobis', VI=inv_cov)
230
+
231
+ ################################################################################
232
+
233
def mahalanobis_dist(xi, xr, S) :
    """
    Calculates the Mahalanobis distance between a pair of vectors.

    Parameters (inputs)
    ----------
    xi, xr: a pair of quantitative vectors. They represent a couple of statistical observations.
    S: the covariance matrix of the data matrix to which `xi` and `xr` belong.

    Returns (outputs)
    -------
    The Mahalanobis distance between the observations `xi` and `xr`.
    """

    def _as_vector(v):
        # NumPy arrays pass through untouched (checked first as the common case).
        if isinstance(v, np.ndarray):
            return v
        if isinstance(v, pd.DataFrame):
            return v.to_numpy().flatten()
        if isinstance(v, pd.Series):
            return v.to_numpy()
        if isinstance(v, pl.DataFrame):
            return v.to_numpy().flatten()
        if isinstance(v, pl.Series):
            return v.to_numpy()
        return v

    xi = _as_vector(xi)
    xr = _as_vector(xr)

    # Use the symmetrized pseudo-inverse instead of np.linalg.inv so that a
    # singular covariance matrix does not raise LinAlgError. This matches
    # mahalanobis_dist_matrix and robust_maha_dist_matrix, and is identical
    # to the true inverse whenever S is invertible.
    S_pinv = np.linalg.pinv(S)
    S_pinv = (S_pinv + S_pinv.T) / 2

    return distance.mahalanobis(xi, xr, S_pinv)
260
+
261
+ ################################################################################
262
+
263
def mad(Xj) :
    """
    Compute the median absolute deviation of a statistical variable.

    Parameters (inputs)
    ----------
    Xj: a vector representing a quantitative statistical variable.

    Returns (outputs)
    -------
    The median absolute deviation of `Xj`.
    """

    # Normalize Series inputs (pandas or polars) to a plain NumPy array.
    if isinstance(Xj, pd.Series):
        Xj = Xj.to_numpy()
    elif isinstance(Xj, pl.Series):
        Xj = Xj.to_numpy()

    # Median of the absolute deviations from the median.
    center = np.median(Xj)
    return np.median(np.abs(Xj - center))
284
+
285
+ ################################################################################
286
+
287
def Xj_trimmed(Xj, alpha) :
    """
    Compute the trimmed version of a statistical variable.

    Values outside the [alpha/2, 1 - alpha/2] quantile range are removed.

    Parameters (inputs)
    ----------
    Xj : a vector representing a quantitative statistical variable.
    alpha : a real number in [0,1] that defines the trimming level.

    Returns (outputs)
    -------
    The `alpha` trimmed version of `Xj`.
    """

    # Normalize Series inputs (pandas or polars) to a plain NumPy array.
    if isinstance(Xj, pd.Series):
        Xj = Xj.to_numpy()
    elif isinstance(Xj, pl.Series):
        Xj = Xj.to_numpy()

    # Keep only observations inside the central (1 - alpha) quantile band.
    lo = np.quantile(Xj, q=alpha / 2)
    hi = np.quantile(Xj, q=1 - alpha / 2)
    return Xj[(Xj >= lo) & (Xj <= hi)]
312
+
313
+ ################################################################################
314
+
315
def Xj_winsorized(Xj, alpha):
    """
    Compute the winsorized version of a quantitative variable.

    Values below the alpha/2 quantile are raised to it and values above the
    1 - alpha/2 quantile are lowered to it; values in between are unchanged.

    Parameters
    ----------
    Xj : a vector representing a quantitative statistical variable.
    alpha : a real number in [0,1] that defines the winsorizing level.

    Returns
    -------
    The `alpha` winsorized version of `Xj`.
    """

    # Normalize Series inputs (pandas or polars) to a plain NumPy array.
    if isinstance(Xj, pd.Series):
        Xj = Xj.to_numpy()
    elif isinstance(Xj, pl.Series):
        Xj = Xj.to_numpy()

    # An all-zero vector is already winsorized; return it unchanged.
    if np.all(Xj == 0):
        return Xj

    lo = np.quantile(Xj, q=alpha / 2)
    hi = np.quantile(Xj, q=1 - alpha / 2)

    # Clip both tails to the quantile bounds.
    return np.clip(Xj, lo, hi)
347
+
348
+ ################################################################################
349
+
350
def robust_var(Xj, method, alpha=None) :
    """
    Compute the robust variance of `Xj` allowing different methods.

    Parameters
    ----------
    Xj : a vector representing a quantitative statistical variable.
    method: the method to be used for computing the robust variance of `Xj`. Must be a string in ['MAD', 'trimmed', 'winsorized'].
    alpha : a real number in [0,1] that is used if `method` is 'trimmed' or 'winsorized'.

    Returns
    -------
    The robust variance of `Xj` computed by the method `method`.

    Raises
    ------
    ValueError
        If `method` is not one of 'MAD', 'trimmed', 'winsorized'.
    """

    if method == 'MAD':
        # Squared median absolute deviation.
        return mad(Xj) ** 2

    if method == 'trimmed':
        # Ordinary variance of the alpha-trimmed sample.
        return np.var(Xj_trimmed(Xj, alpha))

    if method == 'winsorized':
        # Ordinary variance of the alpha-winsorized sample.
        return np.var(Xj_winsorized(Xj, alpha))

    # Previously an unknown method fell through and silently returned None,
    # which surfaced later as confusing TypeErrors. Fail fast instead.
    raise ValueError(
        f"method must be one of 'MAD', 'trimmed' or 'winsorized', got {method!r}"
    )
376
+
377
+ ################################################################################
378
+
379
def robust_corr(Xj, Xr, method, alpha=None) :
    """
    Compute the robust correlation between `Xj` and `Xr` by different methods.

    The variables are first standardized by their robust standard deviation,
    then the correlation is obtained from the robust variances of their sum
    and difference: (var(j+r) - var(j-r)) / (var(j+r) + var(j-r)).

    Parameters
    ----------
    Xj, Xr : two vectors representing quantitative statistical variables.
    method: the method to be used for computing the robust variances. Must be a string in ['MAD', 'trimmed', 'winsorized'].
    alpha : a real number in [0,1] that is used if `method` is 'trimmed' or 'winsorized'.

    Returns
    -------
    The robust correlation between `Xj` and `Xr` computed by the method `method`.
    """

    # Normalize Series inputs (pandas or polars) to plain NumPy arrays.
    if isinstance(Xj, pd.Series):
        Xj = Xj.to_numpy()
    elif isinstance(Xj, pl.Series):
        Xj = Xj.to_numpy()
    if isinstance(Xr, pd.Series):
        Xr = Xr.to_numpy()
    elif isinstance(Xr, pl.Series):
        Xr = Xr.to_numpy()

    # Compute each robust variance once (previously it was recomputed for the
    # zero test and again for the scaling, doubling the quantile/median work;
    # the Xr branch also tested sqrt(var)==0, which for var >= 0 is the same
    # condition as var == 0 — unified here).
    var_j = robust_var(Xj, method, alpha)
    # A variable with zero robust variance cannot be standardized; keep it as is.
    Xj_std = Xj if var_j == 0 else Xj / np.sqrt(var_j)

    var_r = robust_var(Xr, method, alpha)
    Xr_std = Xr if var_r == 0 else Xr / np.sqrt(var_r)

    # Robust correlation from the variances of the sum and the difference,
    # guarding against a zero denominator.
    var_sum = robust_var(Xj_std + Xr_std, method, alpha)
    var_diff = robust_var(Xj_std - Xr_std, method, alpha)
    denom = var_sum + var_diff
    if denom == 0:
        return var_sum - var_diff
    return (var_sum - var_diff) / denom
426
+
427
+ ################################################################################
428
+
429
def R_robust(X, method, alpha=None) :
    """
    Compute the robust correlation matrix of a given data matrix `X`.

    Parameters
    ----------
    X : a pandas/polars data-frame or a numpy array.
    method : the method used to compute the robust correlations. Must be a string in ['MAD', 'trimmed', 'winsorized'].
    alpha : a real number in [0,1] that is used if `method` is 'trimmed' or 'winsorized'.

    Returns
    -------
    The (p x p) robust correlation matrix for `X`.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    n_cols = X.shape[1]
    corr = np.empty((n_cols, n_cols))

    # Fill every entry with the pairwise robust correlation of the columns.
    for j in range(n_cols):
        for r in range(n_cols):
            corr[j, r] = robust_corr(Xj=X[:, j], Xr=X[:, r], method=method, alpha=alpha)

    return corr
457
+
458
+ ################################################################################
459
+
460
def delvin_trans(M, epsilon=0.05) :
    """
    Apply the Delvin transformation to `M` to make it positive definite or closer to it.

    Each off-diagonal entry is flattened to 0 when its absolute value is at
    most arctanh(epsilon), and otherwise shrunk towards zero by epsilon in
    tanh space; diagonal entries are set to 1.

    Parameters (inputs)
    ----------
    M : a pandas/polars data-frame or a numpy array.
    epsilon : parameter involved in the Delvin transformation; must be a small positive number. epsilon=0.05 is recommended.

    Returns (outputs)
    -------
    The Delvin transformation of the input matrix `M`.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(M, pd.DataFrame):
        M = M.to_numpy()
    elif isinstance(M, pl.DataFrame):
        M = M.to_numpy()

    # z(epsilon) with z = arctanh (Fisher transform); hoisted out of the loop.
    threshold = np.arctanh(epsilon)

    def shrink(value):
        # z^{-1} = tanh, since arctanh is the inverse of tanh.
        if np.abs(value) <= threshold:
            return 0
        if value < -threshold:
            return np.tanh(value + epsilon)
        return np.tanh(value - epsilon)

    # Build the transformed matrix element by element.
    size = M.shape[1]
    transformed = np.zeros((size, size))
    for i, j in product(range(size), range(size)):
        transformed[i, j] = 1 if i == j else shrink(M[i, j])

    return transformed
509
+
510
+ ################################################################################
511
+
512
def delvin_algorithm(M, epsilon, n_iters):
    """
    Apply the Delvin algorithm on the matrix `M` to make it positive definite,
    applying the Delvin transformation as many iterations as needed.

    Parameters (inputs)
    ----------
    M: a pandas/polars data-frame or a numpy array.
    epsilon : parameter used by the Delvin transformation. epsilon=0.05 is recommended.
    n_iters : maximum number of iterations run by the algorithm.

    Returns (outputs)
    -------
    M_new : the resulting matrix after applying the Delvin algorithm on `M`.
    i : the number of Delvin transformations that were applied.
    """

    current = M.copy()

    for i in range(n_iters):
        # Stop as soon as the matrix is positive definite, i.e. every
        # eigenvalue is strictly positive.
        if np.all(np.linalg.eigvals(current) > 0):
            return current, i
        # Otherwise apply one more Delvin transformation and re-check.
        current = delvin_trans(M=current, epsilon=epsilon)

    # Iteration budget exhausted (the final matrix may still not be
    # positive definite).
    return current, n_iters
542
+
543
+ ################################################################################
544
+
545
def S_robust(X, method, epsilon, n_iters, alpha=None, weights=None):
    """
    Computes the robust covariance of the data matrix `X` by different methods.

    Parameters (inputs)
    ----------
    X: a pandas/polars data-frame or a numpy array.
    method: the method to be used to compute the robust covariance. Must be a string in ['MAD', 'trimmed', 'winsorized'].
    epsilon : parameter used by the Delvin transformation. epsilon=0.05 is recommended.
    n_iters : maximum number of iterations run by the Delvin algorithm.
    alpha : a real number in [0,1] that is used if `method` is 'trimmed' or 'winsorized'.
    weights: the sample weights. Only used if provided; assumed to be a 1-D
        array of length n_samples (TODO confirm normalization convention).

    Returns (outputs)
    -------
    S_robust_ : the robust covariance matrix computed for `X`.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    if weights is None:
        # Robust correlation matrix, repaired with the Delvin algorithm.
        R_robust_ = R_robust(X, method, alpha)
        R_robust_, _ = delvin_algorithm(M=R_robust_, epsilon=epsilon, n_iters=n_iters)
        # Rescale the correlations back to covariances with the column stds.
        std_diag = np.diag(np.std(X, axis=0))
        S_robust_ = std_diag @ R_robust_ @ std_diag

    else:
        w = np.asarray(weights)
        n = len(X)
        # Weighted centering operator Jw = sqrt(Dw) (I - 1 w^T).
        # BUG FIX: the previous code computed `ones @ w.T`, which for 1-D
        # arrays is a scalar dot product (sum of the weights), not the
        # rank-one matrix 1 w^T — with normalized weights that reduced
        # (I - 1) to the zero matrix and wiped out the data. np.outer builds
        # the intended (n x n) outer product.
        Jw = sparse.diags(np.sqrt(w)) @ (np.identity(n) - np.outer(np.ones(n), w))
        Xw = Jw @ X  # NOTE: Jw is n x n, so this is expensive for large n.
        # Robust correlation matrix of the weighted-centered data, repaired
        # with the Delvin algorithm.
        R_robust_ = R_robust(Xw, method, alpha)
        R_robust_, _ = delvin_algorithm(M=R_robust_, epsilon=epsilon, n_iters=n_iters)
        # Rescale the correlations back to covariances with the column stds.
        std_diag = np.diag(np.std(Xw, axis=0))
        S_robust_ = std_diag @ R_robust_ @ std_diag

    return S_robust_
592
+
593
+ ################################################################################
594
+
595
def robust_maha_dist_matrix(X, S_robust):
    """
    Compute the Robust Mahalanobis distance matrix for a data matrix `X`
    using a robust estimation of the covariance matrix.

    Parameters
    ----------
    X : pandas.DataFrame, polars.DataFrame, or np.ndarray
        The input data matrix with shape (n_samples, n_features).

    S_robust : np.ndarray
        Robust covariance matrix (e.g., from MCD or a trimmed estimator).
        Should be of shape (n_features, n_features).

    Returns
    -------
    np.ndarray
        Symmetric matrix (n_samples, n_samples) of Mahalanobis distances.
    """

    # Normalize DataFrame inputs (pandas or polars) to a plain NumPy array.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()
    elif isinstance(X, pl.DataFrame):
        X = X.to_numpy()

    # Center each column on its mean (important for Mahalanobis).
    centered = X - X.mean(axis=0)

    # Pseudo-inverse for stability, symmetrized to undo any asymmetry
    # introduced by floating-point error.
    VI = np.linalg.pinv(S_robust)
    VI = (VI + VI.T) / 2

    return cdist(centered, centered, metric='mahalanobis', VI=VI)
634
+
635
+ ################################################################################
636
+
637
def robust_maha_dist(xi, xr, S_robust) :
    """
    Compute the Robust Mahalanobis distance between a pair of vectors.

    Parameters (inputs)
    ----------
    xi, xr: a pair of quantitative vectors. They represent a couple of statistical observations.
    S_robust: the robust covariance matrix of the data matrix to which `xi` and `xr` belong.

    Returns (outputs)
    -------
    The Robust Mahalanobis distance between the observations `xi` and `xr`.
    """

    def _to_vector(v):
        # DataFrames are flattened to 1-D; Series are converted directly.
        if isinstance(v, (pl.DataFrame, pd.DataFrame)):
            return v.to_numpy().flatten()
        if isinstance(v, (pd.Series, pl.Series)):
            return v.to_numpy()
        return v

    # Stack the two observations as a 2-row matrix and read the off-diagonal
    # entry of the resulting 2 x 2 distance matrix.
    pair = np.array([_to_vector(xi), _to_vector(xr)])
    return robust_maha_dist_matrix(pair, S_robust)[0, 1]
665
+
666
+ ################################################################################
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: robust-mixed-dist
3
+ Version: 0.1.0
4
+ Summary: For more information, check out the official documentation of `robust_mixed_dist` at: https://fabioscielzoortiz.github.io/robust_mixed_dist-docu/intro.html
5
+ Home-page: https://github.com/FabioScielzoOrtiz/robust_mixed_dist-package
6
+ Author: Fabio Scielzo Ortiz
7
+ Author-email: fabio.scielzoortiz@gmail.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Dynamic: author
15
+ Dynamic: author-email
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license-file
21
+ Dynamic: requires-python
22
+ Dynamic: summary
23
+
24
+ # robust_mixed_dist
25
+
26
+ For more information, check out the official documentation of `robust_mixed_dist` at: https://fabioscielzoortiz.github.io/robust_mixed_dist-docu/intro.html
27
+
@@ -0,0 +1,10 @@
1
+ robust_mixed_dist/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ robust_mixed_dist/binary.py,sha256=n_RIANha7PDeeP8qKTizJQtA5zTP5KoOjcIY8vDkRjE,3322
3
+ robust_mixed_dist/mixed.py,sha256=mtHjh8e3ahxq51X0ri74N1O31OMQMUpmxvzmyJbsJVc,39403
4
+ robust_mixed_dist/multiclass.py,sha256=diUMIvP_O3BlOlMxz6Q7HIkmdDu18Pl9bbiszTHjweI,1778
5
+ robust_mixed_dist/quantitative.py,sha256=_wiIhyuwEjHW5twlYDCCfK8qXITPT8XPFz1wBQRq7Ho,22397
6
+ robust_mixed_dist-0.1.0.dist-info/licenses/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
7
+ robust_mixed_dist-0.1.0.dist-info/METADATA,sha256=D_e66QD2y5LVZvyjvwox9NgKBQjUv3HQtnLb2xepGQA,1004
8
+ robust_mixed_dist-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ robust_mixed_dist-0.1.0.dist-info/top_level.txt,sha256=kQcI1A0TrhkUiY8uvP0QHpZMPOwuLq-KojGhJoW9cjs,18
10
+ robust_mixed_dist-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+