arraykit 1.6.0__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arraykit-1.6.0/arraykit.egg-info → arraykit-1.7.0}/PKG-INFO +9 -3
- {arraykit-1.6.0 → arraykit-1.7.0}/README.rst +8 -2
- arraykit-1.7.0/VERSION +2 -0
- {arraykit-1.6.0 → arraykit-1.7.0/arraykit.egg-info}/PKG-INFO +9 -3
- {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/SOURCES.txt +1 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/__init__.py +1 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/__init__.pyi +3 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/_arraykit.c +4 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/methods.c +143 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/methods.h +3 -0
- arraykit-1.7.0/test/test_group_ordering.py +231 -0
- arraykit-1.6.0/VERSION +0 -2
- {arraykit-1.6.0 → arraykit-1.7.0}/LICENSE.txt +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/MANIFEST.in +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/dependency_links.txt +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/requires.txt +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/top_level.txt +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/pyproject.toml +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/setup.cfg +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/setup.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/array_go.c +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/array_go.h +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/array_to_tuple.c +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/array_to_tuple.h +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/auto_map.c +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/auto_map.h +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/block_index.c +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/block_index.h +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/delimited_to_arrays.c +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/delimited_to_arrays.h +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/py.typed +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/tri_map.c +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/tri_map.h +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/src/utilities.h +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_array_go.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_astype_array.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_auto_map.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_auto_map_property.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_block_index.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_delimited_to_arrays.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_delimited_to_arrays_integration.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_delimited_to_arrays_property.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_factorize.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_nonzero_1d.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_nonzero_1d_property.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_objectable.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_pyi.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_split_after_count.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_transition_slices_from_group.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_tri_map.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_type_discovery.py +0 -0
- {arraykit-1.6.0 → arraykit-1.7.0}/test/test_util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: arraykit
|
|
3
|
-
Version: 1.6.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Array utilities for StaticFrame
|
|
5
5
|
Author: Christopher Ariza, Brandt Bucher, Charles Burkland
|
|
6
6
|
License: MIT
|
|
@@ -64,14 +64,20 @@ ArrayKit requires the following:
|
|
|
64
64
|
What is New in ArrayKit
|
|
65
65
|
-------------------------
|
|
66
66
|
|
|
67
|
+
1.7.0
|
|
68
|
+
............
|
|
69
|
+
|
|
70
|
+
Added ``group_ordering()``.
|
|
71
|
+
|
|
72
|
+
|
|
67
73
|
1.6.0
|
|
68
74
|
............
|
|
69
|
-
Added ``factorize``.
|
|
75
|
+
Added ``factorize()``.
|
|
70
76
|
|
|
71
77
|
|
|
72
78
|
1.5.0
|
|
73
79
|
............
|
|
74
|
-
Added ``transition_slices_from_group``.
|
|
80
|
+
Added ``transition_slices_from_group()``.
|
|
75
81
|
|
|
76
82
|
|
|
77
83
|
1.4.0
|
|
@@ -35,14 +35,20 @@ ArrayKit requires the following:
|
|
|
35
35
|
What is New in ArrayKit
|
|
36
36
|
-------------------------
|
|
37
37
|
|
|
38
|
+
1.7.0
|
|
39
|
+
............
|
|
40
|
+
|
|
41
|
+
Added ``group_ordering()``.
|
|
42
|
+
|
|
43
|
+
|
|
38
44
|
1.6.0
|
|
39
45
|
............
|
|
40
|
-
Added ``factorize``.
|
|
46
|
+
Added ``factorize()``.
|
|
41
47
|
|
|
42
48
|
|
|
43
49
|
1.5.0
|
|
44
50
|
............
|
|
45
|
-
Added ``transition_slices_from_group``.
|
|
51
|
+
Added ``transition_slices_from_group()``.
|
|
46
52
|
|
|
47
53
|
|
|
48
54
|
1.4.0
|
arraykit-1.7.0/VERSION
ADDED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: arraykit
|
|
3
|
-
Version: 1.6.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Array utilities for StaticFrame
|
|
5
5
|
Author: Christopher Ariza, Brandt Bucher, Charles Burkland
|
|
6
6
|
License: MIT
|
|
@@ -64,14 +64,20 @@ ArrayKit requires the following:
|
|
|
64
64
|
What is New in ArrayKit
|
|
65
65
|
-------------------------
|
|
66
66
|
|
|
67
|
+
1.7.0
|
|
68
|
+
............
|
|
69
|
+
|
|
70
|
+
Added ``group_ordering()``.
|
|
71
|
+
|
|
72
|
+
|
|
67
73
|
1.6.0
|
|
68
74
|
............
|
|
69
|
-
Added ``factorize``.
|
|
75
|
+
Added ``factorize()``.
|
|
70
76
|
|
|
71
77
|
|
|
72
78
|
1.5.0
|
|
73
79
|
............
|
|
74
|
-
Added ``transition_slices_from_group``.
|
|
80
|
+
Added ``transition_slices_from_group()``.
|
|
75
81
|
|
|
76
82
|
|
|
77
83
|
1.4.0
|
|
@@ -37,6 +37,7 @@ test/test_delimited_to_arrays.py
|
|
|
37
37
|
test/test_delimited_to_arrays_integration.py
|
|
38
38
|
test/test_delimited_to_arrays_property.py
|
|
39
39
|
test/test_factorize.py
|
|
40
|
+
test/test_group_ordering.py
|
|
40
41
|
test/test_nonzero_1d.py
|
|
41
42
|
test/test_nonzero_1d_property.py
|
|
42
43
|
test/test_objectable.py
|
|
@@ -26,6 +26,7 @@ from ._arraykit import split_after_count as split_after_count
|
|
|
26
26
|
from ._arraykit import get_new_indexers_and_screen as get_new_indexers_and_screen
|
|
27
27
|
from ._arraykit import write_array_to_file as write_array_to_file
|
|
28
28
|
from ._arraykit import factorize as factorize
|
|
29
|
+
from ._arraykit import group_ordering as group_ordering
|
|
29
30
|
from ._arraykit import count_iteration as count_iteration
|
|
30
31
|
from ._arraykit import first_true_1d as first_true_1d
|
|
31
32
|
from ._arraykit import first_true_2d as first_true_2d
|
|
@@ -227,6 +227,9 @@ def write_array_to_file(
|
|
|
227
227
|
def factorize(
|
|
228
228
|
array: np.ndarray, *, sort: bool = ...
|
|
229
229
|
) -> tp.Tuple[np.ndarray, np.ndarray]: ...
|
|
230
|
+
def group_ordering(
|
|
231
|
+
codes: np.ndarray, *, size: tp.Optional[int] = ...
|
|
232
|
+
) -> tp.Tuple[np.ndarray, np.ndarray]: ...
|
|
230
233
|
def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
|
|
231
234
|
def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
|
|
232
235
|
def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ...
|
|
@@ -985,6 +985,149 @@ first_true_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
|
|
|
985
985
|
return (PyObject *)array_pos;
|
|
986
986
|
}
|
|
987
987
|
|
|
988
|
+
static char *group_ordering_kwarg_names[] = {
|
|
989
|
+
"codes",
|
|
990
|
+
"size",
|
|
991
|
+
NULL
|
|
992
|
+
};
|
|
993
|
+
|
|
994
|
+
// Stable counting sort of dense factorize codes. Given `codes` in [0, size),
|
|
995
|
+
// return (permutation, offsets) such that permutation[offsets[g]:offsets[g+1]]
|
|
996
|
+
// are the input positions of group g, in ascending (stable) order. This is an
|
|
997
|
+
// O(n) alternative to np.argsort(codes, kind='stable') for already-dense codes.
|
|
998
|
+
PyObject *
|
|
999
|
+
group_ordering(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
|
|
1000
|
+
{
|
|
1001
|
+
PyArrayObject *codes = NULL;
|
|
1002
|
+
PyObject *size_obj = NULL;
|
|
1003
|
+
|
|
1004
|
+
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
|
|
1005
|
+
"O!|$O:group_ordering",
|
|
1006
|
+
group_ordering_kwarg_names,
|
|
1007
|
+
&PyArray_Type, &codes,
|
|
1008
|
+
&size_obj
|
|
1009
|
+
)) {
|
|
1010
|
+
return NULL;
|
|
1011
|
+
}
|
|
1012
|
+
if (PyArray_NDIM(codes) != 1) {
|
|
1013
|
+
PyErr_SetString(PyExc_ValueError, "Array must be 1-dimensional");
|
|
1014
|
+
return NULL;
|
|
1015
|
+
}
|
|
1016
|
+
if (PyArray_TYPE(codes) != NPY_INTP) {
|
|
1017
|
+
PyErr_SetString(PyExc_ValueError, "Array must be of type intp");
|
|
1018
|
+
return NULL;
|
|
1019
|
+
}
|
|
1020
|
+
if (!PyArray_IS_C_CONTIGUOUS(codes)) {
|
|
1021
|
+
PyErr_SetString(PyExc_ValueError, "Array must be contiguous");
|
|
1022
|
+
return NULL;
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
npy_intp n = PyArray_SIZE(codes);
|
|
1026
|
+
npy_intp *codes_buffer = (npy_intp*)PyArray_DATA(codes);
|
|
1027
|
+
|
|
1028
|
+
// Determine the number of groups: caller-provided, else max(codes) + 1.
|
|
1029
|
+
npy_intp size = 0;
|
|
1030
|
+
int size_given = (size_obj != NULL && size_obj != Py_None);
|
|
1031
|
+
if (size_given) {
|
|
1032
|
+
size = (npy_intp)PyNumber_AsSsize_t(size_obj, PyExc_OverflowError);
|
|
1033
|
+
if (size == -1 && PyErr_Occurred()) {
|
|
1034
|
+
return NULL;
|
|
1035
|
+
}
|
|
1036
|
+
if (size < 0) {
|
|
1037
|
+
PyErr_SetString(PyExc_ValueError, "size must be non-negative");
|
|
1038
|
+
return NULL;
|
|
1039
|
+
}
|
|
1040
|
+
}
|
|
1041
|
+
else {
|
|
1042
|
+
for (npy_intp i = 0; i < n; i++) {
|
|
1043
|
+
npy_intp c = codes_buffer[i];
|
|
1044
|
+
if (c < 0) {
|
|
1045
|
+
PyErr_SetString(PyExc_ValueError, "codes must be non-negative");
|
|
1046
|
+
return NULL;
|
|
1047
|
+
}
|
|
1048
|
+
// guard c + 1 against signed overflow (undefined behavior)
|
|
1049
|
+
if (c == NPY_MAX_INTP) {
|
|
1050
|
+
PyErr_SetString(PyExc_OverflowError,
|
|
1051
|
+
"cannot infer size: code value too large");
|
|
1052
|
+
return NULL;
|
|
1053
|
+
}
|
|
1054
|
+
if (c + 1 > size) {
|
|
1055
|
+
size = c + 1;
|
|
1056
|
+
}
|
|
1057
|
+
}
|
|
1058
|
+
}
|
|
1059
|
+
|
|
1060
|
+
// offsets has length size + 1; guard that against signed overflow (covers
|
|
1061
|
+
// both a caller-provided size and an inferred one)
|
|
1062
|
+
if (size == NPY_MAX_INTP) {
|
|
1063
|
+
PyErr_SetString(PyExc_OverflowError, "size too large");
|
|
1064
|
+
return NULL;
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
PyObject *perm_arr = NULL;
|
|
1068
|
+
PyObject *offsets_arr = NULL;
|
|
1069
|
+
npy_intp *cursor = NULL;
|
|
1070
|
+
|
|
1071
|
+
perm_arr = PyArray_EMPTY(1, &n, NPY_INTP, 0);
|
|
1072
|
+
if (!perm_arr) {
|
|
1073
|
+
goto fail;
|
|
1074
|
+
}
|
|
1075
|
+
npy_intp size_plus = size + 1;
|
|
1076
|
+
offsets_arr = PyArray_ZEROS(1, &size_plus, NPY_INTP, 0);
|
|
1077
|
+
if (!offsets_arr) {
|
|
1078
|
+
goto fail;
|
|
1079
|
+
}
|
|
1080
|
+
npy_intp *perm = (npy_intp*)PyArray_DATA((PyArrayObject*)perm_arr);
|
|
1081
|
+
npy_intp *offsets = (npy_intp*)PyArray_DATA((PyArrayObject*)offsets_arr);
|
|
1082
|
+
|
|
1083
|
+
// Count pass: tally each group into offsets[c + 1]. When size was inferred
|
|
1084
|
+
// the codes are already known to be in [0, size); only a caller-provided
|
|
1085
|
+
// size needs the range validated here.
|
|
1086
|
+
for (npy_intp i = 0; i < n; i++) {
|
|
1087
|
+
npy_intp c = codes_buffer[i];
|
|
1088
|
+
if (size_given && (c < 0 || c >= size)) {
|
|
1089
|
+
PyErr_Format(PyExc_ValueError,
|
|
1090
|
+
"code %zd out of range [0, %zd)",
|
|
1091
|
+
(Py_ssize_t)c, (Py_ssize_t)size);
|
|
1092
|
+
goto fail;
|
|
1093
|
+
}
|
|
1094
|
+
offsets[c + 1]++;
|
|
1095
|
+
}
|
|
1096
|
+
// Prefix sum: offsets[g] becomes the start index of group g; offsets[size] == n.
|
|
1097
|
+
for (npy_intp g = 0; g < size; g++) {
|
|
1098
|
+
offsets[g + 1] += offsets[g];
|
|
1099
|
+
}
|
|
1100
|
+
// Scatter pass: place each input position at its group's running cursor.
|
|
1101
|
+
// Ascending i preserves original order within each group (stability).
|
|
1102
|
+
if (size > 0) {
|
|
1103
|
+
cursor = PyMem_New(npy_intp, size);
|
|
1104
|
+
if (!cursor) {
|
|
1105
|
+
PyErr_NoMemory();
|
|
1106
|
+
goto fail;
|
|
1107
|
+
}
|
|
1108
|
+
memcpy(cursor, offsets, size * sizeof(npy_intp));
|
|
1109
|
+
for (npy_intp i = 0; i < n; i++) {
|
|
1110
|
+
perm[cursor[codes_buffer[i]]++] = i;
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
PyArray_CLEARFLAGS((PyArrayObject*)perm_arr, NPY_ARRAY_WRITEABLE);
|
|
1115
|
+
PyArray_CLEARFLAGS((PyArrayObject*)offsets_arr, NPY_ARRAY_WRITEABLE);
|
|
1116
|
+
|
|
1117
|
+
PyMem_Free(cursor);
|
|
1118
|
+
|
|
1119
|
+
PyObject *result = PyTuple_Pack(2, perm_arr, offsets_arr);
|
|
1120
|
+
Py_DECREF(perm_arr);
|
|
1121
|
+
Py_DECREF(offsets_arr);
|
|
1122
|
+
return result;
|
|
1123
|
+
|
|
1124
|
+
fail:
|
|
1125
|
+
PyMem_Free(cursor);
|
|
1126
|
+
Py_XDECREF(perm_arr);
|
|
1127
|
+
Py_XDECREF(offsets_arr);
|
|
1128
|
+
return NULL;
|
|
1129
|
+
}
|
|
1130
|
+
|
|
988
1131
|
PyObject *
|
|
989
1132
|
dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg)
|
|
990
1133
|
{
|
|
@@ -69,6 +69,9 @@ first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
|
|
|
69
69
|
PyObject *
|
|
70
70
|
first_true_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
|
|
71
71
|
|
|
72
|
+
PyObject *
|
|
73
|
+
group_ordering(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
|
|
74
|
+
|
|
72
75
|
PyObject *
|
|
73
76
|
dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg);
|
|
74
77
|
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from arraykit import group_ordering
|
|
6
|
+
from arraykit import factorize
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def offsets_from_codes(codes, size):
|
|
10
|
+
# CSR-style offsets: [0, *cumsum(bincount(codes, minlength=size))]
|
|
11
|
+
counts = np.bincount(codes, minlength=size)
|
|
12
|
+
return np.concatenate([[0], np.cumsum(counts)]).astype(np.intp)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestUnit(unittest.TestCase):
|
|
16
|
+
# ------------------------------------------------------------------
|
|
17
|
+
# basic behavior
|
|
18
|
+
|
|
19
|
+
def test_group_ordering_basic_a(self) -> None:
|
|
20
|
+
codes = np.array([0, 0, 0, 1, 1, 2], dtype=np.intp)
|
|
21
|
+
perm, offsets = group_ordering(codes)
|
|
22
|
+
self.assertEqual(perm.tolist(), [0, 1, 2, 3, 4, 5])
|
|
23
|
+
self.assertEqual(offsets.tolist(), [0, 3, 5, 6])
|
|
24
|
+
|
|
25
|
+
def test_group_ordering_basic_b(self) -> None:
|
|
26
|
+
codes = np.array([2, 0, 0, 2, 1, 1, 0, 0, 3, 0], dtype=np.intp)
|
|
27
|
+
perm, offsets = group_ordering(codes)
|
|
28
|
+
self.assertEqual(perm.tolist(), [1, 2, 6, 7, 9, 4, 5, 0, 3, 8])
|
|
29
|
+
self.assertEqual(offsets.tolist(), [0, 5, 7, 9, 10])
|
|
30
|
+
|
|
31
|
+
def test_group_ordering_interleaved(self) -> None:
|
|
32
|
+
codes = np.array([2, 0, 1, 0, 2, 1], dtype=np.intp)
|
|
33
|
+
perm, offsets = group_ordering(codes)
|
|
34
|
+
# group 0 -> positions 1, 3; group 1 -> 2, 5; group 2 -> 0, 4
|
|
35
|
+
self.assertEqual(offsets.tolist(), [0, 2, 4, 6])
|
|
36
|
+
self.assertEqual(perm[offsets[0]:offsets[1]].tolist(), [1, 3])
|
|
37
|
+
self.assertEqual(perm[offsets[1]:offsets[2]].tolist(), [2, 5])
|
|
38
|
+
self.assertEqual(perm[offsets[2]:offsets[3]].tolist(), [0, 4])
|
|
39
|
+
|
|
40
|
+
def test_group_ordering_stability(self) -> None:
|
|
41
|
+
# original positions within each group must stay ascending
|
|
42
|
+
codes = np.array([0, 1, 0, 1, 0, 1], dtype=np.intp)
|
|
43
|
+
perm, offsets = group_ordering(codes)
|
|
44
|
+
self.assertEqual(perm[offsets[0]:offsets[1]].tolist(), [0, 2, 4])
|
|
45
|
+
self.assertEqual(perm[offsets[1]:offsets[2]].tolist(), [1, 3, 5])
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ------------------------------------------------------------------
|
|
49
|
+
# parity against numpy oracle
|
|
50
|
+
|
|
51
|
+
def test_group_ordering_parity_argsort(self) -> None:
|
|
52
|
+
rng = np.random.default_rng(0)
|
|
53
|
+
for size in (1, 5, 50, 500):
|
|
54
|
+
codes = rng.integers(0, size, size=10_000).astype(np.intp)
|
|
55
|
+
perm, offsets = group_ordering(codes)
|
|
56
|
+
expected = np.argsort(codes, kind='stable').astype(np.intp)
|
|
57
|
+
self.assertEqual(perm.tolist(), expected.tolist())
|
|
58
|
+
self.assertEqual(
|
|
59
|
+
offsets.tolist(), offsets_from_codes(codes, size).tolist()
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
def test_group_ordering_parity_inferred_size(self) -> None:
|
|
63
|
+
rng = np.random.default_rng(1)
|
|
64
|
+
codes = rng.integers(0, 100, size=5_000).astype(np.intp)
|
|
65
|
+
perm, offsets = group_ordering(codes)
|
|
66
|
+
size = int(codes.max()) + 1
|
|
67
|
+
self.assertEqual(len(offsets), size + 1)
|
|
68
|
+
expected = np.argsort(codes, kind='stable').astype(np.intp)
|
|
69
|
+
self.assertEqual(perm.tolist(), expected.tolist())
|
|
70
|
+
|
|
71
|
+
# ------------------------------------------------------------------
|
|
72
|
+
# dtype / shape
|
|
73
|
+
|
|
74
|
+
def test_group_ordering_dtypes(self) -> None:
|
|
75
|
+
codes = np.array([0, 1, 0, 2], dtype=np.intp)
|
|
76
|
+
perm, offsets = group_ordering(codes)
|
|
77
|
+
self.assertEqual(perm.dtype, np.dtype(np.intp))
|
|
78
|
+
self.assertEqual(offsets.dtype, np.dtype(np.intp))
|
|
79
|
+
|
|
80
|
+
def test_group_ordering_offsets_length(self) -> None:
|
|
81
|
+
codes = np.array([0, 1, 2, 3], dtype=np.intp)
|
|
82
|
+
_, offsets = group_ordering(codes, size=10)
|
|
83
|
+
self.assertEqual(len(offsets), 11)
|
|
84
|
+
self.assertEqual(offsets[-1], 4)
|
|
85
|
+
|
|
86
|
+
# ------------------------------------------------------------------
|
|
87
|
+
# size keyword
|
|
88
|
+
|
|
89
|
+
def test_group_ordering_size_explicit(self) -> None:
|
|
90
|
+
codes = np.array([0, 0, 1, 1], dtype=np.intp)
|
|
91
|
+
perm, offsets = group_ordering(codes, size=2)
|
|
92
|
+
self.assertEqual(perm.tolist(), [0, 1, 2, 3])
|
|
93
|
+
self.assertEqual(offsets.tolist(), [0, 2, 4])
|
|
94
|
+
|
|
95
|
+
def test_group_ordering_size_trailing_empty(self) -> None:
|
|
96
|
+
codes = np.array([0, 0, 1], dtype=np.intp)
|
|
97
|
+
_, offsets = group_ordering(codes, size=4)
|
|
98
|
+
# groups 2 and 3 are empty: offsets[g] == offsets[g+1]
|
|
99
|
+
self.assertEqual(offsets.tolist(), [0, 2, 3, 3, 3])
|
|
100
|
+
|
|
101
|
+
def test_group_ordering_size_none(self) -> None:
|
|
102
|
+
codes = np.array([0, 1, 1], dtype=np.intp)
|
|
103
|
+
perm, offsets = group_ordering(codes, size=None)
|
|
104
|
+
self.assertEqual(offsets.tolist(), [0, 1, 3])
|
|
105
|
+
|
|
106
|
+
def test_group_ordering_size_is_keyword_only(self) -> None:
|
|
107
|
+
codes = np.array([0, 1], dtype=np.intp)
|
|
108
|
+
with self.assertRaises(TypeError):
|
|
109
|
+
group_ordering(codes, 2)
|
|
110
|
+
|
|
111
|
+
# ------------------------------------------------------------------
|
|
112
|
+
# edge cases
|
|
113
|
+
|
|
114
|
+
def test_group_ordering_empty(self) -> None:
|
|
115
|
+
codes = np.array([], dtype=np.intp)
|
|
116
|
+
perm, offsets = group_ordering(codes)
|
|
117
|
+
self.assertEqual(perm.tolist(), [])
|
|
118
|
+
self.assertEqual(offsets.tolist(), [0])
|
|
119
|
+
|
|
120
|
+
def test_group_ordering_empty_with_size(self) -> None:
|
|
121
|
+
codes = np.array([], dtype=np.intp)
|
|
122
|
+
perm, offsets = group_ordering(codes, size=3)
|
|
123
|
+
self.assertEqual(perm.tolist(), [])
|
|
124
|
+
self.assertEqual(offsets.tolist(), [0, 0, 0, 0])
|
|
125
|
+
|
|
126
|
+
def test_group_ordering_single(self) -> None:
|
|
127
|
+
codes = np.array([0], dtype=np.intp)
|
|
128
|
+
perm, offsets = group_ordering(codes)
|
|
129
|
+
self.assertEqual(perm.tolist(), [0])
|
|
130
|
+
self.assertEqual(offsets.tolist(), [0, 1])
|
|
131
|
+
|
|
132
|
+
def test_group_ordering_single_group(self) -> None:
|
|
133
|
+
codes = np.array([0, 0, 0], dtype=np.intp)
|
|
134
|
+
perm, offsets = group_ordering(codes)
|
|
135
|
+
self.assertEqual(perm.tolist(), [0, 1, 2])
|
|
136
|
+
self.assertEqual(offsets.tolist(), [0, 3])
|
|
137
|
+
|
|
138
|
+
def test_group_ordering_all_distinct(self) -> None:
|
|
139
|
+
codes = np.array([3, 2, 1, 0], dtype=np.intp)
|
|
140
|
+
perm, offsets = group_ordering(codes)
|
|
141
|
+
self.assertEqual(perm.tolist(), [3, 2, 1, 0])
|
|
142
|
+
self.assertEqual(offsets.tolist(), [0, 1, 2, 3, 4])
|
|
143
|
+
|
|
144
|
+
# ------------------------------------------------------------------
|
|
145
|
+
# validation
|
|
146
|
+
|
|
147
|
+
def test_group_ordering_not_array(self) -> None:
|
|
148
|
+
with self.assertRaises(TypeError):
|
|
149
|
+
group_ordering([0, 1, 2])
|
|
150
|
+
|
|
151
|
+
def test_group_ordering_2d(self) -> None:
|
|
152
|
+
codes = np.array([[0, 1], [1, 0]], dtype=np.intp)
|
|
153
|
+
with self.assertRaises(ValueError):
|
|
154
|
+
group_ordering(codes)
|
|
155
|
+
|
|
156
|
+
def test_group_ordering_wrong_dtype(self) -> None:
|
|
157
|
+
# pick an integer width that differs from intp on this platform
|
|
158
|
+
# (intp is 32-bit on some Windows builds, 64-bit elsewhere)
|
|
159
|
+
wrong = np.int32 if np.dtype(np.intp).itemsize != 4 else np.int64
|
|
160
|
+
codes = np.array([0, 1, 2], dtype=wrong)
|
|
161
|
+
with self.assertRaises(ValueError):
|
|
162
|
+
group_ordering(codes)
|
|
163
|
+
|
|
164
|
+
def test_group_ordering_wrong_dtype_float(self) -> None:
|
|
165
|
+
codes = np.array([0.0, 1.0, 2.0], dtype=np.float64)
|
|
166
|
+
with self.assertRaises(ValueError):
|
|
167
|
+
group_ordering(codes)
|
|
168
|
+
|
|
169
|
+
def test_group_ordering_non_contiguous(self) -> None:
|
|
170
|
+
codes = np.arange(10, dtype=np.intp)[::2]
|
|
171
|
+
self.assertFalse(codes.flags['C_CONTIGUOUS'])
|
|
172
|
+
with self.assertRaises(ValueError):
|
|
173
|
+
group_ordering(codes)
|
|
174
|
+
|
|
175
|
+
def test_group_ordering_negative_code_inferred(self) -> None:
|
|
176
|
+
codes = np.array([0, -1, 1], dtype=np.intp)
|
|
177
|
+
with self.assertRaises(ValueError):
|
|
178
|
+
group_ordering(codes)
|
|
179
|
+
|
|
180
|
+
def test_group_ordering_out_of_range(self) -> None:
|
|
181
|
+
codes = np.array([0, 1, 5], dtype=np.intp)
|
|
182
|
+
with self.assertRaises(ValueError):
|
|
183
|
+
group_ordering(codes, size=3)
|
|
184
|
+
|
|
185
|
+
def test_group_ordering_negative_size(self) -> None:
|
|
186
|
+
codes = np.array([0, 1], dtype=np.intp)
|
|
187
|
+
with self.assertRaises(ValueError):
|
|
188
|
+
group_ordering(codes, size=-1)
|
|
189
|
+
|
|
190
|
+
def test_group_ordering_size_out_of_range_zero(self) -> None:
|
|
191
|
+
codes = np.array([0, 1], dtype=np.intp)
|
|
192
|
+
with self.assertRaises(ValueError):
|
|
193
|
+
group_ordering(codes, size=0)
|
|
194
|
+
|
|
195
|
+
def test_group_ordering_infer_overflow(self) -> None:
|
|
196
|
+
# a code at the intp max would overflow when inferring size = c + 1
|
|
197
|
+
codes = np.array([np.iinfo(np.intp).max], dtype=np.intp)
|
|
198
|
+
with self.assertRaises(OverflowError):
|
|
199
|
+
group_ordering(codes)
|
|
200
|
+
|
|
201
|
+
def test_group_ordering_size_overflow(self) -> None:
|
|
202
|
+
# an explicit size at the intp max would overflow computing size + 1
|
|
203
|
+
codes = np.array([0, 1], dtype=np.intp)
|
|
204
|
+
with self.assertRaises(OverflowError):
|
|
205
|
+
group_ordering(codes, size=np.iinfo(np.intp).max)
|
|
206
|
+
|
|
207
|
+
# ------------------------------------------------------------------
|
|
208
|
+
# immutability
|
|
209
|
+
|
|
210
|
+
def test_group_ordering_outputs_immutable(self) -> None:
|
|
211
|
+
codes = np.array([0, 1, 0], dtype=np.intp)
|
|
212
|
+
perm, offsets = group_ordering(codes)
|
|
213
|
+
self.assertFalse(perm.flags.writeable)
|
|
214
|
+
self.assertFalse(offsets.flags.writeable)
|
|
215
|
+
|
|
216
|
+
# ------------------------------------------------------------------
|
|
217
|
+
# round-trip with factorize
|
|
218
|
+
|
|
219
|
+
def test_group_ordering_with_factorize(self) -> None:
|
|
220
|
+
a = np.array(['b', 'a', 'b', 'c', 'a', 'a'])
|
|
221
|
+
uniques, codes = factorize(a)
|
|
222
|
+
perm, offsets = group_ordering(codes, size=len(uniques))
|
|
223
|
+
ordered = a[perm]
|
|
224
|
+
# each group's slice of the reordered array is constant
|
|
225
|
+
for g in range(len(uniques)):
|
|
226
|
+
segment = ordered[offsets[g]:offsets[g + 1]]
|
|
227
|
+
self.assertTrue((segment == segment[0]).all())
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
if __name__ == '__main__':
|
|
231
|
+
unittest.main()
|
arraykit-1.6.0/VERSION
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|