arraykit 1.6.0__tar.gz → 1.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {arraykit-1.6.0/arraykit.egg-info → arraykit-1.7.0}/PKG-INFO +9 -3
  2. {arraykit-1.6.0 → arraykit-1.7.0}/README.rst +8 -2
  3. arraykit-1.7.0/VERSION +2 -0
  4. {arraykit-1.6.0 → arraykit-1.7.0/arraykit.egg-info}/PKG-INFO +9 -3
  5. {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/SOURCES.txt +1 -0
  6. {arraykit-1.6.0 → arraykit-1.7.0}/src/__init__.py +1 -0
  7. {arraykit-1.6.0 → arraykit-1.7.0}/src/__init__.pyi +3 -0
  8. {arraykit-1.6.0 → arraykit-1.7.0}/src/_arraykit.c +4 -0
  9. {arraykit-1.6.0 → arraykit-1.7.0}/src/methods.c +143 -0
  10. {arraykit-1.6.0 → arraykit-1.7.0}/src/methods.h +3 -0
  11. arraykit-1.7.0/test/test_group_ordering.py +231 -0
  12. arraykit-1.6.0/VERSION +0 -2
  13. {arraykit-1.6.0 → arraykit-1.7.0}/LICENSE.txt +0 -0
  14. {arraykit-1.6.0 → arraykit-1.7.0}/MANIFEST.in +0 -0
  15. {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/dependency_links.txt +0 -0
  16. {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/requires.txt +0 -0
  17. {arraykit-1.6.0 → arraykit-1.7.0}/arraykit.egg-info/top_level.txt +0 -0
  18. {arraykit-1.6.0 → arraykit-1.7.0}/pyproject.toml +0 -0
  19. {arraykit-1.6.0 → arraykit-1.7.0}/setup.cfg +0 -0
  20. {arraykit-1.6.0 → arraykit-1.7.0}/setup.py +0 -0
  21. {arraykit-1.6.0 → arraykit-1.7.0}/src/array_go.c +0 -0
  22. {arraykit-1.6.0 → arraykit-1.7.0}/src/array_go.h +0 -0
  23. {arraykit-1.6.0 → arraykit-1.7.0}/src/array_to_tuple.c +0 -0
  24. {arraykit-1.6.0 → arraykit-1.7.0}/src/array_to_tuple.h +0 -0
  25. {arraykit-1.6.0 → arraykit-1.7.0}/src/auto_map.c +0 -0
  26. {arraykit-1.6.0 → arraykit-1.7.0}/src/auto_map.h +0 -0
  27. {arraykit-1.6.0 → arraykit-1.7.0}/src/block_index.c +0 -0
  28. {arraykit-1.6.0 → arraykit-1.7.0}/src/block_index.h +0 -0
  29. {arraykit-1.6.0 → arraykit-1.7.0}/src/delimited_to_arrays.c +0 -0
  30. {arraykit-1.6.0 → arraykit-1.7.0}/src/delimited_to_arrays.h +0 -0
  31. {arraykit-1.6.0 → arraykit-1.7.0}/src/py.typed +0 -0
  32. {arraykit-1.6.0 → arraykit-1.7.0}/src/tri_map.c +0 -0
  33. {arraykit-1.6.0 → arraykit-1.7.0}/src/tri_map.h +0 -0
  34. {arraykit-1.6.0 → arraykit-1.7.0}/src/utilities.h +0 -0
  35. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_array_go.py +0 -0
  36. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_astype_array.py +0 -0
  37. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_auto_map.py +0 -0
  38. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_auto_map_property.py +0 -0
  39. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_block_index.py +0 -0
  40. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_delimited_to_arrays.py +0 -0
  41. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_delimited_to_arrays_integration.py +0 -0
  42. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_delimited_to_arrays_property.py +0 -0
  43. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_factorize.py +0 -0
  44. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_nonzero_1d.py +0 -0
  45. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_nonzero_1d_property.py +0 -0
  46. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_objectable.py +0 -0
  47. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_pyi.py +0 -0
  48. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_split_after_count.py +0 -0
  49. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_transition_slices_from_group.py +0 -0
  50. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_tri_map.py +0 -0
  51. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_type_discovery.py +0 -0
  52. {arraykit-1.6.0 → arraykit-1.7.0}/test/test_util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arraykit
3
- Version: 1.6.0
3
+ Version: 1.7.0
4
4
  Summary: Array utilities for StaticFrame
5
5
  Author: Christopher Ariza, Brandt Bucher, Charles Burkland
6
6
  License: MIT
@@ -64,14 +64,20 @@ ArrayKit requires the following:
64
64
  What is New in ArrayKit
65
65
  -------------------------
66
66
 
67
+ 1.7.0
68
+ ............
69
+
70
+ Added ``group_ordering()``.
71
+
72
+
67
73
  1.6.0
68
74
  ............
69
- Added ``factorize``.
75
+ Added ``factorize()``.
70
76
 
71
77
 
72
78
  1.5.0
73
79
  ............
74
- Added ``transition_slices_from_group``.
80
+ Added ``transition_slices_from_group()``.
75
81
 
76
82
 
77
83
  1.4.0
@@ -35,14 +35,20 @@ ArrayKit requires the following:
35
35
  What is New in ArrayKit
36
36
  -------------------------
37
37
 
38
+ 1.7.0
39
+ ............
40
+
41
+ Added ``group_ordering()``.
42
+
43
+
38
44
  1.6.0
39
45
  ............
40
- Added ``factorize``.
46
+ Added ``factorize()``.
41
47
 
42
48
 
43
49
  1.5.0
44
50
  ............
45
- Added ``transition_slices_from_group``.
51
+ Added ``transition_slices_from_group()``.
46
52
 
47
53
 
48
54
  1.4.0
arraykit-1.7.0/VERSION ADDED
@@ -0,0 +1,2 @@
1
+ 1.7.0
2
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arraykit
3
- Version: 1.6.0
3
+ Version: 1.7.0
4
4
  Summary: Array utilities for StaticFrame
5
5
  Author: Christopher Ariza, Brandt Bucher, Charles Burkland
6
6
  License: MIT
@@ -64,14 +64,20 @@ ArrayKit requires the following:
64
64
  What is New in ArrayKit
65
65
  -------------------------
66
66
 
67
+ 1.7.0
68
+ ............
69
+
70
+ Added ``group_ordering()``.
71
+
72
+
67
73
  1.6.0
68
74
  ............
69
- Added ``factorize``.
75
+ Added ``factorize()``.
70
76
 
71
77
 
72
78
  1.5.0
73
79
  ............
74
- Added ``transition_slices_from_group``.
80
+ Added ``transition_slices_from_group()``.
75
81
 
76
82
 
77
83
  1.4.0
@@ -37,6 +37,7 @@ test/test_delimited_to_arrays.py
37
37
  test/test_delimited_to_arrays_integration.py
38
38
  test/test_delimited_to_arrays_property.py
39
39
  test/test_factorize.py
40
+ test/test_group_ordering.py
40
41
  test/test_nonzero_1d.py
41
42
  test/test_nonzero_1d_property.py
42
43
  test/test_objectable.py
@@ -26,6 +26,7 @@ from ._arraykit import split_after_count as split_after_count
26
26
  from ._arraykit import get_new_indexers_and_screen as get_new_indexers_and_screen
27
27
  from ._arraykit import write_array_to_file as write_array_to_file
28
28
  from ._arraykit import factorize as factorize
29
+ from ._arraykit import group_ordering as group_ordering
29
30
  from ._arraykit import count_iteration as count_iteration
30
31
  from ._arraykit import first_true_1d as first_true_1d
31
32
  from ._arraykit import first_true_2d as first_true_2d
@@ -227,6 +227,9 @@ def write_array_to_file(
227
227
  def factorize(
228
228
  array: np.ndarray, *, sort: bool = ...
229
229
  ) -> tp.Tuple[np.ndarray, np.ndarray]: ...
230
+ def group_ordering(
231
+ codes: np.ndarray, *, size: tp.Optional[int] = ...
232
+ ) -> tp.Tuple[np.ndarray, np.ndarray]: ...
230
233
  def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
231
234
  def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
232
235
  def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ...
@@ -74,6 +74,10 @@ static PyMethodDef arraykit_methods[] = {
74
74
  (PyCFunction)factorize,
75
75
  METH_VARARGS | METH_KEYWORDS,
76
76
  NULL},
77
+ {"group_ordering",
78
+ (PyCFunction)group_ordering,
79
+ METH_VARARGS | METH_KEYWORDS,
80
+ NULL},
77
81
  {NULL},
78
82
  };
79
83
 
@@ -985,6 +985,149 @@ first_true_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
985
985
  return (PyObject *)array_pos;
986
986
  }
987
987
 
988
+ static char *group_ordering_kwarg_names[] = {
989
+ "codes",
990
+ "size",
991
+ NULL
992
+ };
993
+
994
+ // Stable counting sort of dense factorize codes. Given `codes` in [0, size),
995
+ // return (permutation, offsets) such that permutation[offsets[g]:offsets[g+1]]
996
+ // are the input positions of group g, in ascending (stable) order. This is an
997
+ // O(n) alternative to np.argsort(codes, kind='stable') for already-dense codes.
998
+ PyObject *
999
+ group_ordering(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
1000
+ {
1001
+ PyArrayObject *codes = NULL;
1002
+ PyObject *size_obj = NULL;
1003
+
1004
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1005
+ "O!|$O:group_ordering",
1006
+ group_ordering_kwarg_names,
1007
+ &PyArray_Type, &codes,
1008
+ &size_obj
1009
+ )) {
1010
+ return NULL;
1011
+ }
1012
+ if (PyArray_NDIM(codes) != 1) {
1013
+ PyErr_SetString(PyExc_ValueError, "Array must be 1-dimensional");
1014
+ return NULL;
1015
+ }
1016
+ if (PyArray_TYPE(codes) != NPY_INTP) {
1017
+ PyErr_SetString(PyExc_ValueError, "Array must be of type intp");
1018
+ return NULL;
1019
+ }
1020
+ if (!PyArray_IS_C_CONTIGUOUS(codes)) {
1021
+ PyErr_SetString(PyExc_ValueError, "Array must be contiguous");
1022
+ return NULL;
1023
+ }
1024
+
1025
+ npy_intp n = PyArray_SIZE(codes);
1026
+ npy_intp *codes_buffer = (npy_intp*)PyArray_DATA(codes);
1027
+
1028
+ // Determine the number of groups: caller-provided, else max(codes) + 1.
1029
+ npy_intp size = 0;
1030
+ int size_given = (size_obj != NULL && size_obj != Py_None);
1031
+ if (size_given) {
1032
+ size = (npy_intp)PyNumber_AsSsize_t(size_obj, PyExc_OverflowError);
1033
+ if (size == -1 && PyErr_Occurred()) {
1034
+ return NULL;
1035
+ }
1036
+ if (size < 0) {
1037
+ PyErr_SetString(PyExc_ValueError, "size must be non-negative");
1038
+ return NULL;
1039
+ }
1040
+ }
1041
+ else {
1042
+ for (npy_intp i = 0; i < n; i++) {
1043
+ npy_intp c = codes_buffer[i];
1044
+ if (c < 0) {
1045
+ PyErr_SetString(PyExc_ValueError, "codes must be non-negative");
1046
+ return NULL;
1047
+ }
1048
+ // guard c + 1 against signed overflow (undefined behavior)
1049
+ if (c == NPY_MAX_INTP) {
1050
+ PyErr_SetString(PyExc_OverflowError,
1051
+ "cannot infer size: code value too large");
1052
+ return NULL;
1053
+ }
1054
+ if (c + 1 > size) {
1055
+ size = c + 1;
1056
+ }
1057
+ }
1058
+ }
1059
+
1060
+ // offsets has length size + 1; guard that against signed overflow (covers
1061
+ // both a caller-provided size and an inferred one)
1062
+ if (size == NPY_MAX_INTP) {
1063
+ PyErr_SetString(PyExc_OverflowError, "size too large");
1064
+ return NULL;
1065
+ }
1066
+
1067
+ PyObject *perm_arr = NULL;
1068
+ PyObject *offsets_arr = NULL;
1069
+ npy_intp *cursor = NULL;
1070
+
1071
+ perm_arr = PyArray_EMPTY(1, &n, NPY_INTP, 0);
1072
+ if (!perm_arr) {
1073
+ goto fail;
1074
+ }
1075
+ npy_intp size_plus = size + 1;
1076
+ offsets_arr = PyArray_ZEROS(1, &size_plus, NPY_INTP, 0);
1077
+ if (!offsets_arr) {
1078
+ goto fail;
1079
+ }
1080
+ npy_intp *perm = (npy_intp*)PyArray_DATA((PyArrayObject*)perm_arr);
1081
+ npy_intp *offsets = (npy_intp*)PyArray_DATA((PyArrayObject*)offsets_arr);
1082
+
1083
+ // Count pass: tally each group into offsets[c + 1]. When size was inferred
1084
+ // the codes are already known to be in [0, size); only a caller-provided
1085
+ // size needs the range validated here.
1086
+ for (npy_intp i = 0; i < n; i++) {
1087
+ npy_intp c = codes_buffer[i];
1088
+ if (size_given && (c < 0 || c >= size)) {
1089
+ PyErr_Format(PyExc_ValueError,
1090
+ "code %zd out of range [0, %zd)",
1091
+ (Py_ssize_t)c, (Py_ssize_t)size);
1092
+ goto fail;
1093
+ }
1094
+ offsets[c + 1]++;
1095
+ }
1096
+ // Prefix sum: offsets[g] becomes the start index of group g; offsets[size] == n.
1097
+ for (npy_intp g = 0; g < size; g++) {
1098
+ offsets[g + 1] += offsets[g];
1099
+ }
1100
+ // Scatter pass: place each input position at its group's running cursor.
1101
+ // Ascending i preserves original order within each group (stability).
1102
+ if (size > 0) {
1103
+ cursor = PyMem_New(npy_intp, size);
1104
+ if (!cursor) {
1105
+ PyErr_NoMemory();
1106
+ goto fail;
1107
+ }
1108
+ memcpy(cursor, offsets, size * sizeof(npy_intp));
1109
+ for (npy_intp i = 0; i < n; i++) {
1110
+ perm[cursor[codes_buffer[i]]++] = i;
1111
+ }
1112
+ }
1113
+
1114
+ PyArray_CLEARFLAGS((PyArrayObject*)perm_arr, NPY_ARRAY_WRITEABLE);
1115
+ PyArray_CLEARFLAGS((PyArrayObject*)offsets_arr, NPY_ARRAY_WRITEABLE);
1116
+
1117
+ PyMem_Free(cursor);
1118
+
1119
+ PyObject *result = PyTuple_Pack(2, perm_arr, offsets_arr);
1120
+ Py_DECREF(perm_arr);
1121
+ Py_DECREF(offsets_arr);
1122
+ return result;
1123
+
1124
+ fail:
1125
+ PyMem_Free(cursor);
1126
+ Py_XDECREF(perm_arr);
1127
+ Py_XDECREF(offsets_arr);
1128
+ return NULL;
1129
+ }
1130
+
988
1131
  PyObject *
989
1132
  dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg)
990
1133
  {
@@ -69,6 +69,9 @@ first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
69
69
  PyObject *
70
70
  first_true_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
71
71
 
72
+ PyObject *
73
+ group_ordering(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
74
+
72
75
  PyObject *
73
76
  dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg);
74
77
 
@@ -0,0 +1,231 @@
1
+ import unittest
2
+
3
+ import numpy as np
4
+
5
+ from arraykit import group_ordering
6
+ from arraykit import factorize
7
+
8
+
9
+ def offsets_from_codes(codes, size):
10
+ # CSR-style offsets: [0, *cumsum(bincount(codes, minlength=size))]
11
+ counts = np.bincount(codes, minlength=size)
12
+ return np.concatenate([[0], np.cumsum(counts)]).astype(np.intp)
13
+
14
+
15
+ class TestUnit(unittest.TestCase):
16
+ # ------------------------------------------------------------------
17
+ # basic behavior
18
+
19
+ def test_group_ordering_basic_a(self) -> None:
20
+ codes = np.array([0, 0, 0, 1, 1, 2], dtype=np.intp)
21
+ perm, offsets = group_ordering(codes)
22
+ self.assertEqual(perm.tolist(), [0, 1, 2, 3, 4, 5])
23
+ self.assertEqual(offsets.tolist(), [0, 3, 5, 6])
24
+
25
+ def test_group_ordering_basic_b(self) -> None:
26
+ codes = np.array([2, 0, 0, 2, 1, 1, 0, 0, 3, 0], dtype=np.intp)
27
+ perm, offsets = group_ordering(codes)
28
+ self.assertEqual(perm.tolist(), [1, 2, 6, 7, 9, 4, 5, 0, 3, 8])
29
+ self.assertEqual(offsets.tolist(), [0, 5, 7, 9, 10])
30
+
31
+ def test_group_ordering_interleaved(self) -> None:
32
+ codes = np.array([2, 0, 1, 0, 2, 1], dtype=np.intp)
33
+ perm, offsets = group_ordering(codes)
34
+ # group 0 -> positions 1, 3; group 1 -> 2, 5; group 2 -> 0, 4
35
+ self.assertEqual(offsets.tolist(), [0, 2, 4, 6])
36
+ self.assertEqual(perm[offsets[0]:offsets[1]].tolist(), [1, 3])
37
+ self.assertEqual(perm[offsets[1]:offsets[2]].tolist(), [2, 5])
38
+ self.assertEqual(perm[offsets[2]:offsets[3]].tolist(), [0, 4])
39
+
40
+ def test_group_ordering_stability(self) -> None:
41
+ # original positions within each group must stay ascending
42
+ codes = np.array([0, 1, 0, 1, 0, 1], dtype=np.intp)
43
+ perm, offsets = group_ordering(codes)
44
+ self.assertEqual(perm[offsets[0]:offsets[1]].tolist(), [0, 2, 4])
45
+ self.assertEqual(perm[offsets[1]:offsets[2]].tolist(), [1, 3, 5])
46
+
47
+
48
+ # ------------------------------------------------------------------
49
+ # parity against numpy oracle
50
+
51
+ def test_group_ordering_parity_argsort(self) -> None:
52
+ rng = np.random.default_rng(0)
53
+ for size in (1, 5, 50, 500):
54
+ codes = rng.integers(0, size, size=10_000).astype(np.intp)
55
+ perm, offsets = group_ordering(codes)
56
+ expected = np.argsort(codes, kind='stable').astype(np.intp)
57
+ self.assertEqual(perm.tolist(), expected.tolist())
58
+ self.assertEqual(
59
+ offsets.tolist(), offsets_from_codes(codes, size).tolist()
60
+ )
61
+
62
+ def test_group_ordering_parity_inferred_size(self) -> None:
63
+ rng = np.random.default_rng(1)
64
+ codes = rng.integers(0, 100, size=5_000).astype(np.intp)
65
+ perm, offsets = group_ordering(codes)
66
+ size = int(codes.max()) + 1
67
+ self.assertEqual(len(offsets), size + 1)
68
+ expected = np.argsort(codes, kind='stable').astype(np.intp)
69
+ self.assertEqual(perm.tolist(), expected.tolist())
70
+
71
+ # ------------------------------------------------------------------
72
+ # dtype / shape
73
+
74
+ def test_group_ordering_dtypes(self) -> None:
75
+ codes = np.array([0, 1, 0, 2], dtype=np.intp)
76
+ perm, offsets = group_ordering(codes)
77
+ self.assertEqual(perm.dtype, np.dtype(np.intp))
78
+ self.assertEqual(offsets.dtype, np.dtype(np.intp))
79
+
80
+ def test_group_ordering_offsets_length(self) -> None:
81
+ codes = np.array([0, 1, 2, 3], dtype=np.intp)
82
+ _, offsets = group_ordering(codes, size=10)
83
+ self.assertEqual(len(offsets), 11)
84
+ self.assertEqual(offsets[-1], 4)
85
+
86
+ # ------------------------------------------------------------------
87
+ # size keyword
88
+
89
+ def test_group_ordering_size_explicit(self) -> None:
90
+ codes = np.array([0, 0, 1, 1], dtype=np.intp)
91
+ perm, offsets = group_ordering(codes, size=2)
92
+ self.assertEqual(perm.tolist(), [0, 1, 2, 3])
93
+ self.assertEqual(offsets.tolist(), [0, 2, 4])
94
+
95
+ def test_group_ordering_size_trailing_empty(self) -> None:
96
+ codes = np.array([0, 0, 1], dtype=np.intp)
97
+ _, offsets = group_ordering(codes, size=4)
98
+ # groups 2 and 3 are empty: offsets[g] == offsets[g+1]
99
+ self.assertEqual(offsets.tolist(), [0, 2, 3, 3, 3])
100
+
101
+ def test_group_ordering_size_none(self) -> None:
102
+ codes = np.array([0, 1, 1], dtype=np.intp)
103
+ perm, offsets = group_ordering(codes, size=None)
104
+ self.assertEqual(offsets.tolist(), [0, 1, 3])
105
+
106
+ def test_group_ordering_size_is_keyword_only(self) -> None:
107
+ codes = np.array([0, 1], dtype=np.intp)
108
+ with self.assertRaises(TypeError):
109
+ group_ordering(codes, 2)
110
+
111
+ # ------------------------------------------------------------------
112
+ # edge cases
113
+
114
+ def test_group_ordering_empty(self) -> None:
115
+ codes = np.array([], dtype=np.intp)
116
+ perm, offsets = group_ordering(codes)
117
+ self.assertEqual(perm.tolist(), [])
118
+ self.assertEqual(offsets.tolist(), [0])
119
+
120
+ def test_group_ordering_empty_with_size(self) -> None:
121
+ codes = np.array([], dtype=np.intp)
122
+ perm, offsets = group_ordering(codes, size=3)
123
+ self.assertEqual(perm.tolist(), [])
124
+ self.assertEqual(offsets.tolist(), [0, 0, 0, 0])
125
+
126
+ def test_group_ordering_single(self) -> None:
127
+ codes = np.array([0], dtype=np.intp)
128
+ perm, offsets = group_ordering(codes)
129
+ self.assertEqual(perm.tolist(), [0])
130
+ self.assertEqual(offsets.tolist(), [0, 1])
131
+
132
+ def test_group_ordering_single_group(self) -> None:
133
+ codes = np.array([0, 0, 0], dtype=np.intp)
134
+ perm, offsets = group_ordering(codes)
135
+ self.assertEqual(perm.tolist(), [0, 1, 2])
136
+ self.assertEqual(offsets.tolist(), [0, 3])
137
+
138
+ def test_group_ordering_all_distinct(self) -> None:
139
+ codes = np.array([3, 2, 1, 0], dtype=np.intp)
140
+ perm, offsets = group_ordering(codes)
141
+ self.assertEqual(perm.tolist(), [3, 2, 1, 0])
142
+ self.assertEqual(offsets.tolist(), [0, 1, 2, 3, 4])
143
+
144
+ # ------------------------------------------------------------------
145
+ # validation
146
+
147
+ def test_group_ordering_not_array(self) -> None:
148
+ with self.assertRaises(TypeError):
149
+ group_ordering([0, 1, 2])
150
+
151
+ def test_group_ordering_2d(self) -> None:
152
+ codes = np.array([[0, 1], [1, 0]], dtype=np.intp)
153
+ with self.assertRaises(ValueError):
154
+ group_ordering(codes)
155
+
156
+ def test_group_ordering_wrong_dtype(self) -> None:
157
+ # pick an integer width that differs from intp on this platform
158
+ # (intp is pointer-sized: 32-bit on 32-bit builds, 64-bit on 64-bit builds)
159
+ wrong = np.int32 if np.dtype(np.intp).itemsize != 4 else np.int64
160
+ codes = np.array([0, 1, 2], dtype=wrong)
161
+ with self.assertRaises(ValueError):
162
+ group_ordering(codes)
163
+
164
+ def test_group_ordering_wrong_dtype_float(self) -> None:
165
+ codes = np.array([0.0, 1.0, 2.0], dtype=np.float64)
166
+ with self.assertRaises(ValueError):
167
+ group_ordering(codes)
168
+
169
+ def test_group_ordering_non_contiguous(self) -> None:
170
+ codes = np.arange(10, dtype=np.intp)[::2]
171
+ self.assertFalse(codes.flags['C_CONTIGUOUS'])
172
+ with self.assertRaises(ValueError):
173
+ group_ordering(codes)
174
+
175
+ def test_group_ordering_negative_code_inferred(self) -> None:
176
+ codes = np.array([0, -1, 1], dtype=np.intp)
177
+ with self.assertRaises(ValueError):
178
+ group_ordering(codes)
179
+
180
+ def test_group_ordering_out_of_range(self) -> None:
181
+ codes = np.array([0, 1, 5], dtype=np.intp)
182
+ with self.assertRaises(ValueError):
183
+ group_ordering(codes, size=3)
184
+
185
+ def test_group_ordering_negative_size(self) -> None:
186
+ codes = np.array([0, 1], dtype=np.intp)
187
+ with self.assertRaises(ValueError):
188
+ group_ordering(codes, size=-1)
189
+
190
+ def test_group_ordering_size_out_of_range_zero(self) -> None:
191
+ codes = np.array([0, 1], dtype=np.intp)
192
+ with self.assertRaises(ValueError):
193
+ group_ordering(codes, size=0)
194
+
195
+ def test_group_ordering_infer_overflow(self) -> None:
196
+ # a code at the intp max would overflow when inferring size = c + 1
197
+ codes = np.array([np.iinfo(np.intp).max], dtype=np.intp)
198
+ with self.assertRaises(OverflowError):
199
+ group_ordering(codes)
200
+
201
+ def test_group_ordering_size_overflow(self) -> None:
202
+ # an explicit size at the intp max would overflow computing size + 1
203
+ codes = np.array([0, 1], dtype=np.intp)
204
+ with self.assertRaises(OverflowError):
205
+ group_ordering(codes, size=np.iinfo(np.intp).max)
206
+
207
+ # ------------------------------------------------------------------
208
+ # immutability
209
+
210
+ def test_group_ordering_outputs_immutable(self) -> None:
211
+ codes = np.array([0, 1, 0], dtype=np.intp)
212
+ perm, offsets = group_ordering(codes)
213
+ self.assertFalse(perm.flags.writeable)
214
+ self.assertFalse(offsets.flags.writeable)
215
+
216
+ # ------------------------------------------------------------------
217
+ # round-trip with factorize
218
+
219
+ def test_group_ordering_with_factorize(self) -> None:
220
+ a = np.array(['b', 'a', 'b', 'c', 'a', 'a'])
221
+ uniques, codes = factorize(a)
222
+ perm, offsets = group_ordering(codes, size=len(uniques))
223
+ ordered = a[perm]
224
+ # each group's slice of the reordered array is constant
225
+ for g in range(len(uniques)):
226
+ segment = ordered[offsets[g]:offsets[g + 1]]
227
+ self.assertTrue((segment == segment[0]).all())
228
+
229
+
230
+ if __name__ == '__main__':
231
+ unittest.main()
arraykit-1.6.0/VERSION DELETED
@@ -1,2 +0,0 @@
1
- 1.6.0
2
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes