fastremap 1.16.0__cp39-cp39-win32.whl → 1.17.0__cp39-cp39-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fastremap/ipt.hpp ADDED
@@ -0,0 +1,354 @@
1
+ /* ipt.hpp - In-Place Transposition
2
+ *
3
+ * When transitioning between different media,
4
+ * e.g. CPU to GPU, CPU to Network, CPU to disk,
5
+ * it's often necessary to physically transpose
6
+ * multi-dimensional arrays to reformat as C or
7
+ * Fortran order. Transposing matrices is also
8
+ * a common action in linear algebra, but often
9
+ * you can get away with just changing the strides.
10
+ *
11
+ * An out-of-place transposition is easy to write,
12
+ * often faster, but will spike peak memory consumption.
13
+ *
14
+ * This library grants the user the option of performing
15
+ * an in-place transposition which trades CPU time for
16
+ * peak memory usage.
17
+ *
18
+ * Author: William Silversmith
19
+ * Date: Feb. 2019
20
+ */
21
+
22
+ #include <algorithm>
23
+ #include <cstdint>
24
+ #include <vector>
25
+
26
+ #ifndef IN_PLACE_TRANSPOSE_H
27
+ #define IN_PLACE_TRANSPOSE_H
28
+
29
+ // ipt = in-place transpose
30
+ // call as:
31
+ // 2d: ipt::ipt<T>(arr, sx, sy);
32
+ // 3d: ipt::ipt<T>(arr, sx, sy, sz);
33
+ // 4d: ipt::ipt<T>(arr, sx, sy, sz, sw);
34
+
35
+ namespace ipt {
36
+
37
template <typename T>
void square_ipt(T* arr, const size_t sx, const size_t sy) {
  // In-place transpose of a square matrix (requires sx == sy).
  // Sweeps the upper triangle and exchanges each element with its
  // mirror across the main diagonal. Diagonal elements (x == y)
  // exchange with themselves, a harmless no-op.
  for (size_t y = 0; y < sy; y++) {
    const size_t row_start = sx * y; // hoisted row offset
    for (size_t x = y; x < sx; x++) {
      const size_t a = x + row_start; // C-order index of (x, y)
      const size_t b = y + sy * x;    // its transposed position
      const T held = arr[a];
      arr[a] = arr[b];
      arr[b] = held;
    }
  }
}
58
+
59
+ /* A permutation, P(k), is a mapping of
60
+ * one arrangement of numbers to another.
61
+ * For an m x n array, the permutation
62
+ * mapping from C to Fortran order is:
63
+ *
64
+ * P(k) := mk mod mn - 1
65
+ * iP(k) := nk mod mn - 1 (the inverse)
66
+ *
67
+ * Where does this come from? Assume we are
68
+ * going from C to Fortran order (it doesn't
69
+ * matter either way). The indices are defined
70
+ * as:
71
+ *
72
+ * k = C(x,y) = x + sx * y
73
+ * F(x,y) = y + sy * x
74
+ *
75
+ * The permutation P(k) is the transformation:
76
+ *
77
+ * P(C(x,y)) = F(x,y)
78
+ *
79
+ * 1. P(x + sx * y) = y + sy * x
80
+ * 2. sy (x + sx y) = sy x + sx sy y
81
+ * 3. Let q = (sx sy - 1)
82
+ * 4. sy x + sx sy y % q
83
+ * 5. ((sy x % q) + (sx sy y % q)) % q by distributive identity
84
+ * 6. sy x is identical b/c q is always bigger
85
+ * 7. sx sy y reduces to y
86
+ * 8 q is always bigger than sy x + y so it disappears
87
+ *
88
+ * ==> P(k) = y + sy * x = F(x,y)
89
+ * ==> P(k) = sy * k % (sx sy - 1)
90
+ *
91
+ * Note that P(0) and P(q) are always 0 and q respectively.
92
+ *
93
+ * Now we need a way to implement this insight.
94
+ * How can we move the data around without using too
95
+ * much extra space? A simple algorithm is
96
+ * "follow-the-cycles". Each time you try moving a
97
+ * k to P(k), it displaces the resident tile. Eventually,
98
+ * this forms a cycle. When you reach the end of a cycle,
99
+ * you can stop processing and move to unvisited parts of
100
+ * the array. This requires storing a packed bit representation
101
+ * of where we've visited to make sure we get everything.
102
+ * This means we need to store between 2.0x and 1.016x
103
+ * memory in the size of the original array depending on its
104
+ * data type (2.0x would be a transpose of another bit packed
105
+ * array and 1.016x would be 64-bit data types).
106
+ *
107
+ * There are fancier algorithms that use divide-and-conquer,
108
+ * and SIMD tricks, and near zero extra memory, but
109
+ * this is a good place to start. Fwiw, the bit vector
110
+ * has an O(nm) time complexity (really 2nm) while the
111
+ * sans-bit vector algorithms are O(nm log nm).
112
+ */
113
template <typename T>
void rect_ipt(T* arr, const size_t sx, const size_t sy) {
  // In-place transpose of a rectangular sx x sy array using the
  // follow-the-cycles algorithm described above: move each element
  // to its permuted index, chase the displaced element until the
  // cycle closes, and use a bit vector to visit each cycle once.
  const size_t sxy = sx * sy;

  // Guard: zero or one element needs no work; without this check an
  // empty array would index visited[sxy - 1] out of bounds (UB).
  if (sxy <= 1) {
    return;
  }

  std::vector<bool> visited(sxy);

  // Index 0 and index sxy - 1 are fixed points of the permutation.
  visited[0] = true;
  visited[sxy - 1] = true;

  const size_t q = sxy - 1;

  size_t k, next_k;
  T tmp1, tmp2;

  for (size_t i = 1; i < q; i++) {
    if (visited[i]) {
      continue;
    }

    k = i;
    tmp1 = arr[k];
    // P(k) = sy * k mod q, computed without a modulo:
    // for valid k, sy*k / q == k / sx, so subtracting q*(k / sx)
    // performs the reduction.
    next_k = sy * k - q * (k / sx);

    while (!visited[next_k]) {
      tmp2 = arr[next_k];
      arr[next_k] = tmp1;
      tmp1 = tmp2;
      visited[next_k] = true;
      k = next_k;
      next_k = sy * k - q * (k / sx); // P(k)
    }
  }
}
147
+
148
+ // note: sx == sy == sz... find better convention?
149
+ // still good for multiple-dispatch.
150
template <typename T>
void square_ipt(
  T* arr,
  const size_t sx, const size_t sy, const size_t sz
) {
  // In-place transpose of a cubic volume (requires sx == sy == sz).
  // Element (x, y, z) trades places with element (z, y, x). Starting
  // x at z visits each off-diagonal pair exactly once; when x == z
  // an element is paired with itself (no-op swap).
  const size_t sxy = sx * sy;
  const size_t syz = sy * sz;

  for (size_t z = 0; z < sz; z++) {
    for (size_t y = 0; y < sy; y++) {
      const size_t slab = sx * y + sxy * z; // hoisted row/slice offset
      for (size_t x = z; x < sx; x++) {
        const size_t a = x + slab;             // C-order index of (x, y, z)
        const size_t b = z + sz * y + syz * x; // index of (z, y, x)
        const T held = arr[a];
        arr[a] = arr[b];
        arr[b] = held;
      }
    }
  }
}
178
+
179
inline size_t P_3d(
  const size_t k,
  const size_t sx, const size_t sy, const size_t sz
) {
  // Maps a C-order (row-major) index k = x + sx*y + sx*sy*z to the
  // corresponding Fortran-order (column-major) index
  // z + sz*y + sz*sy*x.
  const size_t sxy = sx * sy;

  const size_t z = k / sxy;
  const size_t rem = k - z * sxy; // k with the z component stripped
  const size_t y = rem / sx;
  const size_t x = rem - y * sx;

  return z + sz * (y + sy * x);
}
192
+
193
+ template <typename T>
194
+ void rect_ipt(
195
+ T* arr,
196
+ const size_t sx, const size_t sy, const size_t sz
197
+ ) {
198
+ const size_t sxy = sx * sy;
199
+ const size_t N = sxy * sz;
200
+
201
+ std::vector<bool> visited;
202
+ visited.resize(N);
203
+
204
+ visited[0] = true;
205
+ visited[N - 1] = true;
206
+
207
+ size_t k, next_k;
208
+ T tmp1 = 0, tmp2 = 0;
209
+
210
+ for (size_t i = 1; i < (N - 1); i++) {
211
+ if (visited[i]) {
212
+ continue;
213
+ }
214
+
215
+ k = i;
216
+ tmp1 = arr[k];
217
+ next_k = P_3d(k, sx, sy, sz);
218
+ while (!visited[next_k]) {
219
+ tmp2 = arr[next_k];
220
+ arr[next_k] = tmp1;
221
+ tmp1 = tmp2;
222
+ visited[next_k] = true;
223
+ k = next_k;
224
+ next_k = P_3d(k, sx, sy, sz);
225
+ }
226
+ }
227
+ }
228
+
229
inline size_t P_4d(
  const size_t k,
  const size_t sx, const size_t sy, const size_t sz, const size_t sw
) {
  // Maps a C-order index k = x + sx*y + sx*sy*z + sx*sy*sz*w to the
  // corresponding Fortran-order index w + sw*z + sw*sz*y + sw*sz*sy*x.
  const size_t sxy = sx * sy;
  const size_t sxyz = sxy * sz;

  // Peel off coordinates from the most-significant stride downward,
  // keeping a running remainder.
  size_t rem = k;
  const size_t w = rem / sxyz;
  rem -= w * sxyz;
  const size_t z = rem / sxy;
  rem -= z * sxy;
  const size_t y = rem / sx;
  const size_t x = rem - y * sx;

  return w + sw * (z + sz * (y + sy * x));
}
245
+
246
+ template <typename T>
247
+ void rect_ipt(
248
+ T* arr,
249
+ const size_t sx, const size_t sy, const size_t sz, const size_t sw
250
+ ) {
251
+
252
+ const size_t N = sx * sy * sz * sw;
253
+
254
+ std::vector<bool> visited;
255
+ visited.resize(N);
256
+
257
+ visited[0] = true;
258
+ visited[N - 1] = true;
259
+
260
+ size_t k, next_k;
261
+ T tmp1 = 0, tmp2 = 0;
262
+
263
+ for (size_t i = 1; i < (N - 1); i++) {
264
+ if (visited[i]) {
265
+ continue;
266
+ }
267
+
268
+ k = i;
269
+ tmp1 = arr[k];
270
+ next_k = P_4d(k, sx, sy, sz, sw);
271
+ while (!visited[next_k]) {
272
+ tmp2 = arr[next_k];
273
+ arr[next_k] = tmp1;
274
+ tmp1 = tmp2;
275
+ visited[next_k] = true;
276
+ k = next_k;
277
+ next_k = P_4d(k, sx, sy, sz, sw);
278
+ }
279
+ }
280
+ }
281
+
282
template <typename T>
void ipt(T* arr, const size_t sx) {
  // A 1D array is its own transpose, so there is nothing to do.
  // Kept for a uniform interface across dimensionalities.
}
286
+
287
template <typename T>
void ipt(T* arr, const size_t sx, const size_t sy) {
  // 2D dispatcher: empty and single-element arrays need no work;
  // square matrices take the cheaper pairwise-swap routine, all
  // others the follow-the-cycles permutation.
  if (sx * sy <= 1) {
    return;
  }

  if (sx == sy) {
    square_ipt(arr, sx, sy);
    return;
  }

  rect_ipt(arr, sx, sy);
}
300
+
301
template <typename T>
void ipt(T* arr, const size_t sx, const size_t sy, const size_t sz) {
  // 3D dispatcher: empty and single-element volumes need no work;
  // cubes take the cheaper pairwise-swap routine, all others the
  // follow-the-cycles permutation.
  if (sx * sy * sz <= 1) {
    return;
  }

  if (sx == sy && sy == sz) {
    square_ipt(arr, sx, sy, sz);
    return;
  }

  rect_ipt(arr, sx, sy, sz);
}
314
+
315
template <typename T>
void ipt(
  T* arr,
  const size_t sx, const size_t sy,
  const size_t sz, const size_t sw
) {
  // 4D dispatcher. No square specialization exists for 4D, so every
  // non-trivial array takes the follow-the-cycles path.
  if (sx * sy * sz * sw <= 1) {
    return;
  }

  rect_ipt(arr, sx, sy, sz, sw);
}
327
+
328
+ };
329
+
330
+ namespace pyipt {
331
+
332
+ template <typename T>
333
+ void _ipt2d(T* arr, const size_t sx, const size_t sy) {
334
+ ipt::ipt(arr, sx, sy);
335
+ }
336
+
337
+ template <typename T>
338
+ void _ipt3d(T* arr, const size_t sx, const size_t sy, const size_t sz) {
339
+ ipt::ipt(arr, sx, sy, sz);
340
+ }
341
+
342
+ template <typename T>
343
+ void _ipt4d(
344
+ T* arr,
345
+ const size_t sx, const size_t sy,
346
+ const size_t sz, const size_t sw
347
+ ) {
348
+
349
+ ipt::ipt(arr, sx, sy, sz, sw);
350
+ }
351
+
352
+ };
353
+
354
+ #endif
fastremap/py.typed ADDED
File without changes