PyPI - pragmastat - Versions diffs - 3.1.29__tar.gz → 3.1.30__tar.gz - Mend

pragmastat 3.1.29 (tar.gz) → 3.1.30 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pragmastat might be problematic. Click here for more details.

Files changed (24) [hide / show]

{pragmastat-3.1.29/pragmastat.egg-info → pragmastat-3.1.30}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pragmastat
-Version: 3.1.29
+Version: 3.1.30
 Summary: Pragmastat: Pragmatic Statistical Toolkit
 Author: Andrey Akinshin
 License-Expression: MIT
@@ -18,9 +18,9 @@ Dynamic: license-file
 This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
-- PDF manual for this version: [pragmastat-v3.1.29.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.pdf)
-- Markdown manual for this version: [pragmastat-v3.1.29.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.md)
-- Source code for this version: [pragmastat/py/v3.1.29](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.29/py)
+- PDF manual for this version: [pragmastat-v3.1.30.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.pdf)
+- Markdown manual for this version: [pragmastat-v3.1.30.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.md)
+- Source code for this version: [pragmastat/py/v3.1.30](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.30/py)
 - Latest online manual: https://pragmastat.dev
 - Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
@@ -29,7 +29,7 @@ This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit',
 Install from PyPI:
 ```bash
-pip install pragmastat==3.1.29
+pip install pragmastat==3.1.30
 ```
 ## Demo

{pragmastat-3.1.29 → pragmastat-3.1.30}/README.md RENAMED Viewed

@@ -2,9 +2,9 @@
 This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
-- PDF manual for this version: [pragmastat-v3.1.29.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.pdf)
-- Markdown manual for this version: [pragmastat-v3.1.29.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.md)
-- Source code for this version: [pragmastat/py/v3.1.29](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.29/py)
+- PDF manual for this version: [pragmastat-v3.1.30.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.pdf)
+- Markdown manual for this version: [pragmastat-v3.1.30.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.md)
+- Source code for this version: [pragmastat/py/v3.1.30](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.30/py)
 - Latest online manual: https://pragmastat.dev
 - Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
@@ -13,7 +13,7 @@ This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit',
 Install from PyPI:
 ```bash
-pip install pragmastat==3.1.29
+pip install pragmastat==3.1.30
 ```
 ## Demo

{pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat/estimators.py RENAMED Viewed

@@ -3,6 +3,7 @@ import numpy as np
 from numpy.typing import NDArray
 from .fast_center import _fast_center
 from .fast_spread import _fast_spread
+from .fast_shift import _fast_shift
 def center(x: Union[Sequence[float], NDArray]) -> float:
@@ -39,8 +40,8 @@ def shift(
     y = np.asarray(y)
     if len(x) == 0 or len(y) == 0:
         raise ValueError("Input arrays cannot be empty")
-    pairwise_shifts = np.subtract.outer(x, y)
-    return float(np.median(pairwise_shifts))
+    # Use fast O((m+n) log L) algorithm instead of materializing all m*n differences
+    return float(_fast_shift(x, y, p=0.5))
 def ratio(

pragmastat-3.1.30/pragmastat/fast_shift.py ADDED Viewed

@@ -0,0 +1,243 @@
+"""Fast O((m+n) log L) implementation of the Shift estimator.
+Computes quantiles of all pairwise differences without materializing them.
+Uses binary search in value space with two-pointer counting.
+"""
+from typing import List, Union, Sequence
+import numpy as np
+from numpy.typing import NDArray
+# Try to import the C implementation, fall back to pure Python if unavailable
+try:
+    from . import _fast_shift_c
+    _HAS_C_EXTENSION = True
+except ImportError:
+    _HAS_C_EXTENSION = False
+def _midpoint(a: float, b: float) -> float:
+    """Compute numerically stable midpoint."""
+    return a + (b - a) * 0.5
+def _count_and_neighbors(
+    x: List[float], y: List[float], threshold: float
+) -> tuple[int, float, float]:
+    """
+    Count pairs where x[i] - y[j] <= threshold using two-pointer algorithm.
+    Also tracks the closest actual differences on either side of threshold.
+    Args:
+        x: Sorted array of x values
+        y: Sorted array of y values
+        threshold: The threshold value
+    Returns:
+        Tuple of (count_less_or_equal, closest_below, closest_above)
+    """
+    m = len(x)
+    n = len(y)
+    count = 0
+    max_below = float("-inf")
+    min_above = float("inf")
+    j = 0
+    for i in range(m):
+        # Move j forward while x[i] - y[j] > threshold
+        while j < n and x[i] - y[j] > threshold:
+            j += 1
+        # All elements from y[j] to y[n-1] satisfy x[i] - y[j] <= threshold
+        count += n - j
+        # Track boundary values
+        if j < n:
+            diff = x[i] - y[j]
+            max_below = max(max_below, diff)
+        if j > 0:
+            diff = x[i] - y[j - 1]
+            min_above = min(min_above, diff)
+    # Fallback to actual min/max if no boundaries found
+    if max_below == float("-inf"):
+        max_below = x[0] - y[n - 1]
+    if min_above == float("inf"):
+        min_above = x[m - 1] - y[0]
+    return count, max_below, min_above
+def _select_kth_pairwise_diff(x: List[float], y: List[float], k: int) -> float:
+    """
+    Select the k-th smallest pairwise difference (1-indexed).
+    Uses binary search in value space to avoid materializing all differences.
+    Args:
+        x: Sorted array of x values
+        y: Sorted array of y values
+        k: The rank to select (1-indexed)
+    Returns:
+        The k-th smallest pairwise difference
+    """
+    m = len(x)
+    n = len(y)
+    total = m * n
+    if k < 1 or k > total:
+        raise ValueError(f"k must be in [1, {total}], got {k}")
+    # Initialize search bounds
+    search_min = x[0] - y[n - 1]
+    search_max = x[m - 1] - y[0]
+    if np.isnan(search_min) or np.isnan(search_max):
+        raise ValueError("NaN in input values")
+    max_iterations = 128  # Sufficient for double precision convergence
+    prev_min = float("-inf")
+    prev_max = float("inf")
+    for _ in range(max_iterations):
+        if search_min == search_max:
+            break
+        mid = _midpoint(search_min, search_max)
+        count_le, closest_below, closest_above = _count_and_neighbors(x, y, mid)
+        # Check if we found the exact value
+        if closest_below == closest_above:
+            return closest_below
+        # No progress means we're stuck between two discrete values
+        if search_min == prev_min and search_max == prev_max:
+            return closest_below if count_le >= k else closest_above
+        prev_min = search_min
+        prev_max = search_max
+        # Narrow the search space
+        if count_le >= k:
+            search_max = closest_below
+        else:
+            search_min = closest_above
+    if search_min != search_max:
+        raise RuntimeError("Convergence failure (pathological input)")
+    return search_min
+def _fast_shift_python(
+    x: List[float], y: List[float], p: Union[float, List[float]] = 0.5
+) -> Union[float, List[float]]:
+    """
+    Pure Python implementation of fast shift estimator.
+    Computes quantiles of all pairwise differences {x_i - y_j} efficiently.
+    Time complexity: O((m + n) * log(precision)) per quantile
+    Space complexity: O(1)
+    Args:
+        x: First sample (will be sorted if needed)
+        y: Second sample (will be sorted if needed)
+        p: Quantile(s) to compute (0.5 for median). Can be a single float or list of floats.
+    Returns:
+        The quantile estimate(s). Returns float if p is float, list if p is list.
+    """
+    if len(x) == 0 or len(y) == 0:
+        raise ValueError("x and y must be non-empty")
+    # Handle single probability or list
+    return_single = isinstance(p, (float, int))
+    probabilities = [p] if return_single else list(p)
+    # Validate probabilities
+    for pk in probabilities:
+        if np.isnan(pk) or pk < 0.0 or pk > 1.0:
+            raise ValueError(f"Probabilities must be within [0, 1], got {pk}")
+    # Sort the arrays
+    xs = sorted(x)
+    ys = sorted(y)
+    m = len(xs)
+    n = len(ys)
+    total = m * n
+    # Type-7 quantile: h = 1 + (n-1)*p, then interpolate between floor(h) and ceil(h)
+    required_ranks = set()
+    interpolation_params = []
+    for pk in probabilities:
+        h = 1.0 + (total - 1) * pk
+        lower_rank = int(np.floor(h))
+        upper_rank = int(np.ceil(h))
+        weight = h - lower_rank
+        # Clamp to valid range
+        lower_rank = max(1, min(total, lower_rank))
+        upper_rank = max(1, min(total, upper_rank))
+        interpolation_params.append((lower_rank, upper_rank, weight))
+        required_ranks.add(lower_rank)
+        required_ranks.add(upper_rank)
+    # Compute required rank values
+    rank_values = {}
+    for rank in required_ranks:
+        rank_values[rank] = _select_kth_pairwise_diff(xs, ys, rank)
+    # Interpolate to get final quantile values
+    result = []
+    for lower_rank, upper_rank, weight in interpolation_params:
+        lower = rank_values[lower_rank]
+        upper = rank_values[upper_rank]
+        if weight == 0.0:
+            result.append(lower)
+        else:
+            result.append((1.0 - weight) * lower + weight * upper)
+    return result[0] if return_single else result
+def _fast_shift(
+    x: Union[Sequence[float], NDArray],
+    y: Union[Sequence[float], NDArray],
+    p: Union[float, List[float]] = 0.5,
+) -> Union[float, List[float]]:
+    """
+    Compute quantiles of all pairwise differences {x_i - y_j} efficiently.
+    Internal implementation - not part of public API.
+    Uses C implementation if available, falls back to pure Python.
+    Time complexity: O((m + n) * log(precision)) per quantile
+    Space complexity: O(1)
+    Args:
+        x: First sample
+        y: Second sample
+        p: Quantile(s) to compute (0.5 for median)
+    Returns:
+        The quantile estimate(s)
+    """
+    if _HAS_C_EXTENSION:
+        # Convert to numpy arrays and use C implementation
+        x_arr = np.asarray(x, dtype=np.float64)
+        y_arr = np.asarray(y, dtype=np.float64)
+        return_single = isinstance(p, (float, int))
+        p_arr = np.array([p] if return_single else p, dtype=np.float64)
+        result = _fast_shift_c.fast_shift_c(x_arr, y_arr, p_arr)
+        return float(result[0]) if return_single else result.tolist()
+    else:
+        # Fall back to pure Python implementation
+        return _fast_shift_python(x, y, p)

{pragmastat-3.1.29 → pragmastat-3.1.30/pragmastat.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pragmastat
-Version: 3.1.29
+Version: 3.1.30
 Summary: Pragmastat: Pragmatic Statistical Toolkit
 Author: Andrey Akinshin
 License-Expression: MIT
@@ -18,9 +18,9 @@ Dynamic: license-file
 This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
-- PDF manual for this version: [pragmastat-v3.1.29.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.pdf)
-- Markdown manual for this version: [pragmastat-v3.1.29.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.md)
-- Source code for this version: [pragmastat/py/v3.1.29](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.29/py)
+- PDF manual for this version: [pragmastat-v3.1.30.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.pdf)
+- Markdown manual for this version: [pragmastat-v3.1.30.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.md)
+- Source code for this version: [pragmastat/py/v3.1.30](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.30/py)
 - Latest online manual: https://pragmastat.dev
 - Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
@@ -29,7 +29,7 @@ This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit',
 Install from PyPI:
 ```bash
-pip install pragmastat==3.1.29
+pip install pragmastat==3.1.30
 ```
 ## Demo

{pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat.egg-info/SOURCES.txt RENAMED Viewed

@@ -8,6 +8,7 @@ setup.py
 ./pragmastat/__init__.py
 ./pragmastat/estimators.py
 ./pragmastat/fast_center.py
+./pragmastat/fast_shift.py
 ./pragmastat/fast_spread.py
 ./tests/test_invariance.py
 ./tests/test_performance.py
@@ -16,6 +17,7 @@ examples/demo.py
 pragmastat/__init__.py
 pragmastat/estimators.py
 pragmastat/fast_center.py
+pragmastat/fast_shift.py
 pragmastat/fast_spread.py
 pragmastat.egg-info/PKG-INFO
 pragmastat.egg-info/SOURCES.txt
@@ -23,6 +25,7 @@ pragmastat.egg-info/dependency_links.txt
 pragmastat.egg-info/requires.txt
 pragmastat.egg-info/top_level.txt
 src/fast_center_c.c
+src/fast_shift_c.c
 src/fast_spread_c.c
 tests/test_invariance.py
 tests/test_performance.py

{pragmastat-3.1.29 → pragmastat-3.1.30}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "pragmastat"
-version = "3.1.29"
+version = "3.1.30"
 description = "Pragmastat: Pragmatic Statistical Toolkit"
 readme = "README.md"
 requires-python = ">=3.8"

{pragmastat-3.1.29 → pragmastat-3.1.30}/setup.py RENAMED Viewed

@@ -15,6 +15,12 @@ extensions = [
         include_dirs=[numpy.get_include()],
         extra_compile_args=["-O3", "-Wall"],
     ),
+    Extension(
+        "pragmastat._fast_shift_c",
+        sources=["src/fast_shift_c.c"],
+        include_dirs=[numpy.get_include()],
+        extra_compile_args=["-O3", "-Wall"],
+    ),
 ]
 setup(

pragmastat-3.1.30/src/fast_shift_c.c ADDED Viewed

@@ -0,0 +1,354 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <numpy/arrayobject.h>
+#include <math.h>
+#include <stdlib.h>
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+// Comparison function for qsort
+static int compare_doubles(const void *a, const void *b) {
+    double da = *(const double *)a;
+    double db = *(const double *)b;
+    if (da < db) return -1;
+    if (da > db) return 1;
+    return 0;
+}
+// Numerically stable midpoint
+static double midpoint(double a, double b) {
+    return a + (b - a) * 0.5;
+}
+// Two-pointer algorithm to count pairs where x[i] - y[j] <= threshold
+// Also tracks the closest actual differences on either side of threshold
+static void count_and_neighbors(
+    double *x, npy_intp m,
+    double *y, npy_intp n,
+    double threshold,
+    long long *count_le,
+    double *closest_below,
+    double *closest_above)
+{
+    long long count = 0;
+    double max_below = -INFINITY;
+    double min_above = INFINITY;
+    npy_intp j = 0;
+    for (npy_intp i = 0; i < m; i++) {
+        // Move j forward while x[i] - y[j] > threshold
+        while (j < n && x[i] - y[j] > threshold) {
+            j++;
+        }
+        // All elements from y[j] to y[n-1] satisfy x[i] - y[j] <= threshold
+        count += (n - j);
+        // Track boundary values
+        if (j < n) {
+            double diff = x[i] - y[j];
+            if (diff > max_below) max_below = diff;
+        }
+        if (j > 0) {
+            double diff = x[i] - y[j - 1];
+            if (diff < min_above) min_above = diff;
+        }
+    }
+    // Fallback to actual min/max if no boundaries found
+    if (isinf(max_below) && max_below < 0) {
+        max_below = x[0] - y[n - 1];
+    }
+    if (isinf(min_above) && min_above > 0) {
+        min_above = x[m - 1] - y[0];
+    }
+    *count_le = count;
+    *closest_below = max_below;
+    *closest_above = min_above;
+}
+// Select the k-th smallest pairwise difference (1-indexed)
+static double select_kth_pairwise_diff(
+    double *x, npy_intp m,
+    double *y, npy_intp n,
+    long long k)
+{
+    long long total = (long long)m * n;
+    if (k < 1 || k > total) {
+        PyErr_Format(PyExc_ValueError, "k must be in [1, %lld], got %lld", total, k);
+        return NAN;
+    }
+    // Initialize search bounds
+    double search_min = x[0] - y[n - 1];
+    double search_max = x[m - 1] - y[0];
+    if (isnan(search_min) || isnan(search_max)) {
+        PyErr_SetString(PyExc_ValueError, "NaN in input values");
+        return NAN;
+    }
+    const int max_iterations = 128;
+    double prev_min = -INFINITY;
+    double prev_max = INFINITY;
+    for (int iter = 0; iter < max_iterations && search_min != search_max; iter++) {
+        double mid = midpoint(search_min, search_max);
+        long long count_le;
+        double closest_below, closest_above;
+        count_and_neighbors(x, m, y, n, mid, &count_le, &closest_below, &closest_above);
+        // Check if we found the exact value
+        if (closest_below == closest_above) {
+            return closest_below;
+        }
+        // No progress means we're stuck between two discrete values
+        if (search_min == prev_min && search_max == prev_max) {
+            return (count_le >= k) ? closest_below : closest_above;
+        }
+        prev_min = search_min;
+        prev_max = search_max;
+        // Narrow the search space
+        if (count_le >= k) {
+            search_max = closest_below;
+        } else {
+            search_min = closest_above;
+        }
+    }
+    if (search_min != search_max) {
+        PyErr_SetString(PyExc_RuntimeError, "Convergence failure (pathological input)");
+        return NAN;
+    }
+    return search_min;
+}
+/*
+ * Fast O((m+n) log L) implementation of the Shift estimator
+ * Computes quantiles of all pairwise differences without materializing them
+ */
+static PyObject* fast_shift_c(PyObject* self, PyObject* args) {
+    PyArrayObject *x_array, *y_array, *p_array;
+    // Parse input
+    if (!PyArg_ParseTuple(args, "O!O!O!", &PyArray_Type, &x_array,
+                          &PyArray_Type, &y_array, &PyArray_Type, &p_array)) {
+        return NULL;
+    }
+    // Ensure arrays are 1D
+    if (PyArray_NDIM(x_array) != 1 || PyArray_NDIM(y_array) != 1 || PyArray_NDIM(p_array) != 1) {
+        PyErr_SetString(PyExc_ValueError, "All inputs must be 1-dimensional arrays");
+        return NULL;
+    }
+    npy_intp m = PyArray_DIM(x_array, 0);
+    npy_intp n = PyArray_DIM(y_array, 0);
+    npy_intp num_quantiles = PyArray_DIM(p_array, 0);
+    if (m == 0 || n == 0) {
+        PyErr_SetString(PyExc_ValueError, "x and y must be non-empty");
+        return NULL;
+    }
+    // Allocate and sort x and y
+    double *xs = (double*)malloc(m * sizeof(double));
+    double *ys = (double*)malloc(n * sizeof(double));
+    if (!xs || !ys) {
+        free(xs);
+        free(ys);
+        PyErr_NoMemory();
+        return NULL;
+    }
+    for (npy_intp i = 0; i < m; i++) {
+        xs[i] = *(double*)PyArray_GETPTR1(x_array, i);
+        if (isnan(xs[i])) {
+            free(xs);
+            free(ys);
+            PyErr_SetString(PyExc_ValueError, "NaN values not allowed in x");
+            return NULL;
+        }
+    }
+    for (npy_intp i = 0; i < n; i++) {
+        ys[i] = *(double*)PyArray_GETPTR1(y_array, i);
+        if (isnan(ys[i])) {
+            free(xs);
+            free(ys);
+            PyErr_SetString(PyExc_ValueError, "NaN values not allowed in y");
+            return NULL;
+        }
+    }
+    qsort(xs, m, sizeof(double), compare_doubles);
+    qsort(ys, n, sizeof(double), compare_doubles);
+    long long total = (long long)m * n;
+    // Process quantiles
+    // First, collect all required ranks and interpolation parameters
+    typedef struct {
+        long long lower_rank;
+        long long upper_rank;
+        double weight;
+    } InterpolationParam;
+    InterpolationParam *interp_params = (InterpolationParam*)malloc(num_quantiles * sizeof(InterpolationParam));
+    if (!interp_params) {
+        free(xs);
+        free(ys);
+        PyErr_NoMemory();
+        return NULL;
+    }
+    // Use a simple array to track unique ranks (could be optimized with hash set)
+    long long *required_ranks = (long long*)malloc(2 * num_quantiles * sizeof(long long));
+    int num_required = 0;
+    if (!required_ranks) {
+        free(xs);
+        free(ys);
+        free(interp_params);
+        PyErr_NoMemory();
+        return NULL;
+    }
+    // Collect required ranks
+    for (npy_intp i = 0; i < num_quantiles; i++) {
+        double pk = *(double*)PyArray_GETPTR1(p_array, i);
+        if (isnan(pk) || pk < 0.0 || pk > 1.0) {
+            free(xs);
+            free(ys);
+            free(interp_params);
+            free(required_ranks);
+            PyErr_Format(PyExc_ValueError, "Probabilities must be within [0, 1], got %f", pk);
+            return NULL;
+        }
+        // Type-7 quantile: h = 1 + (n-1)*p
+        double h = 1.0 + (total - 1) * pk;
+        long long lower_rank = (long long)floor(h);
+        long long upper_rank = (long long)ceil(h);
+        double weight = h - lower_rank;
+        // Clamp to valid range
+        if (lower_rank < 1) lower_rank = 1;
+        if (upper_rank > total) upper_rank = total;
+        if (lower_rank > total) lower_rank = total;
+        if (upper_rank < 1) upper_rank = 1;
+        interp_params[i].lower_rank = lower_rank;
+        interp_params[i].upper_rank = upper_rank;
+        interp_params[i].weight = weight;
+        // Add to required ranks if not already present
+        int found_lower = 0, found_upper = 0;
+        for (int j = 0; j < num_required; j++) {
+            if (required_ranks[j] == lower_rank) found_lower = 1;
+            if (required_ranks[j] == upper_rank) found_upper = 1;
+        }
+        if (!found_lower) required_ranks[num_required++] = lower_rank;
+        if (!found_upper && upper_rank != lower_rank) required_ranks[num_required++] = upper_rank;
+    }
+    // Compute rank values
+    double *rank_values = (double*)malloc(num_required * sizeof(double));
+    if (!rank_values) {
+        free(xs);
+        free(ys);
+        free(interp_params);
+        free(required_ranks);
+        PyErr_NoMemory();
+        return NULL;
+    }
+    for (int i = 0; i < num_required; i++) {
+        rank_values[i] = select_kth_pairwise_diff(xs, m, ys, n, required_ranks[i]);
+        if (isnan(rank_values[i])) {
+            // Error was set by select_kth_pairwise_diff
+            free(xs);
+            free(ys);
+            free(interp_params);
+            free(required_ranks);
+            free(rank_values);
+            return NULL;
+        }
+    }
+    // Create result array
+    npy_intp dims[1] = {num_quantiles};
+    PyArrayObject *result = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
+    if (!result) {
+        free(xs);
+        free(ys);
+        free(interp_params);
+        free(required_ranks);
+        free(rank_values);
+        return NULL;
+    }
+    // Interpolate to get final quantile values
+    for (npy_intp i = 0; i < num_quantiles; i++) {
+        long long lower_rank = interp_params[i].lower_rank;
+        long long upper_rank = interp_params[i].upper_rank;
+        double weight = interp_params[i].weight;
+        // Find rank values
+        double lower_val = 0.0, upper_val = 0.0;
+        for (int j = 0; j < num_required; j++) {
+            if (required_ranks[j] == lower_rank) lower_val = rank_values[j];
+            if (required_ranks[j] == upper_rank) upper_val = rank_values[j];
+        }
+        double result_val;
+        if (weight == 0.0) {
+            result_val = lower_val;
+        } else {
+            result_val = (1.0 - weight) * lower_val + weight * upper_val;
+        }
+        *(double*)PyArray_GETPTR1(result, i) = result_val;
+    }
+    // Cleanup
+    free(xs);
+    free(ys);
+    free(interp_params);
+    free(required_ranks);
+    free(rank_values);
+    return (PyObject*)result;
+}
+// Method definitions
+static PyMethodDef FastShiftMethods[] = {
+    {"fast_shift_c", fast_shift_c, METH_VARARGS, "Fast shift estimator in C"},
+    {NULL, NULL, 0, NULL}
+};
+// Module definition
+static struct PyModuleDef fast_shift_module = {
+    PyModuleDef_HEAD_INIT,
+    "_fast_shift_c",
+    "Fast shift estimator C extension",
+    -1,
+    FastShiftMethods
+};
+// Module initialization
+PyMODINIT_FUNC PyInit__fast_shift_c(void) {
+    import_array();
+    return PyModule_Create(&fast_shift_module);
+}

{pragmastat-3.1.29 → pragmastat-3.1.30}/tests/test_performance.py RENAMED Viewed

@@ -1,13 +1,11 @@
-"""Performance tests for fast Center and Spread implementations."""
 import time
 import numpy as np
 from pragmastat.fast_center import _fast_center
 from pragmastat.fast_spread import _fast_spread
+from pragmastat.fast_shift import _fast_shift
-def center_simple(x):
-    """Simple O(n^2) implementation for comparison."""
+def center_naive(x):
     n = len(x)
     pairwise_averages = []
     for i in range(n):
@@ -16,8 +14,7 @@ def center_simple(x):
     return np.median(pairwise_averages)
-def spread_simple(x):
-    """Simple O(n^2) implementation for comparison."""
+def spread_naive(x):
     n = len(x)
     if n == 1:
         return 0.0
@@ -28,13 +25,20 @@ def spread_simple(x):
     return np.median(pairwise_diffs)
+def shift_naive(x, y):
+    pairwise_shifts = []
+    for xi in x:
+        for yj in y:
+            pairwise_shifts.append(xi - yj)
+    return np.median(pairwise_shifts)
 def test_center_correctness():
-    """Test that _fast_center produces the same results as simple implementation."""
     np.random.seed(1729)
     for n in range(1, 101):
         for iteration in range(n):
             x = np.random.randn(n).tolist()
-            expected = center_simple(x)
+            expected = center_naive(x)
             actual = _fast_center(x)
             assert (
                 abs(expected - actual) < 1e-9
@@ -42,12 +46,11 @@ def test_center_correctness():
 def test_spread_correctness():
-    """Test that _fast_spread produces the same results as simple implementation."""
     np.random.seed(1729)
     for n in range(1, 101):
         for iteration in range(n):
             x = np.random.randn(n).tolist()
-            expected = spread_simple(x)
+            expected = spread_naive(x)
             actual = _fast_spread(x)
             assert (
                 abs(expected - actual) < 1e-9
@@ -55,7 +58,6 @@ def test_spread_correctness():
 def test_center_performance():
-    """Test performance of _fast_center on large dataset."""
     np.random.seed(1729)
     x = np.random.randn(100000).tolist()
@@ -69,7 +71,6 @@ def test_center_performance():
 def test_spread_performance():
-    """Test performance of _fast_spread on large dataset."""
     np.random.seed(1729)
     x = np.random.randn(100000).tolist()
@@ -82,6 +83,33 @@ def test_spread_performance():
     assert elapsed < 10.0, f"Performance too slow: {elapsed}s"
+def test_shift_correctness():
+    np.random.seed(1729)
+    for n in range(2, 51):
+        for m in range(2, 51):
+            x = np.random.randn(n).tolist()
+            y = np.random.randn(m).tolist()
+            expected = shift_naive(x, y)
+            actual = _fast_shift(x, y, p=0.5)
+            assert (
+                abs(expected - actual) < 1e-9
+            ), f"Mismatch for n={n}, m={m}: expected={expected}, actual={actual}"
+def test_shift_performance():
+    np.random.seed(1729)
+    x = np.random.randn(10000).tolist()
+    y = np.random.randn(10000).tolist()
+    start = time.time()
+    result = _fast_shift(x, y, p=0.5)
+    elapsed = time.time() - start
+    print(f"\nShift for n=m=10000: {result:.6f}")
+    print(f"Elapsed time: {elapsed:.3f}s")
+    assert elapsed < 10.0, f"Performance too slow: {elapsed}s"
 if __name__ == "__main__":
     test_center_correctness()
     print("✓ Center correctness tests passed")
@@ -89,8 +117,14 @@ if __name__ == "__main__":
     test_spread_correctness()
     print("✓ Spread correctness tests passed")
+    test_shift_correctness()
+    print("✓ Shift correctness tests passed")
     test_center_performance()
     print("✓ Center performance test passed")
     test_spread_performance()
     print("✓ Spread performance test passed")
+    test_shift_performance()
+    print("✓ Shift performance test passed")