PyPI - calsipro - Versions diffs - 0.10.0__tar.gz - Mend

calsipro 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

calsipro-0.10.0/PKG-INFO +31 -0
calsipro-0.10.0/pyproject.toml +50 -0
calsipro-0.10.0/src/calsipro/__init__.py +0 -0
calsipro-0.10.0/src/calsipro/analysis.py +365 -0
calsipro-0.10.0/src/calsipro/cli.py +443 -0
calsipro-0.10.0/src/calsipro/io.py +129 -0
calsipro-0.10.0/src/calsipro/organoid_database.py +98 -0
calsipro-0.10.0/src/calsipro/peak_calling.py +170 -0
calsipro-0.10.0/src/calsipro/py.typed +0 -0
calsipro-0.10.0/src/calsipro/visualisations.py +118 -0

calsipro-0.10.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,31 @@
+Metadata-Version: 2.1
+Name: calsipro
+Version: 0.10.0
+Summary:
+Author: Simon Haendeler
+Author-email: simon.ac@haend.de
+Requires-Python: >=3.8,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: Pillow (>=9.2.0,<10.0.0)
+Requires-Dist: aicsimageio (>=4.9.4,<5.0.0)
+Requires-Dist: aicspylibczi (>=3.0.5)
+Requires-Dist: bokeh (>=3.0.0,<4.0.0)
+Requires-Dist: click (>=8.1.3,<9.0.0)
+Requires-Dist: datashader (>=0.14.2,<0.15.0)
+Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
+Requires-Dist: fsspec (>=2022.7.1)
+Requires-Dist: matplotlib (>=3.5.3,<4.0.0)
+Requires-Dist: numba (>=0.56.0,<0.57.0) ; python_version >= "3.8" and python_version < "3.11"
+Requires-Dist: numba (>=0.57,<0.58) ; python_version >= "3.11"
+Requires-Dist: numpy (>=1.18)
+Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
+Requires-Dist: polars (>=0.16.14,<0.17.0)
+Requires-Dist: pywavelets (>=1.4.1,<2.0.0)
+Requires-Dist: s3fs (>=2023.5.0,<2024.0.0)
+Requires-Dist: scikit-learn (>=1.2.1,<2.0.0)
+Requires-Dist: scipy (>=1.9.0,<2.0.0)
+Requires-Dist: syn-bokeh-helpers (>=0.5.0,<0.6.0)

calsipro-0.10.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,50 @@
+[tool.ruff]
+line-length = 120
+[tool.poetry]
+name = "calsipro"
+version = "0.10.0"
+description = ""
+authors = ["Simon Haendeler <simon.ac@haend.de>"]
+[tool.poetry.scripts]
+calsipro = 'calsipro.cli:cli'
+[tool.poetry.dependencies]
+python = "^3.8"
+openpyxl = "^3.0.10"
+datashader = "^0.14.2"
+numba = [{version = "^0.56.0", python = ">=3.8,<3.11"}, {version = "^0.57", python=">=3.11"}]
+Pillow = "^9.2.0"
+click = "^8.1.3"
+numpy = ">=1.18"
+scipy = "^1.9.0"
+matplotlib = "^3.5.3"
+bokeh = "^3.0.0"
+aicsimageio = "^4.9.4"
+aicspylibczi = ">=3.0.5"
+fsspec = ">=2022.7.1"
+syn-bokeh-helpers = {version = "^0.5.0", source = "syntonym"}
+pywavelets = "^1.4.1"
+ffmpeg-python = "^0.2.0"
+scikit-learn = "^1.2.1"
+polars = "^0.16.14"
+s3fs = "^2023.5.0"
+[tool.poetry.dev-dependencies]
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.2.0"
+mypy = "^0.991"
+popy = {version = "^0.1.1", source = "syntonym"}
+[[tool.poetry.source]]
+name = "syntonym"
+url = "http://localhost:8080/"
+default = false
+secondary = false
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"

calsipro-0.10.0/src/calsipro/__init__.py ADDED Viewed

File without changes

calsipro-0.10.0/src/calsipro/analysis.py ADDED Viewed

@@ -0,0 +1,365 @@
+import numpy as np
+import polars as pl
+import scipy.ndimage
+import numba
def moving_average(a, n=3):
    """Return the moving average of ``a`` over windows of ``n`` along axis 2.

    ``a`` is indexed with three axes. A window size of 0 returns ``a`` itself,
    unchanged. Otherwise a float array is returned that keeps only the
    positions from index ``n - 1`` onwards on the last axis.
    """
    if n == 0:
        return a
    running = np.cumsum(a, axis=2, dtype=float)
    # The sum of the window ending at k is cumsum[k] - cumsum[k - n].
    window_sums = running[:, :, n:] - running[:, :, :-n]
    running[:, :, n:] = window_sums
    return running[:, :, n - 1:] / n
def normalize(data):
    """Rescale ``data`` linearly so its minimum maps to 0 and its maximum to 1.

    Constant input (maximum equal to minimum) has no range to rescale. It
    yields an all-zero array instead of dividing by zero and producing NaN.
    """
    low, high = np.min(data), np.max(data)
    span = high - low
    if span == 0:
        # Dividing by 1 keeps the same result dtype as the regular path.
        return (data - low) / 1
    return (data - low) / span
def _calculate_bf_threshold_and_mask(data, min_size=30, border_size=5):
    """Threshold ``data`` and return the resulting mask.

    Candidate thresholds are tried with ``pick`` = 1, 2, ... 79. For each one
    a mask of the values at or below the threshold is computed. The search
    stops as soon as that mask is empty or holds at least ``min_size`` pixels;
    if neither happens, the mask from the last candidate is returned.

    ``border_size`` is accepted but not used by this function.
    """
    for pick in range(1, 80):
        threshold = calculate_threshold(data, pick=pick)
        mask = calculate_mask(data, th=threshold, raw=True, larger=False)
        mask_size = mask.sum()
        # Only a non-empty but too-small mask warrants trying the next pick.
        if mask_size == 0 or mask_size >= min_size:
            break
    return mask
def calculate_mask(t, th=0.25, raw=False, labelling=True, larger=True):
    """Threshold ``t`` and optionally keep only the largest connected region.

    Args:
        t: Input array. Unless ``raw`` is set, it is first reduced with a
            maximum over axis 0 and rescaled with ``normalize``.
        th: Threshold compared against the (possibly normalized) values.
        raw: Use ``t`` as given, skipping projection and normalization.
        labelling: When false, return the plain thresholded mask.
        larger: Select values ``>= th`` when true, ``<= th`` otherwise.

    Returns:
        A boolean mask. With ``labelling`` enabled and more than one connected
        component present, only the component with the most pixels is kept
        (the first one wins ties).
    """
    if not raw:
        t = normalize(np.max(t, axis=0))
    mask = t >= th if larger else t <= th
    if not labelling:
        return mask

    label, count = scipy.ndimage.label(mask)
    if count <= 1:
        # Zero or one component: nothing to choose between.
        return mask
    # Count the pixels of every component in one pass; index 0 is background.
    sizes = np.bincount(label.ravel(), minlength=count + 1)[1:]
    return label == (np.argmax(sizes) + 1)
def calculate_threshold(data, pick=1):
    """Derive an intensity threshold from the log-histogram of ``data``.

    The log-transformed values are binned into 80 bins. The log-frequency
    profile is "flooded" from both sides (running maximum from the left and
    from the right, then the minimum of the two); the bin where the flooded
    level exceeds the profile the most marks the cut. ``pick`` selects the
    ``pick``-th deepest such bin instead of the deepest one.

    Args:
        data: Numeric array. If its minimum is 0, all values are shifted by 1
            before taking the logarithm.
        pick: 1 for the deepest bin, 2 for the second deepest, and so on.

    Returns:
        The threshold in the original scale of ``data``. ``np.max(data) + 1``
        is returned instead when the cut lands on bin 0 or when the bins from
        the cut upwards hold less than 0.1% or more than 99.9% of the counts.

    Raises:
        Any exception raised while building the histogram is re-raised after
        diagnostic values have been printed.
    """
    offset = 1 if np.min(data) == 0 else 0
    try:
        counts, bins = np.histogram(np.log(data + offset), 80)
    except Exception:
        d1 = data + offset
        d2 = np.log(d1)
        print('data+offset', d1)
        print('log(data+offset)', d2)
        print('offset', offset)
        print('data min', np.min(data))
        print('data max', np.max(data))
        print('data+offset min', np.min(d1))
        print('data+offset max', np.max(d1))
        print('log(data+offset min)', np.min(d2))
        print('log(data+offset max)', np.max(d2))
        raise

    # Give empty bins before the first and after the last populated bin a
    # count of one.
    populated = np.flatnonzero(counts > 0)
    if populated.size:
        counts[:populated[0]] = 1
        counts[populated[-1] + 1:] = 1
    else:
        counts[:] = 1

    freq = np.log(1 + counts)
    left_flood = np.maximum.accumulate(freq)
    right_flood = np.maximum.accumulate(freq[::-1])[::-1]
    flood = np.minimum(left_flood, right_flood)
    depth = flood - freq

    if pick != 1:
        idx = np.argsort(depth)[-pick]
    else:
        idx = np.argmax(depth)

    # Move right from the chosen bin to just before the profile has risen by
    # 10% of the way to its maximum on that side.
    rest = freq[idx:]
    low = freq[idx]
    high = np.max(rest)
    idx_offset = max(0, np.argmax(rest >= (low + (high - low) * 0.10)) - 1)
    idx = idx + idx_offset

    pixels = np.sum(counts[idx:]) / np.sum(counts)
    if pixels < 0.001 or 0.999 < pixels or idx == 0:
        return np.max(data) + 1
    return np.exp(bins[idx + 1]) - offset
+def _find_biggest(mask):
+    label, count = scipy.ndimage.label(mask)
+    if count == 1:
+        return mask
+    else:
+        sizes = list(np.bincount(label[mask]))
+        assert sizes[0] == 0
+        sizes = sizes[1:]
+        assert len(sizes) == count
+        if len(sizes) > 0:
+            biggest = np.argmax(sizes)+1
+        else:
+            biggest = 1
+        return label == biggest
def time_analysis(t, intensity_cutoff=0.5):
    """Return, per pixel, the first index along axis 0 reaching the cutoff.

    Each pixel's series (axis 0 of the three-axis array ``t``) is shifted by
    its minimum and divided by the maximum of the shifted series. The result
    holds the first index at which that rescaled series is
    ``>= intensity_cutoff``.
    """
    baseline = np.min(t, axis=0)[np.newaxis, :, :]
    shifted = t - baseline
    peak = np.max(shifted, axis=0)[np.newaxis, :, :]
    scaled = shifted / peak
    # argmax over booleans yields the position of the first True.
    return np.argmax(scaled >= intensity_cutoff, axis=0)
def push_low_pixels(time, mask, min_count=30):
    """Merge sparsely populated extreme time values into their neighbours.

    Starting at the smallest time value inside ``mask``, every value held by
    fewer than ``min_count`` pixels is moved up by one until a sufficiently
    populated value is found. The same is then done downwards from the
    largest value. Pixels outside ``mask`` end up at the maximum of the
    adjusted array.

    Args:
        time: Integer array of time values; it is modified in place.
        mask: Boolean array selecting the pixels of interest.
        min_count: Minimum number of pixels a time value needs to be kept.

    Returns:
        The same ``time`` array that was passed in.
    """
    first = np.min(time[mask])
    last = np.max(time[mask])
    # Park outside pixels above the range so they are not counted below.
    time[~mask] = last + 1

    for i in range(first, last + 1):
        if np.sum(time == i) >= min_count:
            break
        time[time == i] = i + 1
        first = first + 1

    for i in range(first, last + 1)[::-1]:
        if np.sum(time == i) >= min_count:
            break
        time[time == i] = i - 1

    time[~mask] = np.max(time)
    return time
def find_times(data, mask, times, as_index=True):
    """Locate, for each value in ``times``, the masked pixels holding it.

    Pixels outside ``mask`` never match, whatever value they hold.

    Args:
        data: Array of time values.
        mask: Boolean array of the same shape selecting valid pixels.
        times: Iterable of values to look up.
        as_index: Return index tuples (as from ``nonzero``) when true,
            boolean masks otherwise.

    Returns:
        A list with one entry per value in ``times``.
    """
    # Combining with the mask needs no sentinel value, so nothing can collide
    # with a requested time and the input does not have to be copied.
    hits = [(data == time) & mask for time in times]
    if as_index:
        return [hit.nonzero() for hit in hits]
    return hits
def find_ori_cluster(data, mask, as_index=False):
    """Find the largest cluster of pixels holding the earliest masked value.

    The pixels inside ``mask`` equal to the minimum of ``data[mask]`` are
    grouped into connected components, with diagonal neighbours counting as
    connected. If there are several components, the one with the most pixels
    is kept (the first one wins ties).

    Args:
        data: Two-dimensional array of time values.
        mask: Boolean array of the same shape selecting valid pixels.
        as_index: Return the cluster's mean position instead of its mask.

    Returns:
        The boolean cluster mask, or with ``as_index`` an array of shape
        ``(2, 1)`` holding the mean index along axis 0 and along axis 1.
    """
    ori_time = np.min(data[mask])
    cluster_mask = (data == ori_time) & mask
    structure = scipy.ndimage.generate_binary_structure(2, 2)
    label, count = scipy.ndimage.label(cluster_mask, structure)
    if count > 1:
        # Count the pixels of every component in one pass; index 0 is
        # background.
        sizes = np.bincount(label.ravel(), minlength=count + 1)[1:]
        cluster_mask = label == (np.argmax(sizes) + 1)
    if as_index:
        xs, ys = cluster_mask.nonzero()
        return np.array((np.average(xs), np.average(ys))).reshape((2, 1))
    return cluster_mask
def calculate_speed(time, mask):
    """Tabulate the mean distance-per-time from the origin cluster per time value.

    The origin is the mean position returned by ``find_ori_cluster``. For each
    time value after the earliest one inside ``mask``, the pixels holding that
    value are located, their Euclidean distances to the origin are computed,
    and the mean distance divided by the elapsed time is recorded. Time values
    without pixels are skipped.

    Returns:
        A tuple ``(table, overall)``. ``table`` is a polars DataFrame with
        columns ``time`` (elapsed time), ``speed`` and ``n`` (pixel count);
        its first row is the earliest time value with speed ``-1``.
        ``overall`` is the sum of distance/elapsed over all tabulated pixels
        divided by their number.
    """
    start, end = np.min(time[mask]), np.max(time[mask])
    origin = np.array(find_ori_cluster(time, mask, as_index=True)).reshape((2, 1))
    timepoints = list(range(start + 1, end + 1))
    coords_per_time = [np.stack([xs, ys]) for xs, ys in find_times(time, mask, timepoints, as_index=True)]

    # Each row is (elapsed time, speed, pixel count).
    # NOTE(review): the first row counts every pixel equal to the start value,
    # including pixels outside the mask; confirm this is intended.
    rows = [(0, -1, np.sum(time == start))]
    speed_sum = 0
    pixel_total = 0
    for timepoint, coords in zip(timepoints, coords_per_time):
        distances = np.sqrt(np.sum((coords - origin) ** 2, axis=0))
        pixel_count = distances.shape[0]
        if pixel_count == 0:
            continue
        elapsed = timepoint - start
        travelled = np.sum(distances)
        speed_sum += travelled / elapsed
        pixel_total += pixel_count
        rows.append((elapsed, travelled / (pixel_count * elapsed), pixel_count))

    # Avoid dividing by zero when no later time value had any pixels.
    if pixel_total == 0:
        pixel_total = 1

    dts, speeds, ns = zip(*rows)
    table = pl.DataFrame({'time': np.array(dts, dtype=np.int64),
                          'speed': np.array(speeds, dtype=np.float64),
                          'n': np.array(ns, dtype=np.int64)})
    return (table, speed_sum / pixel_total)
def calculate_speed_better(time, mask):
    """Return a per-pixel speed map relative to the origin cluster.

    For every pixel, the Euclidean distance to the mean position returned by
    ``find_ori_cluster`` is divided by the time elapsed since the smallest
    value in ``time``. Pixels with zero elapsed time get a speed of 0.

    Args:
        time: Two-dimensional array of time values.
        mask: Boolean array passed on to ``find_ori_cluster``.

    Returns:
        A float array with the shape of ``time``.
    """
    ori_pos = find_ori_cluster(time, mask, as_index=True)
    row_offsets = (np.arange(time.shape[0]) - ori_pos[0]).reshape((time.shape[0], 1))
    col_offsets = (np.arange(time.shape[1]) - ori_pos[1]).reshape((1, time.shape[1]))
    # The squared offsets must be added, not subtracted, to get a distance;
    # hypot broadcasts the column and row vectors to the full grid.
    dists = np.hypot(row_offsets, col_offsets)
    elapsed = time - np.min(time)
    speed = np.zeros(dists.shape, dtype=float)
    # Divide only where time has elapsed; the rest keeps the speed of 0.
    np.divide(dists, elapsed, out=speed, where=elapsed != 0)
    return speed
def tabularize_speed(time, speed, mask):
    """Summarise a per-pixel speed map per time value.

    Only pixels inside ``mask`` are considered, and ``time`` is left
    unmodified. Time values run from one past the smallest to the largest
    value found inside the mask.

    Args:
        time: Integer array of time values.
        speed: Array of per-pixel speeds with the same shape.
        mask: Boolean array selecting the pixels to include.

    Returns:
        A tuple ``(table, overall)``. ``table`` is a polars DataFrame with
        columns ``time``, ``speed`` (mean speed of the pixels at that time
        value, NaN if there are none) and ``n`` (pixel count). ``overall`` is
        the mean speed over all masked pixels.
    """
    # NOTE(review): the range is taken inside the mask, matching
    # calculate_speed; confirm this is the intended range.
    start, end = np.min(time[mask]), np.max(time[mask])
    timepoints = list(range(start + 1, end + 1))
    selections = [(time == timepoint) & mask for timepoint in timepoints]
    ns = [np.sum(selection) for selection in selections]
    # Keep the per-time means under their own name so that ``speed`` is still
    # the array when the overall mean is computed below.
    mean_speeds = [np.average(speed[selection]) if n > 0 else np.nan
                   for selection, n in zip(selections, ns)]
    r = (pl.DataFrame({'time': np.array(timepoints, dtype=np.int64),
                       'speed': np.array(mean_speeds, dtype=np.float64),
                       'n': np.array(ns, dtype=np.int64)}),
         np.average(speed[mask]))
    return r
def reachability(data, threshold=0.02):
    """Return the mask filled in by ``_reachability`` for normalized ``data``.

    ``data`` is rescaled with ``normalize``; ``threshold`` is the largest
    difference between neighbouring normalized values that ``_reachability``
    still steps across. The search uses a step of 1 along both axes.
    """
    scaled = normalize(data)
    reached = np.zeros(scaled.shape, dtype=np.bool_)
    # Separate bookkeeping array so each pixel is queued at most once.
    queued = np.zeros(scaled.shape, dtype=np.bool_)
    _reachability(scaled, queued, reached, threshold, 1, 1)
    return reached
@numba.njit(cache=True)
def _reachability(data, scheduled, mask, threshold, dx, dy):
    """Flood-fill ``mask`` from the array border across small value steps.

    Every border pixel of ``data`` is marked in ``mask`` and used as a seed.
    From each processed pixel, the neighbours at ``+-dx`` along axis 0 and
    ``+-dy`` along axis 1 are marked when their value differs from the current
    pixel's by at most ``threshold``; newly marked pixels are processed in
    turn.

    Args:
        data: Two-dimensional array of values.
        scheduled: Boolean array of the same shape, updated in place, that
            records which pixels have already been queued.
        mask: Boolean array of the same shape, updated in place with the
            reached pixels.
        threshold: Largest allowed absolute difference between neighbours.
        dx: Step along axis 0.
        dy: Step along axis 1.
    """
    x_len, y_len = data.shape
    queue = []

    # Seed the whole border, corners included.
    for y in range(y_len):
        for x in (0, x_len - 1):
            mask[x, y] = True
            if not scheduled[x, y]:
                scheduled[x, y] = True
                queue.append((x, y))
    for x in range(x_len):
        for y in (0, y_len - 1):
            mask[x, y] = True
            if not scheduled[x, y]:
                scheduled[x, y] = True
                queue.append((x, y))

    offsets = ((dx, 0), (-dx, 0), (0, dy), (0, -dy))
    # Walk the queue with a read index instead of popping from the front,
    # which would shift the whole list on every step.
    head = 0
    while head < len(queue):
        x, y = queue[head]
        head += 1
        b_v = data[x, y]
        for off_x, off_y in offsets:
            nx, ny = x + off_x, y + off_y
            if 0 <= nx < x_len and 0 <= ny < y_len:
                if abs(data[nx, ny] - b_v) <= threshold:
                    mask[nx, ny] = True
                    if not scheduled[nx, ny]:
                        scheduled[nx, ny] = True
                        queue.append((nx, ny))
def calculate_bf_mask(data):
    """Compute a mask for ``data`` after flattening the border-reachable area.

    The pixels reported by ``reachability`` are replaced, on a copy of
    ``data``, by their mean value; the copy is then passed to
    ``_calculate_bf_threshold_and_mask``, whose mask is returned. ``data``
    itself is not modified.
    """
    flattened = data.copy()
    background = reachability(flattened)
    flattened[background] = np.mean(flattened[background])
    return _calculate_bf_threshold_and_mask(flattened)