dcnum 0.25.7__py3-none-any.whl → 0.25.8__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dcnum might be problematic. Click here for more details.

dcnum/__init__.py CHANGED
@@ -1,2 +1,25 @@
1
+ """Base library for deformability cytometry postprocessing
2
+ MIT License
3
+
4
+ Copyright (c) 2023 Paul Müller
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
23
+ """
1
24
  # flake8: noqa: F401
2
25
  from ._version import __version__, __version_tuple__
dcnum/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.25.7'
21
- __version_tuple__ = version_tuple = (0, 25, 7)
20
+ __version__ = version = '0.25.8'
21
+ __version_tuple__ = version_tuple = (0, 25, 8)
dcnum/read/__init__.py CHANGED
@@ -2,5 +2,6 @@
2
2
  from .cache import md5sum
3
3
  from .const import PROTECTED_FEATURES
4
4
  from .detect_flicker import detect_flickering
5
- from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
5
+ from .hdf5_data import HDF5Data, HDF5ImageCache
6
+ from .hdf5_concat import concatenated_hdf5_data
6
7
  from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/hdf5_concat.py ADDED
@@ -0,0 +1,145 @@
1
+ import io
2
+ import pathlib
3
+ import tempfile
4
+ import warnings
5
+
6
+ import h5py
7
+ import numpy as np
8
+
9
+
10
+ from .hdf5_data import HDF5Data
11
+
12
+
13
+ def concatenated_hdf5_data(paths: list[pathlib.Path],
14
+ path_out: bool | pathlib.Path | None = True,
15
+ compute_frame: bool = True,
16
+ features: list[str] | None = None):
17
+ """Return a virtual dataset concatenating all the input paths
18
+
19
+ Parameters
20
+ ----------
21
+ paths:
22
+ Path of the input HDF5 files that will be concatenated along
23
+ the feature axis. The metadata will be taken from the first
24
+ file.
25
+ path_out:
26
+ If `None`, then the dataset is created in memory. If `True`
27
+ (default), create a file on disk. If a pathlib.Path is specified,
28
+ the dataset is written to that file. Note that datasets in memory
29
+ are likely not pickable (so don't use them for multiprocessing).
30
+ compute_frame:
31
+ Whether to compute the "events/frame" feature, taking the frame
32
+ data from the input files and properly incrementing them along
33
+ the file index.
34
+ features:
35
+ List of features to take from the input files.
36
+
37
+ Notes
38
+ -----
39
+ - If one of the input files does not contain a feature from the first
40
+ input `paths`, then a `ValueError` is raised. Use the `features`
41
+ argument to specify which features you need instead.
42
+ - Basins are not considered.
43
+ """
44
+ h5kwargs = {"mode": "w", "libver": "latest"}
45
+ if isinstance(path_out, (pathlib.Path, str)):
46
+ h5kwargs["name"] = path_out
47
+ elif path_out is True:
48
+ tf = tempfile.NamedTemporaryFile(prefix="dcnum_vc_",
49
+ suffix=".hdf5",
50
+ delete=False)
51
+ tf.write(b"dummy")
52
+ h5kwargs["name"] = tf.name
53
+ tf.close()
54
+ elif path_out is None:
55
+ h5kwargs["name"] = io.BytesIO()
56
+ else:
57
+ raise ValueError(
58
+ f"Invalid type for `path_out`: {type(path_out)} ({path_out}")
59
+
60
+ if len(paths) == 0:
61
+ raise ValueError("Please specify at least one file in `paths`!")
62
+ elif len(paths) == 1:
63
+ warnings.warn("Only one file passed to `concatenated_hdf5_data`; this "
64
+ "is equivalent to using `HDF5Data`, but slower.")
65
+
66
+ frames = []
67
+
68
+ with h5py.File(**h5kwargs) as hv:
69
+ # determine the sizes of the input files
70
+ shapes = {}
71
+ dtypes = {}
72
+ size = 0
73
+ for ii, pp in enumerate(paths):
74
+ pp = pathlib.Path(pp).resolve()
75
+ with h5py.File(pp, libver="latest") as h5:
76
+ # get all feature keys
77
+ featsi = sorted(h5["events"].keys())
78
+ # get metadata
79
+ if ii == 0:
80
+ meta = dict(h5.attrs)
81
+ if not features:
82
+ features = featsi
83
+ # make sure number of features are consistent
84
+ if not set(features) <= set(featsi):
85
+ raise ValueError(
86
+ f"File {pp} contains more features than {paths[0]}!")
87
+ # populate shapes for all features
88
+ for feat in features:
89
+ if not isinstance(h5["events"][feat], h5py.Dataset):
90
+ warnings.warn(
91
+ f"Ignoring {feat}; not implemented yet!")
92
+ continue
93
+ if feat in ["frame", "time"]:
94
+ continue
95
+ shapes.setdefault(feat, []).append(
96
+ h5["events"][feat].shape)
97
+ if ii == 0:
98
+ dtypes[feat] = h5["events"][feat].dtype
99
+ # increment size
100
+ size += h5["events"][features[0]].shape[0]
101
+ # remember the frame feature if requested
102
+ if compute_frame:
103
+ frames.append(h5["events/frame"][:])
104
+
105
+ # write metadata
106
+ hv.attrs.update(meta)
107
+
108
+ # Create the virtual datasets
109
+ for feat in shapes:
110
+ if len(shapes[feat][0]) == 1:
111
+ # scalar feature
112
+ shape = (sum([sh[0] for sh in shapes[feat]]))
113
+ else:
114
+ # non-scalar feature
115
+ length = (sum([sh[0] for sh in shapes[feat]]))
116
+ shape = list(shapes[feat][0])
117
+ shape[0] = length
118
+ shape = tuple(shape)
119
+ layout = h5py.VirtualLayout(shape=shape, dtype=dtypes[feat])
120
+ loc = 0
121
+ for jj, pp in enumerate(paths):
122
+ vsource = h5py.VirtualSource(pp, f"events/{feat}",
123
+ shape=shapes[feat][jj])
124
+ cursize = shapes[feat][jj][0]
125
+ layout[loc:loc+cursize] = vsource
126
+ loc += cursize
127
+ hv.create_virtual_dataset(f"/events/{feat}", layout, fillvalue=0)
128
+
129
+ if compute_frame:
130
+ # concatenate frames and store in dataset
131
+ frame_concat = np.zeros(size, dtype=np.uint64)
132
+ locf = 0 # indexing location
133
+ prevmax = 0 # maximum frame number stored so far in array
134
+ for fr in frames:
135
+ offset = prevmax + 1 - fr[0]
136
+ frame_concat[locf:locf+fr.size] = fr + offset
137
+ locf += fr.size
138
+ prevmax = fr[-1] + offset
139
+ hv.create_dataset("/events/frame", data=frame_concat)
140
+
141
+ # write metadata
142
+ hv.attrs["experiment:event count"] = size
143
+
144
+ data = HDF5Data(h5kwargs["name"])
145
+ return data
dcnum/read/hdf5_data.py CHANGED
@@ -1,11 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import hashlib
4
- import io
5
4
  import json
6
5
  import numbers
7
6
  import pathlib
8
- import tempfile
9
7
  from typing import Dict, BinaryIO, List
10
8
  import uuid
11
9
  import warnings
@@ -160,7 +158,10 @@ class HDF5Data:
160
158
  if not hasattr(self, "h5"):
161
159
  self.h5 = None
162
160
 
163
- self.path = state["path"]
161
+ path = state["path"]
162
+ if isinstance(path, str):
163
+ path = pathlib.Path(path)
164
+ self.path = path
164
165
 
165
166
  self.md5_5m = state["md5_5m"]
166
167
  if self.md5_5m is None:
@@ -552,136 +553,10 @@ class HDF5Data:
552
553
  return self._keys
553
554
 
554
555
 
555
- def concatenated_hdf5_data(paths: List[pathlib.Path],
556
- path_out: True | pathlib.Path | None = True,
557
- compute_frame: bool = True,
558
- features: List[str] | None = None):
559
- """Return a virtual dataset concatenating all the input paths
560
-
561
- Parameters
562
- ----------
563
- paths:
564
- Path of the input HDF5 files that will be concatenated along
565
- the feature axis. The metadata will be taken from the first
566
- file.
567
- path_out:
568
- If `None`, then the dataset is created in memory. If `True`
569
- (default), create a file on disk. If a pathlib.Path is specified,
570
- the dataset is written to that file. Note that datasets in memory
571
- are likely not pickable (so don't use them for multiprocessing).
572
- compute_frame:
573
- Whether to compute the "events/frame" feature, taking the frame
574
- data from the input files and properly incrementing them along
575
- the file index.
576
- features:
577
- List of features to take from the input files.
578
-
579
- Notes
580
- -----
581
- - If one of the input files does not contain a feature from the first
582
- input `paths`, then a `ValueError` is raised. Use the `features`
583
- argument to specify which features you need instead.
584
- - Basins are not considered.
585
- """
586
- h5kwargs = {"mode": "w", "libver": "latest"}
587
- if isinstance(path_out, (pathlib.Path, str)):
588
- h5kwargs["name"] = path_out
589
- elif path_out is True:
590
- tf = tempfile.NamedTemporaryFile(prefix="dcnum_vc_",
591
- suffix=".hdf5",
592
- delete=False)
593
- tf.write(b"dummy")
594
- h5kwargs["name"] = tf.name
595
- tf.close()
596
- elif path_out is None:
597
- h5kwargs["name"] = io.BytesIO()
598
- else:
599
- raise ValueError(
600
- f"Invalid type for `path_out`: {type(path_out)} ({path_out}")
601
-
602
- if len(paths) == 0:
603
- raise ValueError("Please specify at least one file in `paths`!")
604
- elif len(paths) == 1:
605
- warnings.warn("Only one file passed to `concatenated_hdf5_data`; this "
606
- "is equivalent to using `HDF5Data`, but slower.")
607
-
608
- frames = []
609
-
610
- with h5py.File(**h5kwargs) as hv:
611
- # determine the sizes of the input files
612
- shapes = {}
613
- dtypes = {}
614
- size = 0
615
- for ii, pp in enumerate(paths):
616
- pp = pathlib.Path(pp).resolve()
617
- with h5py.File(pp, libver="latest") as h5:
618
- # get all feature keys
619
- featsi = sorted(h5["events"].keys())
620
- # get metadata
621
- if ii == 0:
622
- meta = dict(h5.attrs)
623
- if not features:
624
- features = featsi
625
- # make sure number of features are consistent
626
- if not set(features) <= set(featsi):
627
- raise ValueError(
628
- f"File {pp} contains more features than {paths[0]}!")
629
- # populate shapes for all features
630
- for feat in features:
631
- if not isinstance(h5["events"][feat], h5py.Dataset):
632
- warnings.warn(
633
- f"Ignoring {feat}; not implemented yet!")
634
- continue
635
- if feat in ["frame", "time"]:
636
- continue
637
- shapes.setdefault(feat, []).append(
638
- h5["events"][feat].shape)
639
- if ii == 0:
640
- dtypes[feat] = h5["events"][feat].dtype
641
- # increment size
642
- size += h5["events"][features[0]].shape[0]
643
- # remember the frame feature if requested
644
- if compute_frame:
645
- frames.append(h5["events/frame"][:])
646
-
647
- # write metadata
648
- hv.attrs.update(meta)
649
-
650
- # Create the virtual datasets
651
- for feat in shapes:
652
- if len(shapes[feat][0]) == 1:
653
- # scalar feature
654
- shape = (sum([sh[0] for sh in shapes[feat]]))
655
- else:
656
- # non-scalar feature
657
- length = (sum([sh[0] for sh in shapes[feat]]))
658
- shape = list(shapes[feat][0])
659
- shape[0] = length
660
- shape = tuple(shape)
661
- layout = h5py.VirtualLayout(shape=shape, dtype=dtypes[feat])
662
- loc = 0
663
- for jj, pp in enumerate(paths):
664
- vsource = h5py.VirtualSource(pp, f"events/{feat}",
665
- shape=shapes[feat][jj])
666
- cursize = shapes[feat][jj][0]
667
- layout[loc:loc+cursize] = vsource
668
- loc += cursize
669
- hv.create_virtual_dataset(f"/events/{feat}", layout, fillvalue=0)
670
-
671
- if compute_frame:
672
- # concatenate frames and store in dataset
673
- frame_concat = np.zeros(size, dtype=np.uint64)
674
- locf = 0 # indexing location
675
- prevmax = 0 # maximum frame number stored so far in array
676
- for fr in frames:
677
- offset = prevmax + 1 - fr[0]
678
- frame_concat[locf:locf+fr.size] = fr + offset
679
- locf += fr.size
680
- prevmax = fr[-1] + offset
681
- hv.create_dataset("/events/frame", data=frame_concat)
682
-
683
- # write metadata
684
- hv.attrs["experiment:event count"] = size
685
-
686
- data = HDF5Data(h5kwargs["name"])
687
- return data
556
+ def concatenated_hdf5_data(*args, **kwargs):
557
+ warnings.warn(
558
+ "Please use `dcnum.read.hdf5_concat.concatenated_hdf5_data`. "
559
+ "Accessing this method via `dcnum.read.hdf5_data` is deprecated.",
560
+ DeprecationWarning)
561
+ from . import hdf5_concat
562
+ return hdf5_concat.concatenated_hdf5_data(*args, **kwargs)
dcnum-0.25.7.dist-info/METADATA → dcnum-0.25.8.dist-info/METADATA CHANGED
@@ -1,10 +1,10 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: dcnum
3
- Version: 0.25.7
3
+ Version: 0.25.8
4
4
  Summary: numerics toolbox for imaging deformability cytometry
5
5
  Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
6
6
  Maintainer-email: Paul Müller <dev@craban.de>
7
- License: MIT
7
+ License-Expression: MIT
8
8
  Project-URL: source, https://github.com/DC-Analysis/dcnum
9
9
  Project-URL: tracker, https://github.com/DC-Analysis/dcnum/issues
10
10
  Project-URL: documentation, https://dcnum.readthedocs.io/en/stable/
@@ -27,6 +27,7 @@ Requires-Dist: scikit-image<1,>=0.24
27
27
  Requires-Dist: scipy<1.15.0,>=1.8.0
28
28
  Provides-Extra: torch
29
29
  Requires-Dist: torch>=2.2; extra == "torch"
30
+ Dynamic: license-file
30
31
 
31
32
  |dcnum|
32
33
  =======
dcnum-0.25.7.dist-info/RECORD → dcnum-0.25.8.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
- dcnum/__init__.py,sha256=hcawIKS7utYiOyVhOAX9t7K3xYzP1b9862VV0b6qSrQ,74
2
- dcnum/_version.py,sha256=hjm1pvHN0ZX7nlIod7PG7Fba0cyr_wIaCY9stsERSag,513
1
+ dcnum/__init__.py,sha256=p0mYg01FQ6nsERYmx_FfVxqqHvYcSMEyIAMBIivAmO8,1206
2
+ dcnum/_version.py,sha256=IPFZ9tPIaduJC_jCnw8B-tCFsabLErCwnlpnrnKHV6M,513
3
3
  dcnum/os_env_st.py,sha256=4psq-gPuWTTQ118kCiTx0Mhoyads4Irn6JSUzZk8gyc,3052
4
4
  dcnum/feat/__init__.py,sha256=jUJYWTD3VIoDNKrmryXbjHb1rGwYtK4b7VPWihYgUoo,325
5
5
  dcnum/feat/event_extractor_manager_thread.py,sha256=6D3RVYBuH7gOoGZ4Kz74n6fhq7MtlTY26kpSwZRqg3M,7972
@@ -27,11 +27,12 @@ dcnum/logic/json_encoder.py,sha256=wb6uk6EeTkXyrvwtLm9uWe0cfmiBannzcsKLsDLHuQo,8
27
27
  dcnum/meta/__init__.py,sha256=AVqRgyKXO1orKnE305h88IBvoZ1oz6X11HN1WP5nGvg,60
28
28
  dcnum/meta/paths.py,sha256=aIG39JYbZpOlCbPQIlp0SqGumjbGINYhL2AAoznJt5o,1113
29
29
  dcnum/meta/ppid.py,sha256=JInGtwSCsO9nr1E1aishm0k9iQIFB-essBKvv5aBE98,8510
30
- dcnum/read/__init__.py,sha256=LYHyZHgiNTpjV5oEcty-7Kh5topLpHT_cFlNl-QX8gg,262
30
+ dcnum/read/__init__.py,sha256=vhriJFlJ3DlqkAnRPQsOfUQWKYSzLNNp_NZeZ5eBvmo,286
31
31
  dcnum/read/cache.py,sha256=ChxokVuMaTfi6N6ZbOTWpNYkPgAAYi1lR8nD7JbzjPQ,6497
32
32
  dcnum/read/const.py,sha256=x6LfRwWvIxm6nDWlSADVWqDuzMX6bLzy5kQprwLPzA4,496
33
33
  dcnum/read/detect_flicker.py,sha256=XVf7nqaHx6weRTtS7KPa5_WRU2flDQIZTbKspeguqdU,1829
34
- dcnum/read/hdf5_data.py,sha256=Q4sFT1HBrkrKCX1TUaOpibvz8VFj0ETMa9lw_xIF6tw,26360
34
+ dcnum/read/hdf5_concat.py,sha256=A4Ah_NLxa1ESapEWJcUhdglzi7_E3qKNd81ES7A-_2o,5589
35
+ dcnum/read/hdf5_data.py,sha256=KGMQJYtirBSjnen7FWwfMJB4sr_eOuT8qPGkLZwuMN0,21293
35
36
  dcnum/read/mapped.py,sha256=zU2fYdZfLNHn0rKHxDzBhNFMu4--WWa8nSeE2likyZA,3637
36
37
  dcnum/segm/__init__.py,sha256=9cLEAd3JWE8IGqDHV-eSDIYOGBfOepd8OcebtNs8Omk,309
37
38
  dcnum/segm/segm_thresh.py,sha256=iVhvIhzO0Gw0t3rXOgH71rOI0CNjJJQq4Gg6BulUhK8,948
@@ -50,8 +51,8 @@ dcnum/write/__init__.py,sha256=sK79IlvCFIqf2oFABVeyYedMnHOsEIQpxAauEeNO-Tw,273
50
51
  dcnum/write/deque_writer_thread.py,sha256=ao7F1yrVKyufgC4rC0Y2_Vt7snuT6KpI7W2qVxcjdhk,1994
51
52
  dcnum/write/queue_collector_thread.py,sha256=-p5vrk9cDhtaIMFIu_cCmvlZJafrFkW68uONonMURYo,11617
52
53
  dcnum/write/writer.py,sha256=JkVb4KDBV3oo9r3p2yy9wECO1REx7FG0PRBmVWTxJdk,20577
53
- dcnum-0.25.7.dist-info/LICENSE,sha256=YRChA1C8A2E-amJbudwMcbTCZy_HzmeY0hMIvduh1MM,1089
54
- dcnum-0.25.7.dist-info/METADATA,sha256=X3578YE2gN-g5mMPHH8bMnFyU9E64PP_ivRsIHPKcYc,2321
55
- dcnum-0.25.7.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
56
- dcnum-0.25.7.dist-info/top_level.txt,sha256=Hmh38rgG_MFTVDpUDGuO2HWTSq80P585Het4COQzFTg,6
57
- dcnum-0.25.7.dist-info/RECORD,,
54
+ dcnum-0.25.8.dist-info/licenses/LICENSE,sha256=rX7tNSxP-EhLz-yYUyoBGwjJheA2fiZpT1Iw0LXnJ2M,1069
55
+ dcnum-0.25.8.dist-info/METADATA,sha256=MH79v2fgGvYk_cmCvPoJij9jVq1LUY-7VHT62Y986PY,2354
56
+ dcnum-0.25.8.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
57
+ dcnum-0.25.8.dist-info/top_level.txt,sha256=Hmh38rgG_MFTVDpUDGuO2HWTSq80P585Het4COQzFTg,6
58
+ dcnum-0.25.8.dist-info/RECORD,,
dcnum-0.25.7.dist-info/WHEEL → dcnum-0.25.8.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.1.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
dcnum-0.25.7.dist-info/LICENSE → dcnum-0.25.8.dist-info/licenses/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2023 Deformability Cytometry Analysis
3
+ Copyright (c) 2023 Paul Müller
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal