dcnum 0.25.7.tar.gz → 0.25.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (132)
  1. {dcnum-0.25.7 → dcnum-0.25.9}/CHANGELOG +7 -0
  2. {dcnum-0.25.7 → dcnum-0.25.9}/LICENSE +1 -1
  3. {dcnum-0.25.7 → dcnum-0.25.9}/PKG-INFO +4 -3
  4. {dcnum-0.25.7 → dcnum-0.25.9}/pyproject.toml +1 -1
  5. dcnum-0.25.9/src/dcnum/__init__.py +25 -0
  6. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/_version.py +2 -2
  7. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_background/base.py +5 -2
  8. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/__init__.py +2 -1
  9. dcnum-0.25.9/src/dcnum/read/hdf5_concat.py +145 -0
  10. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/hdf5_data.py +11 -136
  11. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum.egg-info/PKG-INFO +4 -3
  12. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum.egg-info/SOURCES.txt +2 -0
  13. dcnum-0.25.9/tests/test_init.py +7 -0
  14. dcnum-0.25.9/tests/test_read_hdf5_concat.py +29 -0
  15. dcnum-0.25.7/src/dcnum/__init__.py +0 -2
  16. dcnum-0.25.7/tests/test_init.py +0 -5
  17. {dcnum-0.25.7 → dcnum-0.25.9}/.github/workflows/check.yml +0 -0
  18. {dcnum-0.25.7 → dcnum-0.25.9}/.github/workflows/deploy_pypi.yml +0 -0
  19. {dcnum-0.25.7 → dcnum-0.25.9}/.gitignore +0 -0
  20. {dcnum-0.25.7 → dcnum-0.25.9}/.readthedocs.yml +0 -0
  21. {dcnum-0.25.7 → dcnum-0.25.9}/README.rst +0 -0
  22. {dcnum-0.25.7 → dcnum-0.25.9}/benchmark/.gitignore +0 -0
  23. {dcnum-0.25.7 → dcnum-0.25.9}/benchmark/Readme.md +0 -0
  24. {dcnum-0.25.7 → dcnum-0.25.9}/benchmark/benchmark.py +0 -0
  25. {dcnum-0.25.7 → dcnum-0.25.9}/benchmark/bm_write_deque_writer_thread.py +0 -0
  26. {dcnum-0.25.7 → dcnum-0.25.9}/benchmark/bm_write_queue_collector_thread.py +0 -0
  27. {dcnum-0.25.7 → dcnum-0.25.9}/docs/.gitignore +0 -0
  28. {dcnum-0.25.7 → dcnum-0.25.9}/docs/conf.py +0 -0
  29. {dcnum-0.25.7 → dcnum-0.25.9}/docs/extensions/github_changelog.py +0 -0
  30. {dcnum-0.25.7 → dcnum-0.25.9}/docs/index.rst +0 -0
  31. {dcnum-0.25.7 → dcnum-0.25.9}/docs/requirements.txt +0 -0
  32. {dcnum-0.25.7 → dcnum-0.25.9}/docs/sec_design.rst +0 -0
  33. {dcnum-0.25.7 → dcnum-0.25.9}/setup.cfg +0 -0
  34. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/__init__.py +0 -0
  35. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/event_extractor_manager_thread.py +0 -0
  36. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_background/__init__.py +0 -0
  37. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_background/bg_copy.py +0 -0
  38. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_background/bg_roll_median.py +0 -0
  39. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_background/bg_sparse_median.py +0 -0
  40. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_brightness/__init__.py +0 -0
  41. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_brightness/bright_all.py +0 -0
  42. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_brightness/common.py +0 -0
  43. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_contour/__init__.py +0 -0
  44. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_contour/contour.py +0 -0
  45. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_contour/moments.py +0 -0
  46. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_contour/volume.py +0 -0
  47. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_texture/__init__.py +0 -0
  48. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_texture/common.py +0 -0
  49. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_texture/tex_all.py +0 -0
  50. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/gate.py +0 -0
  51. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/queue_event_extractor.py +0 -0
  52. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/logic/__init__.py +0 -0
  53. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/logic/ctrl.py +0 -0
  54. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/logic/job.py +0 -0
  55. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/logic/json_encoder.py +0 -0
  56. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/meta/__init__.py +0 -0
  57. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/meta/paths.py +0 -0
  58. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/meta/ppid.py +0 -0
  59. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/os_env_st.py +0 -0
  60. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/cache.py +0 -0
  61. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/const.py +0 -0
  62. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/detect_flicker.py +0 -0
  63. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/mapped.py +0 -0
  64. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/__init__.py +0 -0
  65. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_thresh.py +0 -0
  66. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_torch/__init__.py +0 -0
  67. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_torch/segm_torch_base.py +0 -0
  68. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_torch/segm_torch_mpo.py +0 -0
  69. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_torch/segm_torch_sto.py +0 -0
  70. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_torch/torch_model.py +0 -0
  71. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_torch/torch_postproc.py +0 -0
  72. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segm_torch/torch_preproc.py +0 -0
  73. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segmenter.py +0 -0
  74. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segmenter_manager_thread.py +0 -0
  75. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segmenter_mpo.py +0 -0
  76. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/segm/segmenter_sto.py +0 -0
  77. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/write/__init__.py +0 -0
  78. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/write/deque_writer_thread.py +0 -0
  79. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/write/queue_collector_thread.py +0 -0
  80. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/write/writer.py +0 -0
  81. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum.egg-info/dependency_links.txt +0 -0
  82. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum.egg-info/requires.txt +0 -0
  83. {dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum.egg-info/top_level.txt +0 -0
  84. {dcnum-0.25.7 → dcnum-0.25.9}/tests/conftest.py +0 -0
  85. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/fmt-hdf5_cytoshot_extended-moments-features.zip +0 -0
  86. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/fmt-hdf5_cytoshot_full-features_2023.zip +0 -0
  87. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/fmt-hdf5_cytoshot_full-features_2024.zip +0 -0
  88. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip +0 -0
  89. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/fmt-hdf5_shapein_empty.zip +0 -0
  90. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/fmt-hdf5_shapein_raw-with-variable-length-logs.zip +0 -0
  91. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/segm-torch-model_unet-dcnum-test_g1_910c2.zip +0 -0
  92. {dcnum-0.25.7 → dcnum-0.25.9}/tests/data/segm-torch-test-data_unet-dcnum-test_g1_910c2.zip +0 -0
  93. {dcnum-0.25.7 → dcnum-0.25.9}/tests/helper_methods.py +0 -0
  94. {dcnum-0.25.7 → dcnum-0.25.9}/tests/requirements.txt +0 -0
  95. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_background_base.py +0 -0
  96. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_background_bg_copy.py +0 -0
  97. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_background_bg_roll_median.py +0 -0
  98. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_background_bg_sparsemed.py +0 -0
  99. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_brightness.py +0 -0
  100. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_event_extractor_manager.py +0 -0
  101. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_gate.py +0 -0
  102. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_haralick.py +0 -0
  103. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_moments_based.py +0 -0
  104. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_moments_based_extended.py +0 -0
  105. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_feat_volume.py +0 -0
  106. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_logic_job.py +0 -0
  107. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_logic_join.py +0 -0
  108. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_logic_json.py +0 -0
  109. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_logic_pipeline.py +0 -0
  110. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_meta_paths.py +0 -0
  111. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_meta_ppid_base.py +0 -0
  112. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_meta_ppid_bg.py +0 -0
  113. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_meta_ppid_data.py +0 -0
  114. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_meta_ppid_feat.py +0 -0
  115. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_meta_ppid_gate.py +0 -0
  116. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_meta_ppid_segm.py +0 -0
  117. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_read_basin.py +0 -0
  118. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_read_concat_hdf5.py +0 -0
  119. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_read_detect_flicker.py +0 -0
  120. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_read_hdf5.py +0 -0
  121. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_read_hdf5_basins.py +0 -0
  122. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_read_hdf5_index_mapping.py +0 -0
  123. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_segm_base.py +0 -0
  124. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_segm_mpo.py +0 -0
  125. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_segm_no_mask_proc.py +0 -0
  126. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_segm_sto.py +0 -0
  127. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_segm_thresh.py +0 -0
  128. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_segm_torch.py +0 -0
  129. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_segm_torch_preproc.py +0 -0
  130. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_write_deque_writer_thread.py +0 -0
  131. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_write_queue_collector_thread.py +0 -0
  132. {dcnum-0.25.7 → dcnum-0.25.9}/tests/test_write_writer.py +0 -0

{dcnum-0.25.7 → dcnum-0.25.9}/CHANGELOG
@@ -1,3 +1,10 @@
+ 0.25.9
+  - fix: md5 sum caused PermissionError on Windows for background computation
+ 0.25.8
+  - fix: invalid type definition for `concatenated_hdf5_data`
+  - enh: make sure `HDF5Data.path` is converted to `pathlib.Path` if it is `str`
+  - docs: properly employ license MIT
+  - ref: move `concatenated_hdf5_data` to `read.hdf5_concat` submodule
  0.25.7
   - enh: `HDF5Writer.store_log` returns created dataset
   - docs: add code reference using apidoc

{dcnum-0.25.7 → dcnum-0.25.9}/LICENSE
@@ -1,6 +1,6 @@
  MIT License

- Copyright (c) 2023 Deformability Cytometry Analysis
+ Copyright (c) 2023 Paul Müller

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal

{dcnum-0.25.7 → dcnum-0.25.9}/PKG-INFO
@@ -1,10 +1,10 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: dcnum
- Version: 0.25.7
+ Version: 0.25.9
  Summary: numerics toolbox for imaging deformability cytometry
  Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
  Maintainer-email: Paul Müller <dev@craban.de>
- License: MIT
+ License-Expression: MIT
  Project-URL: source, https://github.com/DC-Analysis/dcnum
  Project-URL: tracker, https://github.com/DC-Analysis/dcnum/issues
  Project-URL: documentation, https://dcnum.readthedocs.io/en/stable/
@@ -27,6 +27,7 @@ Requires-Dist: scikit-image<1,>=0.24
  Requires-Dist: scipy<1.15.0,>=1.8.0
  Provides-Extra: torch
  Requires-Dist: torch>=2.2; extra == "torch"
+ Dynamic: license-file

  |dcnum|
  =======

{dcnum-0.25.7 → dcnum-0.25.9}/pyproject.toml
@@ -23,7 +23,7 @@ classifiers = [
      'Topic :: Scientific/Engineering :: Visualization',
      'Intended Audience :: Science/Research',
  ]
- license = {text = "MIT"}
+ license = "MIT"
  dependencies = [
      "h5py>=3.0.0, <4",  # BSD
      "hdf5plugin>=3.3.1, <6",  # MIT and others (per plugin)

dcnum-0.25.9/src/dcnum/__init__.py
@@ -0,0 +1,25 @@
+ """Base library for deformability cytometry postprocessing
+ MIT License
+
+ Copyright (c) 2023 Paul Müller
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ """
+ # flake8: noqa: F401
+ from ._version import __version__, __version_tuple__

{dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/_version.py
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.25.7'
- __version_tuple__ = version_tuple = (0, 25, 7)
+ __version__ = version = '0.25.9'
+ __version_tuple__ = version_tuple = (0, 25, 9)

{dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/feat/feat_background/base.py
@@ -9,7 +9,7 @@ import time
  import h5py

  from ...meta import ppid
- from ...read import HDF5Data
+ from ...read import HDF5Data, md5sum
  from ...write import HDF5Writer, create_with_basins, set_default_filter_kwargs


@@ -87,6 +87,9 @@ class Background(abc.ABC):

          # Check whether user passed an array or a path
          if isinstance(input_data, pathlib.Path):
+             # Compute MD5 sum before opening the file so that we don't
+             # get a file-locking issue (PermissionError) on Windows.
+             md5_5m = md5sum(input_data, blocksize=65536, count=80)
              if str(input_data.resolve()) == str(output_path.resolve()):
                  self.h5in = h5py.File(input_data, "a", libver="latest")
                  self.h5out = self.h5in
@@ -98,7 +101,7 @@ class Background(abc.ABC):
              # the ImageCache. We have to go via the ImageCache route,
              # because HDF5Data properly resolves basins and the image
              # feature might be in a basin.
-             self.hdin = HDF5Data(self.h5in)
+             self.hdin = HDF5Data(self.h5in, md5_5m=md5_5m)
              self.input_data = self.hdin.image.h5ds
          else:
              self.input_data = input_data
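
The fix is purely a reordering: `md5sum` (re-exported from `dcnum.read.cache`) hashes only the first `count * blocksize` bytes of the input, here 80 × 65536 bytes = 5 MiB (hence the attribute name `md5_5m`), and it closes its own file handle before `h5py.File` opens the file for writing, which is what avoids the Windows `PermissionError` from file locking. A minimal sketch of such a blockwise checksum, under a hypothetical name and assuming the same `blocksize`/`count` semantics as `dcnum.read.md5sum`:

    import hashlib
    import pathlib


    def md5sum_sketch(path, blocksize=65536, count=80):
        """Hash at most `count` blocks of `blocksize` bytes from `path`."""
        hasher = hashlib.md5()
        with pathlib.Path(path).open("rb") as fd:
            for _ in range(count):
                block = fd.read(blocksize)
                if not block:  # reached end of file early
                    break
                hasher.update(block)
        # The handle is closed on leaving the `with` block, before any
        # h5py.File(path, "a") call, so no exclusive lock can collide
        # with the later open on Windows.
        return hasher.hexdigest()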

{dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/__init__.py
@@ -2,5 +2,6 @@
  from .cache import md5sum
  from .const import PROTECTED_FEATURES
  from .detect_flicker import detect_flickering
- from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
+ from .hdf5_data import HDF5Data, HDF5ImageCache
+ from .hdf5_concat import concatenated_hdf5_data
  from .mapped import get_mapping_indices, get_mapped_object

dcnum-0.25.9/src/dcnum/read/hdf5_concat.py
@@ -0,0 +1,145 @@
+ import io
+ import pathlib
+ import tempfile
+ import warnings
+
+ import h5py
+ import numpy as np
+
+
+ from .hdf5_data import HDF5Data
+
+
+ def concatenated_hdf5_data(paths: list[pathlib.Path],
+                            path_out: bool | pathlib.Path | None = True,
+                            compute_frame: bool = True,
+                            features: list[str] | None = None):
+     """Return a virtual dataset concatenating all the input paths
+
+     Parameters
+     ----------
+     paths:
+         Path of the input HDF5 files that will be concatenated along
+         the feature axis. The metadata will be taken from the first
+         file.
+     path_out:
+         If `None`, then the dataset is created in memory. If `True`
+         (default), create a file on disk. If a pathlib.Path is specified,
+         the dataset is written to that file. Note that datasets in memory
+         are likely not pickable (so don't use them for multiprocessing).
+     compute_frame:
+         Whether to compute the "events/frame" feature, taking the frame
+         data from the input files and properly incrementing them along
+         the file index.
+     features:
+         List of features to take from the input files.
+
+     Notes
+     -----
+     - If one of the input files does not contain a feature from the first
+       input `paths`, then a `ValueError` is raised. Use the `features`
+       argument to specify which features you need instead.
+     - Basins are not considered.
+     """
+     h5kwargs = {"mode": "w", "libver": "latest"}
+     if isinstance(path_out, (pathlib.Path, str)):
+         h5kwargs["name"] = path_out
+     elif path_out is True:
+         tf = tempfile.NamedTemporaryFile(prefix="dcnum_vc_",
+                                          suffix=".hdf5",
+                                          delete=False)
+         tf.write(b"dummy")
+         h5kwargs["name"] = tf.name
+         tf.close()
+     elif path_out is None:
+         h5kwargs["name"] = io.BytesIO()
+     else:
+         raise ValueError(
+             f"Invalid type for `path_out`: {type(path_out)} ({path_out}")
+
+     if len(paths) == 0:
+         raise ValueError("Please specify at least one file in `paths`!")
+     elif len(paths) == 1:
+         warnings.warn("Only one file passed to `concatenated_hdf5_data`; this "
+                       "is equivalent to using `HDF5Data`, but slower.")
+
+     frames = []
+
+     with h5py.File(**h5kwargs) as hv:
+         # determine the sizes of the input files
+         shapes = {}
+         dtypes = {}
+         size = 0
+         for ii, pp in enumerate(paths):
+             pp = pathlib.Path(pp).resolve()
+             with h5py.File(pp, libver="latest") as h5:
+                 # get all feature keys
+                 featsi = sorted(h5["events"].keys())
+                 # get metadata
+                 if ii == 0:
+                     meta = dict(h5.attrs)
+                     if not features:
+                         features = featsi
+                 # make sure number of features are consistent
+                 if not set(features) <= set(featsi):
+                     raise ValueError(
+                         f"File {pp} contains more features than {paths[0]}!")
+                 # populate shapes for all features
+                 for feat in features:
+                     if not isinstance(h5["events"][feat], h5py.Dataset):
+                         warnings.warn(
+                             f"Ignoring {feat}; not implemented yet!")
+                         continue
+                     if feat in ["frame", "time"]:
+                         continue
+                     shapes.setdefault(feat, []).append(
+                         h5["events"][feat].shape)
+                     if ii == 0:
+                         dtypes[feat] = h5["events"][feat].dtype
+                 # increment size
+                 size += h5["events"][features[0]].shape[0]
+                 # remember the frame feature if requested
+                 if compute_frame:
+                     frames.append(h5["events/frame"][:])
+
+         # write metadata
+         hv.attrs.update(meta)
+
+         # Create the virtual datasets
+         for feat in shapes:
+             if len(shapes[feat][0]) == 1:
+                 # scalar feature
+                 shape = (sum([sh[0] for sh in shapes[feat]]))
+             else:
+                 # non-scalar feature
+                 length = (sum([sh[0] for sh in shapes[feat]]))
+                 shape = list(shapes[feat][0])
+                 shape[0] = length
+                 shape = tuple(shape)
+             layout = h5py.VirtualLayout(shape=shape, dtype=dtypes[feat])
+             loc = 0
+             for jj, pp in enumerate(paths):
+                 vsource = h5py.VirtualSource(pp, f"events/{feat}",
+                                              shape=shapes[feat][jj])
+                 cursize = shapes[feat][jj][0]
+                 layout[loc:loc+cursize] = vsource
+                 loc += cursize
+             hv.create_virtual_dataset(f"/events/{feat}", layout, fillvalue=0)
+
+         if compute_frame:
+             # concatenate frames and store in dataset
+             frame_concat = np.zeros(size, dtype=np.uint64)
+             locf = 0  # indexing location
+             prevmax = 0  # maximum frame number stored so far in array
+             for fr in frames:
+                 offset = prevmax + 1 - fr[0]
+                 frame_concat[locf:locf+fr.size] = fr + offset
+                 locf += fr.size
+                 prevmax = fr[-1] + offset
+             hv.create_dataset("/events/frame", data=frame_concat)
+
+         # write metadata
+         hv.attrs["experiment:event count"] = size
+
+     data = HDF5Data(h5kwargs["name"])
+     return data
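
Usage, mirroring the new tests added further down (the input file names here are placeholders):

    import pathlib

    from dcnum import read

    # Hypothetical input recordings; any HDF5 files whose "events" group
    # shares the features of the first file will do.
    paths = [pathlib.Path("m1.rtdc"), pathlib.Path("m2.rtdc")]

    # Concatenate into an explicit output file on disk:
    with read.concatenated_hdf5_data(paths, path_out=pathlib.Path("all.rtdc")):
        pass  # "all.rtdc" now holds the virtual dataset

    # Build the virtual dataset in memory instead (not picklable, so do
    # not hand it to multiprocessing workers):
    with read.concatenated_hdf5_data(paths, path_out=None) as hd:
        print(hd.path)  # an io.BytesIO object, not a filesystem path

With `compute_frame=True`, frame numbers are rewritten so they increase strictly across file boundaries: each file is shifted so that its first frame lands one past the maximum stored so far. For example, input frames [3, 5, 9] and [2, 4] become [1, 3, 7] and [8, 10].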

{dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum/read/hdf5_data.py
@@ -1,11 +1,9 @@
  from __future__ import annotations

  import hashlib
- import io
  import json
  import numbers
  import pathlib
- import tempfile
  from typing import Dict, BinaryIO, List
  import uuid
  import warnings
@@ -160,7 +158,10 @@ class HDF5Data:
          if not hasattr(self, "h5"):
              self.h5 = None

-         self.path = state["path"]
+         path = state["path"]
+         if isinstance(path, str):
+             path = pathlib.Path(path)
+         self.path = path

          self.md5_5m = state["md5_5m"]
          if self.md5_5m is None:
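
This change sits in the method that restores pickled state (presumably `__setstate__`), so `HDF5Data` instances whose state was pickled with a plain `str` path now come back with a `pathlib.Path`. A toy illustration of the normalization, using a made-up state fragment:

    import pathlib

    # Made-up state fragment, e.g. as it might arrive from a worker process:
    state = {"path": "/data/measurement.rtdc"}

    path = state["path"]
    if isinstance(path, str):
        path = pathlib.Path(path)

    assert isinstance(path, pathlib.Path)  # downstream .resolve() etc. now work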
@@ -552,136 +553,10 @@ class HDF5Data:
          return self._keys


- def concatenated_hdf5_data(paths: List[pathlib.Path],
-                            path_out: True | pathlib.Path | None = True,
-                            compute_frame: bool = True,
-                            features: List[str] | None = None):
-     """Return a virtual dataset concatenating all the input paths
-
-     Parameters
-     ----------
-     paths:
-         Path of the input HDF5 files that will be concatenated along
-         the feature axis. The metadata will be taken from the first
-         file.
-     path_out:
-         If `None`, then the dataset is created in memory. If `True`
-         (default), create a file on disk. If a pathlib.Path is specified,
-         the dataset is written to that file. Note that datasets in memory
-         are likely not pickable (so don't use them for multiprocessing).
-     compute_frame:
-         Whether to compute the "events/frame" feature, taking the frame
-         data from the input files and properly incrementing them along
-         the file index.
-     features:
-         List of features to take from the input files.
-
-     Notes
-     -----
-     - If one of the input files does not contain a feature from the first
-       input `paths`, then a `ValueError` is raised. Use the `features`
-       argument to specify which features you need instead.
-     - Basins are not considered.
-     """
-     h5kwargs = {"mode": "w", "libver": "latest"}
-     if isinstance(path_out, (pathlib.Path, str)):
-         h5kwargs["name"] = path_out
-     elif path_out is True:
-         tf = tempfile.NamedTemporaryFile(prefix="dcnum_vc_",
-                                          suffix=".hdf5",
-                                          delete=False)
-         tf.write(b"dummy")
-         h5kwargs["name"] = tf.name
-         tf.close()
-     elif path_out is None:
-         h5kwargs["name"] = io.BytesIO()
-     else:
-         raise ValueError(
-             f"Invalid type for `path_out`: {type(path_out)} ({path_out}")
-
-     if len(paths) == 0:
-         raise ValueError("Please specify at least one file in `paths`!")
-     elif len(paths) == 1:
-         warnings.warn("Only one file passed to `concatenated_hdf5_data`; this "
-                       "is equivalent to using `HDF5Data`, but slower.")
-
-     frames = []
-
-     with h5py.File(**h5kwargs) as hv:
-         # determine the sizes of the input files
-         shapes = {}
-         dtypes = {}
-         size = 0
-         for ii, pp in enumerate(paths):
-             pp = pathlib.Path(pp).resolve()
-             with h5py.File(pp, libver="latest") as h5:
-                 # get all feature keys
-                 featsi = sorted(h5["events"].keys())
-                 # get metadata
-                 if ii == 0:
-                     meta = dict(h5.attrs)
-                     if not features:
-                         features = featsi
-                 # make sure number of features are consistent
-                 if not set(features) <= set(featsi):
-                     raise ValueError(
-                         f"File {pp} contains more features than {paths[0]}!")
-                 # populate shapes for all features
-                 for feat in features:
-                     if not isinstance(h5["events"][feat], h5py.Dataset):
-                         warnings.warn(
-                             f"Ignoring {feat}; not implemented yet!")
-                         continue
-                     if feat in ["frame", "time"]:
-                         continue
-                     shapes.setdefault(feat, []).append(
-                         h5["events"][feat].shape)
-                     if ii == 0:
-                         dtypes[feat] = h5["events"][feat].dtype
-                 # increment size
-                 size += h5["events"][features[0]].shape[0]
-                 # remember the frame feature if requested
-                 if compute_frame:
-                     frames.append(h5["events/frame"][:])
-
-         # write metadata
-         hv.attrs.update(meta)
-
-         # Create the virtual datasets
-         for feat in shapes:
-             if len(shapes[feat][0]) == 1:
-                 # scalar feature
-                 shape = (sum([sh[0] for sh in shapes[feat]]))
-             else:
-                 # non-scalar feature
-                 length = (sum([sh[0] for sh in shapes[feat]]))
-                 shape = list(shapes[feat][0])
-                 shape[0] = length
-                 shape = tuple(shape)
-             layout = h5py.VirtualLayout(shape=shape, dtype=dtypes[feat])
-             loc = 0
-             for jj, pp in enumerate(paths):
-                 vsource = h5py.VirtualSource(pp, f"events/{feat}",
-                                              shape=shapes[feat][jj])
-                 cursize = shapes[feat][jj][0]
-                 layout[loc:loc+cursize] = vsource
-                 loc += cursize
-             hv.create_virtual_dataset(f"/events/{feat}", layout, fillvalue=0)
-
-         if compute_frame:
-             # concatenate frames and store in dataset
-             frame_concat = np.zeros(size, dtype=np.uint64)
-             locf = 0  # indexing location
-             prevmax = 0  # maximum frame number stored so far in array
-             for fr in frames:
-                 offset = prevmax + 1 - fr[0]
-                 frame_concat[locf:locf+fr.size] = fr + offset
-                 locf += fr.size
-                 prevmax = fr[-1] + offset
-             hv.create_dataset("/events/frame", data=frame_concat)
-
-         # write metadata
-         hv.attrs["experiment:event count"] = size
-
-     data = HDF5Data(h5kwargs["name"])
-     return data
+ def concatenated_hdf5_data(*args, **kwargs):
+     warnings.warn(
+         "Please use `dcnum.read.hdf5_concat.concatenated_hdf5_data`. "
+         "Accessing this method via `dcnum.read.hdf5_data` is deprecated.",
+         DeprecationWarning)
+     from . import hdf5_concat
+     return hdf5_concat.concatenated_hdf5_data(*args, **kwargs)
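
For downstream code the migration is a one-line import change; the old access path keeps working, but the shim above warns when the function is called:

    # Preferred as of 0.25.8:
    from dcnum.read.hdf5_concat import concatenated_hdf5_data

    # Equivalent, since dcnum.read re-exports the function:
    from dcnum.read import concatenated_hdf5_data

    # Deprecated: calling through dcnum.read.hdf5_data still works, but
    # emits a DeprecationWarning on each call:
    from dcnum.read import hdf5_data
    hdf5_data.concatenated_hdf5_data  # the forwarding shim shown above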

{dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: dcnum
- Version: 0.25.7
+ Version: 0.25.9
  Summary: numerics toolbox for imaging deformability cytometry
  Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
  Maintainer-email: Paul Müller <dev@craban.de>
- License: MIT
+ License-Expression: MIT
  Project-URL: source, https://github.com/DC-Analysis/dcnum
  Project-URL: tracker, https://github.com/DC-Analysis/dcnum/issues
  Project-URL: documentation, https://dcnum.readthedocs.io/en/stable/
@@ -27,6 +27,7 @@ Requires-Dist: scikit-image<1,>=0.24
  Requires-Dist: scipy<1.15.0,>=1.8.0
  Provides-Extra: torch
  Requires-Dist: torch>=2.2; extra == "torch"
+ Dynamic: license-file

  |dcnum|
  =======

{dcnum-0.25.7 → dcnum-0.25.9}/src/dcnum.egg-info/SOURCES.txt
@@ -55,6 +55,7 @@ src/dcnum/read/__init__.py
  src/dcnum/read/cache.py
  src/dcnum/read/const.py
  src/dcnum/read/detect_flicker.py
+ src/dcnum/read/hdf5_concat.py
  src/dcnum/read/hdf5_data.py
  src/dcnum/read/mapped.py
  src/dcnum/segm/__init__.py
@@ -105,6 +106,7 @@ tests/test_read_concat_hdf5.py
  tests/test_read_detect_flicker.py
  tests/test_read_hdf5.py
  tests/test_read_hdf5_basins.py
+ tests/test_read_hdf5_concat.py
  tests/test_read_hdf5_index_mapping.py
  tests/test_segm_base.py
  tests/test_segm_mpo.py

dcnum-0.25.9/tests/test_init.py
@@ -0,0 +1,7 @@
+ import dcnum
+
+
+ def test_init():
+     # Checks if the object `dcnum` has an attribute named `__version__`.
+     # If not, an AssertionError is raised.
+     assert hasattr(dcnum, "__version__")

dcnum-0.25.9/tests/test_read_hdf5_concat.py
@@ -0,0 +1,29 @@
+ import pathlib
+
+ from dcnum import read
+
+ from helper_methods import retrieve_data
+
+
+ def test_concatenated_hdf5_as_file(tmp_path):
+     path = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+     path_concat = tmp_path / "concatenated.rtdc"
+     with read.concatenated_hdf5_data([path] * 10, path_out=path_concat):
+         pass
+     assert path_concat.exists()
+
+
+ def test_concatenated_hdf5_new_file():
+     path = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+     with read.concatenated_hdf5_data([path] * 10, path_out=True) as hd:
+         pass
+     path_out = hd.path
+     assert path_out.exists()
+
+
+ def test_concatenated_hdf5_in_memory():
+     path = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+     with read.concatenated_hdf5_data([path] * 10, path_out=None) as hd:
+         pass
+     path_out = hd.path
+     assert not isinstance(path_out, (pathlib.Path, str))

dcnum-0.25.7/src/dcnum/__init__.py
@@ -1,2 +0,0 @@
- # flake8: noqa: F401
- from ._version import __version__, __version_tuple__

dcnum-0.25.7/tests/test_init.py
@@ -1,5 +0,0 @@
- import dcnum
-
-
- def test_init():
-     assert hasattr(dcnum, "__version__")